Rename regex_t to parse_tree_t and merge module into parser

This commit is contained in:
2024-10-26 11:52:03 +01:00
parent a85367c2df
commit 0c4b033d75
6 changed files with 206 additions and 227 deletions

View File

@@ -6,243 +6,243 @@
#include "parser.h"
#include "testing.h"
/* Convenience wrappers for the tests: pass a NUL-terminated string `s`
 * together with its strlen() to the parser, storing the result through `r`.
 * PARSE_REGEX_STRING is the pre-rename spelling (parse_regex) and
 * PARSE_EXPR_STRING the renamed one (parse_expr).
 * Note: `s` is expanded twice, so it should be a side-effect-free expression
 * such as a string literal. */
#define PARSE_REGEX_STRING(s, r) parse_regex(s, strlen(s), r)
#define PARSE_EXPR_STRING(s, r) parse_expr(s, strlen(s), r)
/* Parses "a" and checks that the parser did not return -1 and that the
 * resulting tree's `alternative` pointer is NULL. */
static void a_has_no_alternative(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("a", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("a", &t);
ASSERT_NE(-1, result);
ASSERT_NULL(r.alternative);
regex_free_children(&r);
ASSERT_NULL(t.alternative);
parse_tree_free_children(&t);
}
/* Parses "a|b" and checks that the parser did not return -1 and that the
 * resulting tree has a non-NULL `alternative`. */
static void a_pipe_b_has_alternative(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("a|b", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("a|b", &t);
ASSERT_NE(-1, result);
ASSERT_NOT_NULL(r.alternative);
regex_free_children(&r);
ASSERT_NOT_NULL(t.alternative);
parse_tree_free_children(&t);
}
/* Parses "a|b|c" and checks that alternatives are chained: the top-level
 * tree has a non-NULL `alternative`, and that alternative in turn has a
 * non-NULL `alternative` of its own. */
static void a_pipe_b_pipe_c_result_alternative_has_alternative(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("a|b|c", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("a|b|c", &t);
ASSERT_NE(-1, result);
ASSERT_NOT_NULL(r.alternative);
ASSERT_NOT_NULL(r.alternative->alternative);
regex_free_children(&r);
ASSERT_NOT_NULL(t.alternative);
ASSERT_NOT_NULL(t.alternative->alternative);
parse_tree_free_children(&t);
}
/* Parses "a" and checks that the sequence holds exactly one term: a
 * TERM_TYPE_LITERAL with QUANTIFIER_NONE whose literal is 'a'. */
static void a_is_parsed_as_unquantified_literal(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("a", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("a", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type);
ASSERT_EQ('a', r.sequence.contents[0].literal);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[0].type);
ASSERT_EQ('a', t.sequence.contents[0].literal);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses "b" and checks that the sequence holds exactly one term: a
 * TERM_TYPE_LITERAL with QUANTIFIER_NONE whose literal is 'b'. */
static void b_is_parsed_as_unquantified_literal(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("b", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("b", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type);
ASSERT_EQ('b', r.sequence.contents[0].literal);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[0].type);
ASSERT_EQ('b', t.sequence.contents[0].literal);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses "abc" and checks that the sequence holds three terms, in order
 * 'a', 'b', 'c', each a TERM_TYPE_LITERAL with QUANTIFIER_NONE. */
static void abc_is_parsed_as_sequence_of_unquantified_literals(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("abc", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("abc", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(3, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type);
ASSERT_EQ('a', r.sequence.contents[0].literal);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[1].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[1].type);
ASSERT_EQ('b', r.sequence.contents[1].literal);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[2].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[2].type);
ASSERT_EQ('c', r.sequence.contents[2].literal);
ASSERT_EQ(3, t.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[0].type);
ASSERT_EQ('a', t.sequence.contents[0].literal);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[1].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[1].type);
ASSERT_EQ('b', t.sequence.contents[1].literal);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[2].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[2].type);
ASSERT_EQ('c', t.sequence.contents[2].literal);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses "." and checks that the sequence holds exactly one term: a
 * TERM_TYPE_WILDCARD with QUANTIFIER_NONE. */
static void dot_is_parsed_as_unquantified_wildcard_term(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING(".", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING(".", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_WILDCARD, r.sequence.contents[0].type);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_WILDCARD, t.sequence.contents[0].type);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses the two-character input backslash, dot (an escaped dot) and checks
 * that it yields a single TERM_TYPE_LITERAL term with QUANTIFIER_NONE whose
 * literal is '.', i.e. not a wildcard. */
static void backslash_dot_is_parsed_as_unquantified_literal(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("\\.", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("\\.", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type);
ASSERT_EQ('.', r.sequence.contents[0].literal);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[0].type);
ASSERT_EQ('.', t.sequence.contents[0].literal);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses the two-character input backslash, backslash (an escaped backslash)
 * and checks that it yields a single TERM_TYPE_LITERAL term with
 * QUANTIFIER_NONE whose literal is a backslash character. */
static void backslash_backslash_is_parsed_as_unquantified_literal(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("\\\\", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("\\\\", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type);
ASSERT_EQ('\\', r.sequence.contents[0].literal);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[0].type);
ASSERT_EQ('\\', t.sequence.contents[0].literal);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses "(a|b)" and checks that the top-level sequence holds a single
 * unquantified sub-expression term (TERM_TYPE_REGEX before the rename,
 * TERM_TYPE_SUBEXPR after it). Inside that term, the first branch must be a
 * one-term sequence holding the literal 'a', and its alternative a one-term
 * sequence holding the literal 'b'.
 * NOTE(review): inner_alt is dereferenced without a NULL check; consider
 * asserting it is non-NULL first so a missing alternative fails cleanly. */
static void a_pipe_b_in_parens_is_parsed_as_regex_term(void)
static void a_pipe_b_in_parens_is_parsed_as_subexpr_term(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("(a|b)", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("(a|b)", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_REGEX, r.sequence.contents[0].type);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_SUBEXPR, t.sequence.contents[0].type);
const regex_t *inner = &r.sequence.contents[0].regex;
const parse_tree_t *inner = &t.sequence.contents[0].subexpr;
ASSERT_EQ(1, inner->sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, inner->sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, inner->sequence.contents[0].type);
ASSERT_EQ('a', inner->sequence.contents[0].literal);
const regex_t *inner_alt = inner->alternative;
const parse_tree_t *inner_alt = inner->alternative;
/* Check the alternative's own length before indexing contents[0]. */
ASSERT_EQ(1, inner_alt->sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, inner_alt->sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, inner_alt->sequence.contents[0].type);
ASSERT_EQ('b', inner_alt->sequence.contents[0].literal);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses "(a)b" and checks that the top-level sequence holds two unquantified
 * terms: a sub-expression term (TERM_TYPE_REGEX before the rename,
 * TERM_TYPE_SUBEXPR after it) followed by the literal 'b'. The sub-expression
 * itself must be a one-term sequence holding the literal 'a'. */
static void a_in_parens_b_is_parsed_as_sequence_with_regex_term(void)
static void a_in_parens_b_is_parsed_as_sequence_with_subexpr_term(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("(a)b", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("(a)b", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(2, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_REGEX, r.sequence.contents[0].type);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[1].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[1].type);
ASSERT_EQ('b', r.sequence.contents[1].literal);
ASSERT_EQ(2, t.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_SUBEXPR, t.sequence.contents[0].type);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[1].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[1].type);
ASSERT_EQ('b', t.sequence.contents[1].literal);
const regex_t *inner = &r.sequence.contents[0].regex;
const parse_tree_t *inner = &t.sequence.contents[0].subexpr;
ASSERT_EQ(1, inner->sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, inner->sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, inner->sequence.contents[0].type);
ASSERT_EQ('a', inner->sequence.contents[0].literal);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses ".*" and checks that the sequence holds exactly one term: a
 * TERM_TYPE_WILDCARD with QUANTIFIER_ZERO_OR_MORE. */
static void dot_star_is_parsed_as_zero_or_more_wildcard(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING(".*", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING(".*", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_ZERO_OR_MORE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_WILDCARD, r.sequence.contents[0].type);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_ZERO_OR_MORE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_WILDCARD, t.sequence.contents[0].type);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses ".+" and checks that the sequence holds exactly one term: a
 * TERM_TYPE_WILDCARD with QUANTIFIER_ONE_OR_MORE. */
static void dot_plus_is_parsed_as_one_or_more_wildcard(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING(".+", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING(".+", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_ONE_OR_MORE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_WILDCARD, r.sequence.contents[0].type);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_ONE_OR_MORE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_WILDCARD, t.sequence.contents[0].type);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses ".?" and checks that the sequence holds exactly one term: a
 * TERM_TYPE_WILDCARD with QUANTIFIER_ZERO_OR_ONE. */
static void dot_question_mark_is_parsed_as_zero_or_one_wildcard(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING(".?", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING(".?", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_ZERO_OR_ONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_WILDCARD, r.sequence.contents[0].type);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_ZERO_OR_ONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_WILDCARD, t.sequence.contents[0].type);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses "[a]" and checks that the sequence holds exactly one unquantified
 * TERM_TYPE_CLASS term whose class is not negated, has count 1, has a
 * non-NULL contents array, and whose first element is 'a'. */
static void a_in_brackets_is_parsed_as_class_containing_only_a(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("[a]", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("[a]", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_CLASS, r.sequence.contents[0].type);
ASSERT_FALSE(r.sequence.contents[0].class.negated);
ASSERT_EQ(1, r.sequence.contents[0].class.count);
ASSERT_NOT_NULL(r.sequence.contents[0].class.contents);
ASSERT_EQ('a', r.sequence.contents[0].class.contents[0]);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_CLASS, t.sequence.contents[0].type);
ASSERT_FALSE(t.sequence.contents[0].class.negated);
ASSERT_EQ(1, t.sequence.contents[0].class.count);
ASSERT_NOT_NULL(t.sequence.contents[0].class.contents);
ASSERT_EQ('a', t.sequence.contents[0].class.contents[0]);
regex_free_children(&r);
parse_tree_free_children(&t);
}
/* Parses "[^a]" and checks that the sequence holds exactly one unquantified
 * TERM_TYPE_CLASS term whose class is negated, has count 1, has a non-NULL
 * contents array, and whose first element is 'a' (the caret is not stored
 * as a member). */
static void caret_a_in_brackets_parses_as_negated_class(void)
{
regex_t r;
const int result = PARSE_REGEX_STRING("[^a]", &r);
parse_tree_t t;
const int result = PARSE_EXPR_STRING("[^a]", &t);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_CLASS, r.sequence.contents[0].type);
ASSERT_TRUE(r.sequence.contents[0].class.negated);
ASSERT_EQ(1, r.sequence.contents[0].class.count);
ASSERT_NOT_NULL(r.sequence.contents[0].class.contents);
ASSERT_EQ('a', r.sequence.contents[0].class.contents[0]);
ASSERT_EQ(1, t.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_CLASS, t.sequence.contents[0].type);
ASSERT_TRUE(t.sequence.contents[0].class.negated);
ASSERT_EQ(1, t.sequence.contents[0].class.count);
ASSERT_NOT_NULL(t.sequence.contents[0].class.contents);
ASSERT_EQ('a', t.sequence.contents[0].class.contents[0]);
regex_free_children(&r);
parse_tree_free_children(&t);
}
int main(void)
@@ -257,8 +257,8 @@ int main(void)
dot_is_parsed_as_unquantified_wildcard_term();
backslash_dot_is_parsed_as_unquantified_literal();
backslash_backslash_is_parsed_as_unquantified_literal();
a_pipe_b_in_parens_is_parsed_as_regex_term();
a_in_parens_b_is_parsed_as_sequence_with_regex_term();
a_pipe_b_in_parens_is_parsed_as_subexpr_term();
a_in_parens_b_is_parsed_as_sequence_with_subexpr_term();
dot_star_is_parsed_as_zero_or_more_wildcard();
dot_plus_is_parsed_as_one_or_more_wildcard();
dot_question_mark_is_parsed_as_zero_or_one_wildcard();