Add paren support to parser

This commit is contained in:
Camden Dixie O'Brien 2024-10-25 14:50:56 +01:00
parent da0175105d
commit 80a26997b8
2 changed files with 68 additions and 4 deletions

View File

@ -16,6 +16,8 @@ static bool is_special(char c)
case '|': case '|':
case '.': case '.':
case '\\': case '\\':
case '(':
case ')':
return true; return true;
default: default:
return false; return false;
@ -39,9 +41,24 @@ static int parse_term(const char *input, int rem, term_t *out)
{ {
int result, used = 0; int result, used = 0;
if (used < rem && '.' == input[0]) { if (1 > rem)
return -1;
if ('.' == input[0]) {
out->type = TERM_TYPE_WILDCARD; out->type = TERM_TYPE_WILDCARD;
++used; ++used;
} else if ('(' == input[0]) {
++used;
result = parse_regex(input + used, rem - used, &out->regex);
if (result < 0)
return -1;
out->type = TERM_TYPE_REGEX;
used += result;
if (')' != input[used])
return -1;
++used;
} else { } else {
result = parse_literal(input + used, rem - used, &out->literal); result = parse_literal(input + used, rem - used, &out->literal);
if (result < 0) if (result < 0)
@ -94,9 +111,7 @@ int parse_regex(const char *input, int rem, regex_t *out)
return -1; return -1;
used += result; used += result;
if (used < rem) { if (used < rem && '|' == input[used]) {
if (input[used] != '|')
return -1;
++used; ++used;
out->alternative = calloc(1, sizeof(regex_t)); out->alternative = calloc(1, sizeof(regex_t));

View File

@ -125,6 +125,53 @@ static void backslash_backslash_is_parsed_as_unquantified_literal(void)
regex_free_children(&r); regex_free_children(&r);
} }
/*
 * Parsing "(a|b)" must succeed and yield a one-term sequence whose only
 * term is an unquantified TERM_TYPE_REGEX.  The nested regex must be the
 * literal 'a' with an alternative whose sequence is the literal 'b'.
 */
static void a_pipe_b_in_parens_is_parsed_as_regex_term(void)
{
	regex_t r = { 0 };
	const int result = PARSE_REGEX_STRING("(a|b)", &r);
	ASSERT_NE(-1, result);

	/* Outer regex: exactly one term, the parenthesised group. */
	ASSERT_EQ(1, r.sequence.len);
	ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
	ASSERT_EQ(TERM_TYPE_REGEX, r.sequence.contents[0].type);

	/* First branch inside the parentheses: the literal 'a'. */
	const regex_t *inner = &r.sequence.contents[0].regex;
	ASSERT_EQ(1, inner->sequence.len);
	ASSERT_EQ(QUANTIFIER_NONE, inner->sequence.contents[0].quantifier);
	ASSERT_EQ(TERM_TYPE_LITERAL, inner->sequence.contents[0].type);
	ASSERT_EQ('a', inner->sequence.contents[0].literal);

	/*
	 * Second branch: the literal 'b'.  The length checked here must be
	 * the alternative's own sequence length, since contents[0] of that
	 * sequence is read immediately afterwards.
	 */
	const regex_t *inner_alt = inner->alternative;
	ASSERT_EQ(1, inner_alt->sequence.len);
	ASSERT_EQ(QUANTIFIER_NONE, inner_alt->sequence.contents[0].quantifier);
	ASSERT_EQ(TERM_TYPE_LITERAL, inner_alt->sequence.contents[0].type);
	ASSERT_EQ('b', inner_alt->sequence.contents[0].literal);

	regex_free_children(&r);
}
/*
 * Parsing "(a)b" must succeed and yield a two-term sequence: an
 * unquantified TERM_TYPE_REGEX term followed by the unquantified
 * literal 'b'.  The nested regex must hold the single literal 'a'.
 */
static void a_in_parens_b_is_parsed_as_sequence_with_regex_term(void)
{
	regex_t parsed = { 0 };
	const int rc = PARSE_REGEX_STRING("(a)b", &parsed);
	ASSERT_NE(-1, rc);

	/* Top level: the group first, then the trailing literal. */
	ASSERT_EQ(2, parsed.sequence.len);
	ASSERT_EQ(QUANTIFIER_NONE, parsed.sequence.contents[0].quantifier);
	ASSERT_EQ(TERM_TYPE_REGEX, parsed.sequence.contents[0].type);
	ASSERT_EQ(QUANTIFIER_NONE, parsed.sequence.contents[1].quantifier);
	ASSERT_EQ(TERM_TYPE_LITERAL, parsed.sequence.contents[1].type);
	ASSERT_EQ('b', parsed.sequence.contents[1].literal);

	/* Contents of the parenthesised group: just 'a'. */
	const regex_t *group = &parsed.sequence.contents[0].regex;
	ASSERT_EQ(1, group->sequence.len);
	ASSERT_EQ(QUANTIFIER_NONE, group->sequence.contents[0].quantifier);
	ASSERT_EQ(TERM_TYPE_LITERAL, group->sequence.contents[0].type);
	ASSERT_EQ('a', group->sequence.contents[0].literal);

	regex_free_children(&parsed);
}
int main(void) int main(void)
{ {
TESTING_BEGIN(); TESTING_BEGIN();
@ -137,5 +184,7 @@ int main(void)
dot_is_parsed_as_unquantified_wildcard_term(); dot_is_parsed_as_unquantified_wildcard_term();
backslash_dot_is_parsed_as_unquantified_literal(); backslash_dot_is_parsed_as_unquantified_literal();
backslash_backslash_is_parsed_as_unquantified_literal(); backslash_backslash_is_parsed_as_unquantified_literal();
a_pipe_b_in_parens_is_parsed_as_regex_term();
a_in_parens_b_is_parsed_as_sequence_with_regex_term();
return TESTING_END(); return TESTING_END();
} }