diff --git a/lib/parser.c b/lib/parser.c
index 24fe2ee..e8897e3 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -16,6 +16,8 @@ static bool is_special(char c)
 	case '|':
 	case '.':
 	case '\\':
+	case '(':
+	case ')':
 		return true;
 	default:
 		return false;
@@ -39,9 +41,27 @@ static int parse_term(const char *input, int rem, term_t *out)
 {
 	int result, used = 0;
 
-	if (used < rem && '.' == input[0]) {
+	/* A term needs at least one character of input. */
+	if (1 > rem)
+		return -1;
+
+	if ('.' == input[0]) {
 		out->type = TERM_TYPE_WILDCARD;
 		++used;
+	} else if ('(' == input[0]) {
+		/* Parenthesised group: parse the inner regex recursively. */
+		++used;
+
+		result = parse_regex(input + used, rem - used, &out->regex);
+		if (result < 0)
+			return -1;
+		out->type = TERM_TYPE_REGEX;
+		used += result;
+
+		/* The group must be closed by ')' inside the remaining input. */
+		if (used >= rem || ')' != input[used])
+			return -1;
+		++used;
 	} else {
 		result = parse_literal(input + used, rem - used, &out->literal);
 		if (result < 0)
@@ -94,9 +114,7 @@ int parse_regex(const char *input, int rem, regex_t *out)
 		return -1;
 	used += result;
 
-	if (used < rem) {
-		if (input[used] != '|')
-			return -1;
+	if (used < rem && '|' == input[used]) {
 		++used;
 
 		out->alternative = calloc(1, sizeof(regex_t));
diff --git a/tests/parser_tests.c b/tests/parser_tests.c
index 99cc913..0f96f14 100644
--- a/tests/parser_tests.c
+++ b/tests/parser_tests.c
@@ -125,6 +125,53 @@ static void backslash_backslash_is_parsed_as_unquantified_literal(void)
 	regex_free_children(&r);
 }
 
+static void a_pipe_b_in_parens_is_parsed_as_regex_term(void)
+{
+	regex_t r = { 0 };
+	const int result = PARSE_REGEX_STRING("(a|b)", &r);
+	ASSERT_NE(-1, result);
+
+	ASSERT_EQ(1, r.sequence.len);
+	ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
+	ASSERT_EQ(TERM_TYPE_REGEX, r.sequence.contents[0].type);
+
+	const regex_t *inner = &r.sequence.contents[0].regex;
+	ASSERT_EQ(1, inner->sequence.len);
+	ASSERT_EQ(QUANTIFIER_NONE, inner->sequence.contents[0].quantifier);
+	ASSERT_EQ(TERM_TYPE_LITERAL, inner->sequence.contents[0].type);
+	ASSERT_EQ('a', inner->sequence.contents[0].literal);
+
+	const regex_t *inner_alt = inner->alternative;
+	ASSERT_EQ(1, inner_alt->sequence.len);
+	ASSERT_EQ(QUANTIFIER_NONE, inner_alt->sequence.contents[0].quantifier);
+	ASSERT_EQ(TERM_TYPE_LITERAL, inner_alt->sequence.contents[0].type);
+	ASSERT_EQ('b', inner_alt->sequence.contents[0].literal);
+
+	regex_free_children(&r);
+}
+
+static void a_in_parens_b_is_parsed_as_sequence_with_regex_term(void)
+{
+	regex_t r = { 0 };
+	const int result = PARSE_REGEX_STRING("(a)b", &r);
+	ASSERT_NE(-1, result);
+
+	ASSERT_EQ(2, r.sequence.len);
+	ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
+	ASSERT_EQ(TERM_TYPE_REGEX, r.sequence.contents[0].type);
+	ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[1].quantifier);
+	ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[1].type);
+	ASSERT_EQ('b', r.sequence.contents[1].literal);
+
+	const regex_t *inner = &r.sequence.contents[0].regex;
+	ASSERT_EQ(1, inner->sequence.len);
+	ASSERT_EQ(QUANTIFIER_NONE, inner->sequence.contents[0].quantifier);
+	ASSERT_EQ(TERM_TYPE_LITERAL, inner->sequence.contents[0].type);
+	ASSERT_EQ('a', inner->sequence.contents[0].literal);
+
+	regex_free_children(&r);
+}
+
 int main(void)
 {
 	TESTING_BEGIN();
@@ -137,5 +184,7 @@ int main(void)
 	dot_is_parsed_as_unquantified_wildcard_term();
 	backslash_dot_is_parsed_as_unquantified_literal();
 	backslash_backslash_is_parsed_as_unquantified_literal();
+	a_pipe_b_in_parens_is_parsed_as_regex_term();
+	a_in_parens_b_is_parsed_as_sequence_with_regex_term();
 	return TESTING_END();
 }