From 34fee99232aadb5bca6af102d81e1c71d07cd4de Mon Sep 17 00:00:00 2001 From: Camden Dixie O'Brien Date: Sat, 2 Nov 2024 21:54:30 +0000 Subject: [PATCH] Fix bug in construct_nfa Intermediate final states were being left in by add_fsa(); we always want to mark the added FSA's final state as non-final. --- lib/construct.c | 3 +++ tests/construct_tests.c | 42 +++++++++++++++++++++++++++++++++++++++ tests/integration_tests.c | 17 ++++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/lib/construct.c b/lib/construct.c index b4f86e1..35ba7e7 100644 --- a/lib/construct.c +++ b/lib/construct.c @@ -25,6 +25,9 @@ static void add_fsa(fsa_t *f, const fsa_t *o, int *init_out, int *final_out) } memcpy(f->states + f->count, o->states, o->count * sizeof(fsa_state_t)); + // Mark o's final state as non-final. + f->states[f->count].final = false; + // Retarget the rules of the copied states to refer to the new // state indices. for (int i = f->count; i < count; ++i) { diff --git a/tests/construct_tests.c b/tests/construct_tests.c index f8e94ff..eb398cf 100644 --- a/tests/construct_tests.c +++ b/tests/construct_tests.c @@ -273,6 +273,47 @@ test_union_of_single_term_and_sequence_containing_starred_term(void) fsa_free(&fsa); } +static void test_sequence_of_subexpr_a_or_empty_and_b(void) +{ + // (a|ε)b + regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t)); + inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE; + inner_terms0[0].type = REGEX_TERM_LITERAL; + inner_terms0[0].literal = 'a'; + regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t)); + inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE; + inner_terms1[0].type = REGEX_TERM_EMPTY; + regex_sequence_t *inner_alternatives + = malloc(2 * sizeof(regex_sequence_t)); + inner_alternatives[0].count = inner_alternatives[0].capacity = 1; + inner_alternatives[0].contents = inner_terms0; + inner_alternatives[1].count = inner_alternatives[1].capacity = 1; + inner_alternatives[1].contents = inner_terms1; + regex_term_t 
*terms = malloc(2 * sizeof(regex_term_t)); + terms[0].quantifier = REGEX_QUANTIFIER_NONE; + terms[0].type = REGEX_TERM_SUBEXPR; + terms[0].subexpr.count = terms[0].subexpr.capacity = 2; + terms[0].subexpr.contents = inner_alternatives; + terms[1].quantifier = REGEX_QUANTIFIER_NONE; + terms[1].type = REGEX_TERM_LITERAL; + terms[1].literal = 'b'; + regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); + alternatives[0].count = alternatives[0].capacity = 2; + alternatives[0].contents = terms; + regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; + + fsa_t fsa; + construct_nfa(&regex, &fsa); + + ASSERT_TRUE(accepts(&fsa, "ab")); + ASSERT_TRUE(accepts(&fsa, "b")); + ASSERT_FALSE(accepts(&fsa, "")); + ASSERT_FALSE(accepts(&fsa, "a")); + + regex_free(&regex); + fsa_free(&fsa); +} + int main(void) { TESTING_BEGIN(); @@ -288,6 +329,7 @@ int main(void) // Compound expressions test_sequence_containing_starred_union(); test_union_of_single_term_and_sequence_containing_starred_term(); + test_sequence_of_subexpr_a_or_empty_and_b(); return TESTING_END(); } diff --git a/tests/integration_tests.c b/tests/integration_tests.c index 5ae7b22..5e95090 100644 --- a/tests/integration_tests.c +++ b/tests/integration_tests.c @@ -47,11 +47,28 @@ static void test_arbitrary_regex_1(void) fsa_free(&dfa); } +static void test_arbitrary_regex_2(void) +{ + fsa_t dfa; + const char *regex = "(l|wh)?[aeiou]+"; + const bool success = compile(regex, strlen(regex), &dfa); + ASSERT_TRUE(success); + ASSERT_ACCEPTS(&dfa, "laaaa"); + ASSERT_ACCEPTS(&dfa, "eeeee"); + ASSERT_ACCEPTS(&dfa, "iii"); + ASSERT_ACCEPTS(&dfa, "whooo"); + ASSERT_ACCEPTS(&dfa, "u"); + ASSERT_REJECTS(&dfa, "wh"); + ASSERT_REJECTS(&dfa, "lxxx"); + fsa_free(&dfa); +} + int main(void) { TESTING_BEGIN(); test_foo_or_bar_regex(); test_even_number_of_Is_regex(); test_arbitrary_regex_1(); + test_arbitrary_regex_2(); return TESTING_END(); }