Fix bug in construct_nfa

Intermediate final states were being left in by add_fsa(); we always
want to mark the added FSA's final state as non-final.
This commit is contained in:
Camden Dixie O'Brien 2024-11-02 21:54:30 +00:00
parent 074b174d0f
commit 34fee99232
3 changed files with 62 additions and 0 deletions

View File

@ -25,6 +25,9 @@ static void add_fsa(fsa_t *f, const fsa_t *o, int *init_out, int *final_out)
}
memcpy(f->states + f->count, o->states, o->count * sizeof(fsa_state_t));
// Mark o's final state as non-final.
f->states[f->count].final = false;
// Retarget the rules of the copied states to refer to the new
// state indices.
for (int i = f->count; i < count; ++i) {

View File

@ -273,6 +273,47 @@ test_union_of_single_term_and_sequence_containing_starred_term(void)
fsa_free(&fsa);
}
static void test_sequence_of_subexpr_a_or_empty_and_b(void)
{
// (a|ε)b
regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t));
inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms0[0].type = REGEX_TERM_LITERAL;
inner_terms0[0].literal = 'a';
regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t));
inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms1[0].type = REGEX_TERM_EMPTY;
regex_sequence_t *inner_alternatives
= malloc(2 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms0;
inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
inner_alternatives[1].contents = inner_terms1;
regex_term_t *terms = malloc(2 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 2;
terms[0].subexpr.contents = inner_alternatives;
terms[1].quantifier = REGEX_QUANTIFIER_NONE;
terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 2;
alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "ab"));
ASSERT_TRUE(accepts(&fsa, "b"));
ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "a"));
regex_free(&regex);
fsa_free(&fsa);
}
int main(void)
{
TESTING_BEGIN();
@ -288,6 +329,7 @@ int main(void)
// Compound expressions
test_sequence_containing_starred_union();
test_union_of_single_term_and_sequence_containing_starred_term();
test_sequence_of_subexpr_a_or_empty_and_b();
return TESTING_END();
}

View File

@ -47,11 +47,28 @@ static void test_arbitrary_regex_1(void)
fsa_free(&dfa);
}
// Compiles "(l|wh)?[aeiou]+" and probes the resulting automaton with inputs
// that are expected to match and inputs that are expected not to.
static void test_arbitrary_regex_2(void)
{
    const char *pattern = "(l|wh)?[aeiou]+";
    const size_t pattern_len = strlen(pattern);

    fsa_t dfa;
    const bool success = compile(pattern, pattern_len, &dfa);
    ASSERT_TRUE(success);

    // Prefix present ("l" / "wh") or absent, followed by one or more vowels.
    ASSERT_ACCEPTS(&dfa, "laaaa");
    ASSERT_ACCEPTS(&dfa, "eeeee");
    ASSERT_ACCEPTS(&dfa, "iii");
    ASSERT_ACCEPTS(&dfa, "whooo");
    ASSERT_ACCEPTS(&dfa, "u");

    // A prefix with no vowel after it, and a prefix followed by non-vowels.
    ASSERT_REJECTS(&dfa, "wh");
    ASSERT_REJECTS(&dfa, "lxxx");

    fsa_free(&dfa);
}
// Test-runner entry point: calls each regex test case in turn, bracketed by
// TESTING_BEGIN() and TESTING_END().
int main(void)
{
TESTING_BEGIN();
test_foo_or_bar_regex();
test_even_number_of_Is_regex();
test_arbitrary_regex_1();
test_arbitrary_regex_2();
// The value of TESTING_END() is returned as the process exit status
// (presumably non-zero when a check failed -- confirm in the testing header).
return TESTING_END();
}