diff --git a/lib/construct.c b/lib/construct.c index 15586be..f3fcaef 100644 --- a/lib/construct.c +++ b/lib/construct.c @@ -143,7 +143,7 @@ static void construct_term(const regex_term_t *term, fsa_t *out) construct_base(out, term->literal); break; case REGEX_TERM_SUBEXPR: - construct(&term->subexpr, out); + construct_nfa(&term->subexpr, out); break; case REGEX_TERM_WILDCARD: case REGEX_TERM_CLASS: @@ -199,7 +199,7 @@ static void construct_union(fsa_t *f, const fsa_t *o) fsa_add_rule(f, final, 0, EPSILON); } -void construct(const regex_t *regex, fsa_t *out) +void construct_nfa(const regex_t *regex, fsa_t *out) { assert(regex->count > 0); diff --git a/lib/include/construct.h b/lib/include/construct.h index 0154c45..cdd0a30 100644 --- a/lib/include/construct.h +++ b/lib/include/construct.h @@ -9,6 +9,6 @@ #include "fsa.h" #include "regex.h" -void construct(const regex_t *regex, fsa_t *out); +void construct_nfa(const regex_t *regex, fsa_t *out); #endif diff --git a/tests/construct_tests.c b/tests/construct_tests.c index 0e12354..f8e94ff 100644 --- a/tests/construct_tests.c +++ b/tests/construct_tests.c @@ -6,8 +6,8 @@ #include "construct.h" #include "testing.h" -static const char * -match_from_state(const fsa_t *nfa, int state_id, const char *input) +static bool +accepts_from_state(const fsa_t *nfa, int state_id, const char *input) { const fsa_state_t *state = &nfa->states[state_id]; @@ -15,25 +15,21 @@ match_from_state(const fsa_t *nfa, int state_id, const char *input) const bool end_of_input = '\0' == *input; for (int i = 0; i < state->count; ++i) { if ((!final || !end_of_input) && EPSILON == state->rules[i].input) { - const char *s - = match_from_state(nfa, state->rules[i].next, input); - if (NULL != s) - return s; + if (accepts_from_state(nfa, state->rules[i].next, input)) + return true; } if (!end_of_input && *input == state->rules[i].input) { - const char *s - = match_from_state(nfa, state->rules[i].next, input + 1); - if (NULL != s) - return s; + if 
(accepts_from_state(nfa, state->rules[i].next, input + 1)) + return true; } } - return final ? input : NULL; + return final && end_of_input; } -static const char *match(const fsa_t *nfa, const char *input) +static bool accepts(const fsa_t *nfa, const char *input) { - return match_from_state(nfa, nfa->initial, input); + return accepts_from_state(nfa, nfa->initial, input); } static void test_empty_expression(void) @@ -48,9 +44,10 @@ static void test_empty_expression(void) = { .count = 1, .capacity = 1, .contents = alternatives }; fsa_t fsa; - construct(&regex, &fsa); + construct_nfa(&regex, &fsa); - ASSERT_NOT_NULL(match(&fsa, "")); + ASSERT_TRUE(accepts(&fsa, "")); + ASSERT_FALSE(accepts(&fsa, "a")); regex_free(&regex); fsa_free(&fsa); @@ -69,10 +66,10 @@ static void test_literal_expression(void) = { .count = 1, .capacity = 1, .contents = alternatives }; fsa_t fsa; - construct(&regex, &fsa); + construct_nfa(&regex, &fsa); - ASSERT_NOT_NULL(match(&fsa, "a")); - ASSERT_NULL(match(&fsa, "b")); + ASSERT_TRUE(accepts(&fsa, "a")); + ASSERT_FALSE(accepts(&fsa, "b")); regex_free(&regex); fsa_free(&fsa); @@ -81,10 +78,13 @@ static void test_literal_expression(void) static void test_sequence(void) { regex_term_t *terms = malloc(3 * sizeof(regex_term_t)); + terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].type = REGEX_TERM_LITERAL; terms[0].literal = 'a'; + terms[1].quantifier = REGEX_QUANTIFIER_NONE; terms[1].type = REGEX_TERM_LITERAL; terms[1].literal = 'b'; + terms[2].quantifier = REGEX_QUANTIFIER_NONE; terms[2].type = REGEX_TERM_LITERAL; terms[2].literal = 'c'; regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); @@ -93,16 +93,13 @@ static void test_sequence(void) regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; fsa_t fsa; - construct(&regex, &fsa); + construct_nfa(&regex, &fsa); - ASSERT_NOT_NULL(match(&fsa, "abc")); - ASSERT_NULL(match(&fsa, "a")); - ASSERT_NULL(match(&fsa, "ab")); - ASSERT_NULL(match(&fsa, "d")); - - const char *s = "abcd"; - 
const char *t = match(&fsa, s); - ASSERT_EQ(s + 3, t); + ASSERT_TRUE(accepts(&fsa, "abc")); + ASSERT_FALSE(accepts(&fsa, "a")); + ASSERT_FALSE(accepts(&fsa, "ab")); + ASSERT_FALSE(accepts(&fsa, "d")); + ASSERT_FALSE(accepts(&fsa, "abcd")); regex_free(&regex); fsa_free(&fsa); @@ -124,16 +121,13 @@ static void test_union(void) regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives }; fsa_t fsa; - construct(&regex, &fsa); + construct_nfa(&regex, &fsa); - ASSERT_NOT_NULL(match(&fsa, "a")); - ASSERT_NOT_NULL(match(&fsa, "b")); - ASSERT_NOT_NULL(match(&fsa, "c")); - ASSERT_NULL(match(&fsa, "d")); - - const char *s = "aa"; - const char *t = match(&fsa, s); - ASSERT_EQ(s + 1, t); + ASSERT_TRUE(accepts(&fsa, "a")); + ASSERT_TRUE(accepts(&fsa, "b")); + ASSERT_TRUE(accepts(&fsa, "c")); + ASSERT_FALSE(accepts(&fsa, "d")); + ASSERT_FALSE(accepts(&fsa, "aa")); regex_free(&regex); fsa_free(&fsa); @@ -151,15 +145,12 @@ static void test_star(void) regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; fsa_t fsa; - construct(&regex, &fsa); + construct_nfa(&regex, &fsa); - ASSERT_NOT_NULL(match(&fsa, "")); - ASSERT_NOT_NULL(match(&fsa, "a")); - ASSERT_NOT_NULL(match(&fsa, "aaaaaa")); - - const char *s = "b"; - const char *t = match(&fsa, s); - ASSERT_EQ(s, t); + ASSERT_TRUE(accepts(&fsa, "")); + ASSERT_TRUE(accepts(&fsa, "a")); + ASSERT_TRUE(accepts(&fsa, "aaaaaa")); + ASSERT_FALSE(accepts(&fsa, "b")); regex_free(&regex); fsa_free(&fsa); @@ -186,10 +177,10 @@ static void test_subexpression(void) regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; fsa_t fsa; - construct(&regex, &fsa); + construct_nfa(&regex, &fsa); - ASSERT_NOT_NULL(match(&fsa, "a")); - ASSERT_NULL(match(&fsa, "b")); + ASSERT_TRUE(accepts(&fsa, "a")); + ASSERT_FALSE(accepts(&fsa, "b")); regex_free(&regex); fsa_free(&fsa); @@ -219,28 +210,28 @@ static void test_sequence_containing_starred_union(void) terms[1].quantifier = REGEX_QUANTIFIER_NONE; terms[1].type = REGEX_TERM_LITERAL; 
terms[1].literal = 'b'; - terms[2].quantifier = REGEX_QUANTIFIER_NONE; + terms[2].quantifier = REGEX_QUANTIFIER_STAR; terms[2].type = REGEX_TERM_SUBEXPR; terms[2].subexpr.count = terms[2].subexpr.capacity = 2; terms[2].subexpr.contents = inner_alternatives; regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); - alternatives[0].count = alternatives[0].capacity = 1; + alternatives[0].count = alternatives[0].capacity = 3; alternatives[0].contents = terms; regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; fsa_t fsa; - construct(&regex, &fsa); + construct_nfa(&regex, &fsa); - ASSERT_NOT_NULL(match(&fsa, "ab")); - ASSERT_NOT_NULL(match(&fsa, "abc")); - ASSERT_NOT_NULL(match(&fsa, "abccc")); - ASSERT_NOT_NULL(match(&fsa, "abd")); - ASSERT_NOT_NULL(match(&fsa, "abddd")); - ASSERT_NOT_NULL(match(&fsa, "abcddcc")); - ASSERT_NOT_NULL(match(&fsa, "abddccd")); - ASSERT_NULL(match(&fsa, "c")); - ASSERT_NULL(match(&fsa, "d")); - ASSERT_NULL(match(&fsa, "foo")); + ASSERT_TRUE(accepts(&fsa, "ab")); + ASSERT_TRUE(accepts(&fsa, "abc")); + ASSERT_TRUE(accepts(&fsa, "abccc")); + ASSERT_TRUE(accepts(&fsa, "abd")); + ASSERT_TRUE(accepts(&fsa, "abddd")); + ASSERT_TRUE(accepts(&fsa, "abcddcc")); + ASSERT_TRUE(accepts(&fsa, "abddccd")); + ASSERT_FALSE(accepts(&fsa, "c")); + ASSERT_FALSE(accepts(&fsa, "d")); + ASSERT_FALSE(accepts(&fsa, "foo")); regex_free(&regex); fsa_free(&fsa); @@ -249,6 +240,7 @@ static void test_sequence_containing_starred_union(void) static void test_union_of_single_term_and_sequence_containing_starred_term(void) { + // a|b*c regex_term_t *terms0 = malloc(1 * sizeof(regex_term_t)); terms0[0].quantifier = REGEX_QUANTIFIER_NONE; terms0[0].type = REGEX_TERM_LITERAL; @@ -268,17 +260,14 @@ test_union_of_single_term_and_sequence_containing_starred_term(void) regex_t regex = { .count = 2, .capacity = 2, .contents = alternatives }; fsa_t fsa; - construct(&regex, &fsa); + construct_nfa(&regex, &fsa); - ASSERT_NOT_NULL(match(&fsa, "a")); - 
ASSERT_NOT_NULL(match(&fsa, "c")); - ASSERT_NOT_NULL(match(&fsa, "bc")); - ASSERT_NOT_NULL(match(&fsa, "bbbbbc")); - ASSERT_NULL(match(&fsa, "foo")); - - const char *s = "ba"; - const char *t = match(&fsa, s); - ASSERT_EQ(s + 1, t); + ASSERT_TRUE(accepts(&fsa, "a")); + ASSERT_TRUE(accepts(&fsa, "c")); + ASSERT_TRUE(accepts(&fsa, "bc")); + ASSERT_TRUE(accepts(&fsa, "bbbbbc")); + ASSERT_FALSE(accepts(&fsa, "foo")); + ASSERT_FALSE(accepts(&fsa, "ba")); regex_free(&regex); fsa_free(&fsa);