diff --git a/lib/construct.c b/lib/construct.c index f0b2417..8ff33fc 100644 --- a/lib/construct.c +++ b/lib/construct.c @@ -128,19 +128,6 @@ static void construct_symbol(fsa_t *out, int symbol) fsa_add_rule(out, out->initial, 0, symbol); } -static void construct_star(fsa_t *out) -{ - fsa_t f; - memcpy(&f, out, sizeof(fsa_t)); - - construct_symbol(out, EPSILON); - int f_initial, f_final; - add_fsa(out, &f, &f_initial, &f_final); - fsa_add_rule(out, out->initial, f_initial, EPSILON); - fsa_add_rule(out, f_final, f_initial, EPSILON); - fsa_add_rule(out, f_final, 0, EPSILON); -} - static bool in_class(const regex_class_t *class, char c) { for (int i = 0; i < class->count; ++i) { @@ -171,6 +158,38 @@ static void construct_wildcard(fsa_t *out) fsa_add_rule(out, out->initial, 0, i); } +static void base_quantify(fsa_t *out, int *init_out, int *final_out) +{ + fsa_t f; + memcpy(&f, out, sizeof(fsa_t)); + construct_base(out); + add_fsa(out, &f, init_out, final_out); + fsa_add_rule(out, out->initial, *init_out, EPSILON); + fsa_add_rule(out, *final_out, 0, EPSILON); +} + +static void construct_star(fsa_t *out) +{ + int sub_init, sub_final; + base_quantify(out, &sub_init, &sub_final); + fsa_add_rule(out, sub_final, sub_init, EPSILON); + fsa_add_rule(out, out->initial, 0, EPSILON); +} + +static void construct_plus(fsa_t *out) +{ + int sub_init, sub_final; + base_quantify(out, &sub_init, &sub_final); + fsa_add_rule(out, sub_final, sub_init, EPSILON); +} + +static void construct_qmark(fsa_t *out) +{ + int sub_init, sub_final; + base_quantify(out, &sub_init, &sub_final); + fsa_add_rule(out, out->initial, 0, EPSILON); +} + static void construct_term(const regex_term_t *term, fsa_t *out) { switch (term->type) { @@ -198,8 +217,10 @@ static void construct_term(const regex_term_t *term, fsa_t *out) construct_star(out); break; case REGEX_QUANTIFIER_PLUS: + construct_plus(out); + break; case REGEX_QUANTIFIER_QMARK: - assert(false); + construct_qmark(out); break; } diff --git 
a/tests/construct_tests.c b/tests/construct_tests.c index 155330d..e5763c6 100644 --- a/tests/construct_tests.c +++ b/tests/construct_tests.c @@ -181,6 +181,52 @@ static void test_star(void) fsa_free(&fsa); } +static void test_plus(void) +{ + regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); + terms[0].quantifier = REGEX_QUANTIFIER_PLUS; + terms[0].type = REGEX_TERM_LITERAL; + terms[0].literal = 'a'; + regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); + alternatives[0].count = alternatives[0].capacity = 1; + alternatives[0].contents = terms; + regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; + + fsa_t fsa; + construct_nfa(&regex, &fsa); + + ASSERT_TRUE(accepts(&fsa, "a")); + ASSERT_TRUE(accepts(&fsa, "aaaaaa")); + ASSERT_FALSE(accepts(&fsa, "")); + ASSERT_FALSE(accepts(&fsa, "b")); + + regex_free(&regex); + fsa_free(&fsa); +} + +static void test_qmark(void) +{ + regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); + terms[0].quantifier = REGEX_QUANTIFIER_QMARK; + terms[0].type = REGEX_TERM_LITERAL; + terms[0].literal = 'a'; + regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); + alternatives[0].count = alternatives[0].capacity = 1; + alternatives[0].contents = terms; + regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; + + fsa_t fsa; + construct_nfa(&regex, &fsa); + + ASSERT_TRUE(accepts(&fsa, "")); + ASSERT_TRUE(accepts(&fsa, "a")); + ASSERT_FALSE(accepts(&fsa, "aa")); + ASSERT_FALSE(accepts(&fsa, "b")); + + regex_free(&regex); + fsa_free(&fsa); +} + static void test_subexpression(void) { regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t)); @@ -415,6 +461,8 @@ int main(void) test_sequence(); test_union(); test_star(); + test_plus(); + test_qmark(); test_subexpression(); test_class(); test_negated_class();