Split construct_base into a symbol-free base constructor plus construct_symbol,
and build NFAs for character classes (plain and negated) via construct_class.
REGEX_TERM_WILDCARD is still unimplemented and keeps hitting assert(false).
Adds test_class and test_negated_class to tests/construct_tests.c.

diff --git a/lib/construct.c b/lib/construct.c
index 35ba7e7..4332c8f 100644
--- a/lib/construct.c
+++ b/lib/construct.c
@@ -115,13 +115,17 @@ static void prepend_fsa(fsa_t *f, const fsa_t *o)
     f->count = count;
 }
 
-static void construct_base(fsa_t *out, int symbol)
+static void construct_base(fsa_t *out)
 {
     fsa_init(out);
-    const int id = fsa_add_state(out);
-    fsa_add_rule(out, id, out->initial, symbol);
-    out->initial = id;
     out->states[0].final = true;
+    out->initial = fsa_add_state(out);
+}
+
+static void construct_symbol(fsa_t *out, int symbol)
+{
+    construct_base(out);
+    fsa_add_rule(out, out->initial, 0, symbol);
 }
 
 static void construct_star(fsa_t *out)
@@ -129,7 +133,7 @@ static void construct_star(fsa_t *out)
     fsa_t f;
     memcpy(&f, out, sizeof(fsa_t));
 
-    construct_base(out, EPSILON);
+    construct_symbol(out, EPSILON);
     int f_initial, f_final;
     add_fsa(out, &f, &f_initial, &f_final);
     fsa_add_rule(out, out->initial, f_initial, EPSILON);
@@ -137,20 +141,46 @@ static void construct_star(fsa_t *out)
     fsa_add_rule(out, f_final, 0, EPSILON);
 }
 
+static bool in_class(const regex_class_t *class, char c)
+{
+    for (int i = 0; i < class->count; ++i) {
+        if (class->contents[i] == c)
+            return true;
+    }
+    return false;
+}
+
+static void construct_class(fsa_t *out, const regex_class_t *class)
+{
+    construct_base(out);
+    if (class->negated) {
+        for (int i = 0; i < CHAR_COUNT; ++i) {
+            if (!in_class(class, i))
+                fsa_add_rule(out, out->initial, 0, i);
+        }
+    } else {
+        for (int i = 0; i < class->count; ++i)
+            fsa_add_rule(out, out->initial, 0, class->contents[i]);
+    }
+}
+
 static void construct_term(const regex_term_t *term, fsa_t *out)
 {
     switch (term->type) {
     case REGEX_TERM_EMPTY:
-        construct_base(out, EPSILON);
+        construct_symbol(out, EPSILON);
         break;
     case REGEX_TERM_LITERAL:
-        construct_base(out, term->literal);
+        construct_symbol(out, term->literal);
         break;
     case REGEX_TERM_SUBEXPR:
         construct_nfa(&term->subexpr, out);
         break;
-    case REGEX_TERM_WILDCARD:
     case REGEX_TERM_CLASS:
+        construct_class(out, &term->class);
+        break;
+
+    case REGEX_TERM_WILDCARD:
         assert(false);
         break;
     }
diff --git a/tests/construct_tests.c b/tests/construct_tests.c
index eb398cf..20f7f87 100644
--- a/tests/construct_tests.c
+++ b/tests/construct_tests.c
@@ -186,6 +186,71 @@ static void test_subexpression(void)
     fsa_free(&fsa);
 }
 
+static void test_class(void)
+{
+    char *class_contents = malloc(3);
+    class_contents[0] = 'a';
+    class_contents[1] = 'b';
+    class_contents[2] = 'c';
+    regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
+    terms[0].quantifier = REGEX_QUANTIFIER_NONE;
+    terms[0].type = REGEX_TERM_CLASS;
+    terms[0].class.negated = false;
+    terms[0].class.count = terms[0].class.capacity = 3;
+    terms[0].class.contents = class_contents;
+    regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
+    alternatives[0].count = alternatives[0].capacity = 1;
+    alternatives[0].contents = terms;
+    const regex_t regex
+        = { .count = 1, .capacity = 1, .contents = alternatives };
+
+    fsa_t fsa;
+    construct_nfa(&regex, &fsa);
+
+    ASSERT_TRUE(accepts(&fsa, "a"));
+    ASSERT_TRUE(accepts(&fsa, "b"));
+    ASSERT_TRUE(accepts(&fsa, "c"));
+    ASSERT_FALSE(accepts(&fsa, ""));
+    ASSERT_FALSE(accepts(&fsa, "aa"));
+    ASSERT_FALSE(accepts(&fsa, "d"));
+
+    regex_free(&regex);
+    fsa_free(&fsa);
+}
+
+static void test_negated_class(void)
+{
+    char *class_contents = malloc(3);
+    class_contents[0] = 'a';
+    class_contents[1] = 'b';
+    class_contents[2] = 'c';
+    regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
+    terms[0].quantifier = REGEX_QUANTIFIER_NONE;
+    terms[0].type = REGEX_TERM_CLASS;
+    terms[0].class.negated = true;
+    terms[0].class.count = terms[0].class.capacity = 3;
+    terms[0].class.contents = class_contents;
+    regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
+    alternatives[0].count = alternatives[0].capacity = 1;
+    alternatives[0].contents = terms;
+    const regex_t regex
+        = { .count = 1, .capacity = 1, .contents = alternatives };
+
+    fsa_t fsa;
+    construct_nfa(&regex, &fsa);
+
+    ASSERT_TRUE(accepts(&fsa, "d"));
+    ASSERT_TRUE(accepts(&fsa, "e"));
+    ASSERT_FALSE(accepts(&fsa, "a"));
+    ASSERT_FALSE(accepts(&fsa, "b"));
+    ASSERT_FALSE(accepts(&fsa, "c"));
+    ASSERT_FALSE(accepts(&fsa, ""));
+    ASSERT_FALSE(accepts(&fsa, "aa"));
+
+    regex_free(&regex);
+    fsa_free(&fsa);
+}
+
 static void test_sequence_containing_starred_union(void)
 {
     // ab(c|d)*
@@ -325,6 +390,8 @@ int main(void)
     test_union();
     test_star();
     test_subexpression();
+    test_class();
+    test_negated_class();
 
     // Compound expressions
     test_sequence_containing_starred_union();