/*
 * Copyright (c) Camden Dixie O'Brien
 * SPDX-License-Identifier: AGPL-3.0-only
 */

#include "construct.h"
#include "testing.h"

#include <stdbool.h>
#include <stdlib.h>

/*
 * Recursive backtracking matcher used only by these tests.
 *
 * Starting from the state with index `state_id`, tries every rule of
 * that state in order:
 *   - a rule whose input is EPSILON is followed without consuming
 *     input, unless the state is final and the input is exhausted (in
 *     which case the function is about to succeed anyway);
 *   - a rule whose input equals the current character is followed
 *     with the input advanced by one character.
 *
 * Returns true as soon as any followed rule leads to acceptance;
 * otherwise returns true only if this state is final and the whole
 * input has been consumed.
 *
 * NOTE(review): no visited set is kept, so termination relies on the
 * NFA having no epsilon-only cycle that the guard above fails to cut
 * -- confirm against construct_nfa.
 */
static bool
accepts_from_state(const fsa_t *nfa, int state_id, const char *input)
{
	const fsa_state_t *state = &nfa->states[state_id];
	const bool final = state->final;
	const bool end_of_input = '\0' == *input;

	for (int i = 0; i < state->count; ++i) {
		if ((!final || !end_of_input)
		    && EPSILON == state->rules[i].input) {
			if (accepts_from_state(
				    nfa, state->rules[i].next, input))
				return true;
		}

		if (!end_of_input && *input == state->rules[i].input) {
			if (accepts_from_state(
				    nfa, state->rules[i].next, input + 1))
				return true;
		}
	}

	return final && end_of_input;
}

/* Returns whether `nfa` accepts `input`, starting at its initial state. */
static bool accepts(const fsa_t *nfa, const char *input)
{
	return accepts_from_state(nfa, nfa->initial, input);
}

/* A single EMPTY term: expects "" accepted and "a" rejected. */
static void test_empty_expression(void)
{
	regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
	terms[0].quantifier = REGEX_QUANTIFIER_NONE;
	terms[0].type = REGEX_TERM_EMPTY;
	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 1;
	alternatives[0].contents = terms;
	const regex_t regex
		= { .count = 1, .capacity = 1, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "a"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * A single WILDCARD term: expects any one of "a".."d" accepted, and
 * both the empty string and a two-character string rejected.
 */
static void test_wildcard(void)
{
	regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
	terms[0].quantifier = REGEX_QUANTIFIER_NONE;
	terms[0].type = REGEX_TERM_WILDCARD;
	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 1;
	alternatives[0].contents = terms;
	const regex_t regex
		= { .count = 1, .capacity = 1, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "b"));
	ASSERT_TRUE(accepts(&fsa, "c"));
	ASSERT_TRUE(accepts(&fsa, "d"));
	ASSERT_FALSE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "aa"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/* A single literal 'a': expects "a" accepted and "b" rejected. */
static void test_literal_expression(void)
{
	regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
	terms[0].quantifier = REGEX_QUANTIFIER_NONE;
	terms[0].type = REGEX_TERM_LITERAL;
	terms[0].literal = 'a';
	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 1;
	alternatives[0].contents = terms;
	const regex_t regex
		= { .count = 1, .capacity = 1, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_FALSE(accepts(&fsa, "b"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * The sequence abc: expects exactly "abc" accepted; proper prefixes,
 * an unrelated character and an over-long string are rejected.
 */
static void test_sequence(void)
{
	regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
	terms[0].quantifier = REGEX_QUANTIFIER_NONE;
	terms[0].type = REGEX_TERM_LITERAL;
	terms[0].literal = 'a';
	terms[1].quantifier = REGEX_QUANTIFIER_NONE;
	terms[1].type = REGEX_TERM_LITERAL;
	terms[1].literal = 'b';
	terms[2].quantifier = REGEX_QUANTIFIER_NONE;
	terms[2].type = REGEX_TERM_LITERAL;
	terms[2].literal = 'c';
	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 3;
	alternatives[0].contents = terms;
	regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "abc"));
	ASSERT_FALSE(accepts(&fsa, "a"));
	ASSERT_FALSE(accepts(&fsa, "ab"));
	ASSERT_FALSE(accepts(&fsa, "d"));
	ASSERT_FALSE(accepts(&fsa, "abcd"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * The union a|b|c, built as three single-literal alternatives:
 * expects each of "a", "b", "c" accepted; "d" and "aa" rejected.
 */
static void test_union(void)
{
	const char *literals = "abc";
	regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t));
	for (int i = 0; i < 3; ++i) {
		regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
		terms[0].quantifier = REGEX_QUANTIFIER_NONE;
		terms[0].type = REGEX_TERM_LITERAL;
		terms[0].literal = literals[i];
		alternatives[i].count = alternatives[i].capacity = 1;
		alternatives[i].contents = terms;
	}
	regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "b"));
	ASSERT_TRUE(accepts(&fsa, "c"));
	ASSERT_FALSE(accepts(&fsa, "d"));
	ASSERT_FALSE(accepts(&fsa, "aa"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * The starred literal a*: expects "", "a" and "aaaaaa" accepted;
 * "b" rejected.
 */
static void test_star(void)
{
	regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
	terms[0].quantifier = REGEX_QUANTIFIER_STAR;
	terms[0].type = REGEX_TERM_LITERAL;
	terms[0].literal = 'a';
	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 1;
	alternatives[0].contents = terms;
	regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, ""));
	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "aaaaaa"));
	ASSERT_FALSE(accepts(&fsa, "b"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * A sub-expression term wrapping the literal 'a', i.e. (a):
 * expects "a" accepted and "b" rejected.
 */
static void test_subexpression(void)
{
	regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
	inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE;
	inner_terms[0].type = REGEX_TERM_LITERAL;
	inner_terms[0].literal = 'a';
	regex_sequence_t *inner_alternatives
		= malloc(1 * sizeof(regex_sequence_t));
	inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
	inner_alternatives[0].contents = inner_terms;
	regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
	terms[0].quantifier = REGEX_QUANTIFIER_NONE;
	terms[0].type = REGEX_TERM_SUBEXPR;
	terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
	terms[0].subexpr.contents = inner_alternatives;
	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 1;
	alternatives[0].contents = terms;
	regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_FALSE(accepts(&fsa, "b"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * A non-negated class containing 'a', 'b', 'c': expects each member
 * accepted as a single character; "", "aa" and "d" rejected.
 */
static void test_class(void)
{
	char *class_contents = malloc(3);
	class_contents[0] = 'a';
	class_contents[1] = 'b';
	class_contents[2] = 'c';
	regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
	terms[0].quantifier = REGEX_QUANTIFIER_NONE;
	terms[0].type = REGEX_TERM_CLASS;
	terms[0].class.negated = false;
	terms[0].class.count = terms[0].class.capacity = 3;
	terms[0].class.contents = class_contents;
	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 1;
	alternatives[0].contents = terms;
	const regex_t regex
		= { .count = 1, .capacity = 1, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "b"));
	ASSERT_TRUE(accepts(&fsa, "c"));
	ASSERT_FALSE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "aa"));
	ASSERT_FALSE(accepts(&fsa, "d"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * A negated class containing 'a', 'b', 'c': expects "d" and "e"
 * accepted; each listed member, "" and "aa" rejected.
 */
static void test_negated_class(void)
{
	char *class_contents = malloc(3);
	class_contents[0] = 'a';
	class_contents[1] = 'b';
	class_contents[2] = 'c';
	regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
	terms[0].quantifier = REGEX_QUANTIFIER_NONE;
	terms[0].type = REGEX_TERM_CLASS;
	terms[0].class.negated = true;
	terms[0].class.count = terms[0].class.capacity = 3;
	terms[0].class.contents = class_contents;
	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 1;
	alternatives[0].contents = terms;
	const regex_t regex
		= { .count = 1, .capacity = 1, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "d"));
	ASSERT_TRUE(accepts(&fsa, "e"));
	ASSERT_FALSE(accepts(&fsa, "a"));
	ASSERT_FALSE(accepts(&fsa, "b"));
	ASSERT_FALSE(accepts(&fsa, "c"));
	ASSERT_FALSE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "aa"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * Two literals followed by a starred two-way sub-expression:
 * expects "ab" followed by any mix of 'c' and 'd' accepted; strings
 * lacking the "ab" prefix rejected.
 */
static void test_sequence_containing_starred_union(void)
{
	// ab(c|d)*
	regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t));
	inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
	inner_terms0[0].type = REGEX_TERM_LITERAL;
	inner_terms0[0].literal = 'c';
	regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t));
	inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE;
	inner_terms1[0].type = REGEX_TERM_LITERAL;
	inner_terms1[0].literal = 'd';
	regex_sequence_t *inner_alternatives
		= malloc(2 * sizeof(regex_sequence_t));
	inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
	inner_alternatives[0].contents = inner_terms0;
	inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
	inner_alternatives[1].contents = inner_terms1;
	regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
	terms[0].quantifier = REGEX_QUANTIFIER_NONE;
	terms[0].type = REGEX_TERM_LITERAL;
	terms[0].literal = 'a';
	terms[1].quantifier = REGEX_QUANTIFIER_NONE;
	terms[1].type = REGEX_TERM_LITERAL;
	terms[1].literal = 'b';
	terms[2].quantifier = REGEX_QUANTIFIER_STAR;
	terms[2].type = REGEX_TERM_SUBEXPR;
	terms[2].subexpr.count = terms[2].subexpr.capacity = 2;
	terms[2].subexpr.contents = inner_alternatives;
	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 3;
	alternatives[0].contents = terms;
	regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "ab"));
	ASSERT_TRUE(accepts(&fsa, "abc"));
	ASSERT_TRUE(accepts(&fsa, "abccc"));
	ASSERT_TRUE(accepts(&fsa, "abd"));
	ASSERT_TRUE(accepts(&fsa, "abddd"));
	ASSERT_TRUE(accepts(&fsa, "abcddcc"));
	ASSERT_TRUE(accepts(&fsa, "abddccd"));
	ASSERT_FALSE(accepts(&fsa, "c"));
	ASSERT_FALSE(accepts(&fsa, "d"));
	ASSERT_FALSE(accepts(&fsa, "foo"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * A union whose first alternative is a single literal and whose
 * second is a starred literal followed by a literal: expects "a",
 * "c", "bc" and "bbbbbc" accepted; "foo" and "ba" rejected.
 */
static void
test_union_of_single_term_and_sequence_containing_starred_term(void)
{
	// a|b*c
	regex_term_t *terms0 = malloc(1 * sizeof(regex_term_t));
	terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
	terms0[0].type = REGEX_TERM_LITERAL;
	terms0[0].literal = 'a';
	regex_term_t *terms1 = malloc(2 * sizeof(regex_term_t));
	terms1[0].quantifier = REGEX_QUANTIFIER_STAR;
	terms1[0].type = REGEX_TERM_LITERAL;
	terms1[0].literal = 'b';
	terms1[1].quantifier = REGEX_QUANTIFIER_NONE;
	terms1[1].type = REGEX_TERM_LITERAL;
	terms1[1].literal = 'c';
	regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 1;
	alternatives[0].contents = terms0;
	alternatives[1].count = alternatives[1].capacity = 2;
	alternatives[1].contents = terms1;
	regex_t regex = { .count = 2, .capacity = 2, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "c"));
	ASSERT_TRUE(accepts(&fsa, "bc"));
	ASSERT_TRUE(accepts(&fsa, "bbbbbc"));
	ASSERT_FALSE(accepts(&fsa, "foo"));
	ASSERT_FALSE(accepts(&fsa, "ba"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * A sub-expression offering either the literal 'a' or the empty
 * term, followed by the literal 'b': expects "ab" and "b" accepted;
 * "" and "a" rejected.
 */
static void test_sequence_of_subexpr_a_or_empty_and_b(void)
{
	// (a|ε)b
	regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t));
	inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
	inner_terms0[0].type = REGEX_TERM_LITERAL;
	inner_terms0[0].literal = 'a';
	regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t));
	inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE;
	inner_terms1[0].type = REGEX_TERM_EMPTY;
	regex_sequence_t *inner_alternatives
		= malloc(2 * sizeof(regex_sequence_t));
	inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
	inner_alternatives[0].contents = inner_terms0;
	inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
	inner_alternatives[1].contents = inner_terms1;
	regex_term_t *terms = malloc(2 * sizeof(regex_term_t));
	terms[0].quantifier = REGEX_QUANTIFIER_NONE;
	terms[0].type = REGEX_TERM_SUBEXPR;
	terms[0].subexpr.count = terms[0].subexpr.capacity = 2;
	terms[0].subexpr.contents = inner_alternatives;
	terms[1].quantifier = REGEX_QUANTIFIER_NONE;
	terms[1].type = REGEX_TERM_LITERAL;
	terms[1].literal = 'b';
	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
	alternatives[0].count = alternatives[0].capacity = 2;
	alternatives[0].contents = terms;
	regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "ab"));
	ASSERT_TRUE(accepts(&fsa, "b"));
	ASSERT_FALSE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "a"));

	regex_free(&regex);
	fsa_free(&fsa);
}

/*
 * Runs every test between TESTING_BEGIN() and TESTING_END(); the
 * value of TESTING_END() becomes the process exit status.
 */
int main(void)
{
	TESTING_BEGIN();

	// Base cases
	test_empty_expression();
	test_literal_expression();
	test_wildcard();
	test_sequence();
	test_union();
	test_star();
	test_subexpression();
	test_class();
	test_negated_class();

	// Compound expressions
	test_sequence_containing_starred_union();
	test_union_of_single_term_and_sequence_containing_starred_term();
	test_sequence_of_subexpr_a_or_empty_and_b();

	return TESTING_END();
}