Rename construct to construct_nfa and improve tests

This commit is contained in:
Camden Dixie O'Brien 2024-10-27 17:57:13 +00:00
parent 55e4e4f5ee
commit d35fa4e9da
3 changed files with 62 additions and 73 deletions

View File

@ -143,7 +143,7 @@ static void construct_term(const regex_term_t *term, fsa_t *out)
construct_base(out, term->literal); construct_base(out, term->literal);
break; break;
case REGEX_TERM_SUBEXPR: case REGEX_TERM_SUBEXPR:
construct(&term->subexpr, out); construct_nfa(&term->subexpr, out);
break; break;
case REGEX_TERM_WILDCARD: case REGEX_TERM_WILDCARD:
case REGEX_TERM_CLASS: case REGEX_TERM_CLASS:
@ -199,7 +199,7 @@ static void construct_union(fsa_t *f, const fsa_t *o)
fsa_add_rule(f, final, 0, EPSILON); fsa_add_rule(f, final, 0, EPSILON);
} }
void construct(const regex_t *regex, fsa_t *out) void construct_nfa(const regex_t *regex, fsa_t *out)
{ {
assert(regex->count > 0); assert(regex->count > 0);

View File

@ -9,6 +9,6 @@
#include "fsa.h" #include "fsa.h"
#include "regex.h" #include "regex.h"
void construct(const regex_t *regex, fsa_t *out); void construct_nfa(const regex_t *regex, fsa_t *out);
#endif #endif

View File

@ -6,8 +6,8 @@
#include "construct.h" #include "construct.h"
#include "testing.h" #include "testing.h"
static const char * static bool
match_from_state(const fsa_t *nfa, int state_id, const char *input) accepts_from_state(const fsa_t *nfa, int state_id, const char *input)
{ {
const fsa_state_t *state = &nfa->states[state_id]; const fsa_state_t *state = &nfa->states[state_id];
@ -15,25 +15,21 @@ match_from_state(const fsa_t *nfa, int state_id, const char *input)
const bool end_of_input = '\0' == *input; const bool end_of_input = '\0' == *input;
for (int i = 0; i < state->count; ++i) { for (int i = 0; i < state->count; ++i) {
if ((!final || !end_of_input) && EPSILON == state->rules[i].input) { if ((!final || !end_of_input) && EPSILON == state->rules[i].input) {
const char *s if (accepts_from_state(nfa, state->rules[i].next, input))
= match_from_state(nfa, state->rules[i].next, input); return true;
if (NULL != s)
return s;
} }
if (!end_of_input && *input == state->rules[i].input) { if (!end_of_input && *input == state->rules[i].input) {
const char *s if (accepts_from_state(nfa, state->rules[i].next, input + 1))
= match_from_state(nfa, state->rules[i].next, input + 1); return true;
if (NULL != s)
return s;
} }
} }
return final ? input : NULL; return final && end_of_input;
} }
static const char *match(const fsa_t *nfa, const char *input) static bool accepts(const fsa_t *nfa, const char *input)
{ {
return match_from_state(nfa, nfa->initial, input); return accepts_from_state(nfa, nfa->initial, input);
} }
static void test_empty_expression(void) static void test_empty_expression(void)
@ -48,9 +44,10 @@ static void test_empty_expression(void)
= { .count = 1, .capacity = 1, .contents = alternatives }; = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct(&regex, &fsa); construct_nfa(&regex, &fsa);
ASSERT_NOT_NULL(match(&fsa, "")); ASSERT_TRUE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "a"));
regex_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
@ -69,10 +66,10 @@ static void test_literal_expression(void)
= { .count = 1, .capacity = 1, .contents = alternatives }; = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct(&regex, &fsa); construct_nfa(&regex, &fsa);
ASSERT_NOT_NULL(match(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_NULL(match(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
regex_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
@ -81,10 +78,13 @@ static void test_literal_expression(void)
static void test_sequence(void) static void test_sequence(void)
{ {
regex_term_t *terms = malloc(3 * sizeof(regex_term_t)); regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
terms[1].quantifier = REGEX_QUANTIFIER_NONE;
terms[1].type = REGEX_TERM_LITERAL; terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b'; terms[1].literal = 'b';
terms[2].quantifier = REGEX_QUANTIFIER_NONE;
terms[2].type = REGEX_TERM_LITERAL; terms[2].type = REGEX_TERM_LITERAL;
terms[2].literal = 'c'; terms[2].literal = 'c';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
@ -93,16 +93,13 @@ static void test_sequence(void)
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct(&regex, &fsa); construct_nfa(&regex, &fsa);
ASSERT_NOT_NULL(match(&fsa, "abc")); ASSERT_TRUE(accepts(&fsa, "abc"));
ASSERT_NULL(match(&fsa, "a")); ASSERT_FALSE(accepts(&fsa, "a"));
ASSERT_NULL(match(&fsa, "ab")); ASSERT_FALSE(accepts(&fsa, "ab"));
ASSERT_NULL(match(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "abcd"));
const char *s = "abcd";
const char *t = match(&fsa, s);
ASSERT_EQ(s + 3, t);
regex_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
@ -124,16 +121,13 @@ static void test_union(void)
regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives }; regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct(&regex, &fsa); construct_nfa(&regex, &fsa);
ASSERT_NOT_NULL(match(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_NOT_NULL(match(&fsa, "b")); ASSERT_TRUE(accepts(&fsa, "b"));
ASSERT_NOT_NULL(match(&fsa, "c")); ASSERT_TRUE(accepts(&fsa, "c"));
ASSERT_NULL(match(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "aa"));
const char *s = "aa";
const char *t = match(&fsa, s);
ASSERT_EQ(s + 1, t);
regex_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
@ -151,15 +145,12 @@ static void test_star(void)
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct(&regex, &fsa); construct_nfa(&regex, &fsa);
ASSERT_NOT_NULL(match(&fsa, "")); ASSERT_TRUE(accepts(&fsa, ""));
ASSERT_NOT_NULL(match(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_NOT_NULL(match(&fsa, "aaaaaa")); ASSERT_TRUE(accepts(&fsa, "aaaaaa"));
ASSERT_FALSE(accepts(&fsa, "b"));
const char *s = "b";
const char *t = match(&fsa, s);
ASSERT_EQ(s, t);
regex_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
@ -186,10 +177,10 @@ static void test_subexpression(void)
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct(&regex, &fsa); construct_nfa(&regex, &fsa);
ASSERT_NOT_NULL(match(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_NULL(match(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
regex_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
@ -219,28 +210,28 @@ static void test_sequence_containing_starred_union(void)
terms[1].quantifier = REGEX_QUANTIFIER_NONE; terms[1].quantifier = REGEX_QUANTIFIER_NONE;
terms[1].type = REGEX_TERM_LITERAL; terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b'; terms[1].literal = 'b';
terms[2].quantifier = REGEX_QUANTIFIER_NONE; terms[2].quantifier = REGEX_QUANTIFIER_STAR;
terms[2].type = REGEX_TERM_SUBEXPR; terms[2].type = REGEX_TERM_SUBEXPR;
terms[2].subexpr.count = terms[2].subexpr.capacity = 2; terms[2].subexpr.count = terms[2].subexpr.capacity = 2;
terms[2].subexpr.contents = inner_alternatives; terms[2].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms; alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct(&regex, &fsa); construct_nfa(&regex, &fsa);
ASSERT_NOT_NULL(match(&fsa, "ab")); ASSERT_TRUE(accepts(&fsa, "ab"));
ASSERT_NOT_NULL(match(&fsa, "abc")); ASSERT_TRUE(accepts(&fsa, "abc"));
ASSERT_NOT_NULL(match(&fsa, "abccc")); ASSERT_TRUE(accepts(&fsa, "abccc"));
ASSERT_NOT_NULL(match(&fsa, "abd")); ASSERT_TRUE(accepts(&fsa, "abd"));
ASSERT_NOT_NULL(match(&fsa, "abddd")); ASSERT_TRUE(accepts(&fsa, "abddd"));
ASSERT_NOT_NULL(match(&fsa, "abcddcc")); ASSERT_TRUE(accepts(&fsa, "abcddcc"));
ASSERT_NOT_NULL(match(&fsa, "abddccd")); ASSERT_TRUE(accepts(&fsa, "abddccd"));
ASSERT_NULL(match(&fsa, "c")); ASSERT_FALSE(accepts(&fsa, "c"));
ASSERT_NULL(match(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_NULL(match(&fsa, "foo")); ASSERT_FALSE(accepts(&fsa, "foo"));
regex_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
@ -249,6 +240,7 @@ static void test_sequence_containing_starred_union(void)
static void static void
test_union_of_single_term_and_sequence_containing_starred_term(void) test_union_of_single_term_and_sequence_containing_starred_term(void)
{ {
// a|b*c
regex_term_t *terms0 = malloc(1 * sizeof(regex_term_t)); regex_term_t *terms0 = malloc(1 * sizeof(regex_term_t));
terms0[0].quantifier = REGEX_QUANTIFIER_NONE; terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
terms0[0].type = REGEX_TERM_LITERAL; terms0[0].type = REGEX_TERM_LITERAL;
@ -268,17 +260,14 @@ test_union_of_single_term_and_sequence_containing_starred_term(void)
regex_t regex = { .count = 2, .capacity = 2, .contents = alternatives }; regex_t regex = { .count = 2, .capacity = 2, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct(&regex, &fsa); construct_nfa(&regex, &fsa);
ASSERT_NOT_NULL(match(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_NOT_NULL(match(&fsa, "c")); ASSERT_TRUE(accepts(&fsa, "c"));
ASSERT_NOT_NULL(match(&fsa, "bc")); ASSERT_TRUE(accepts(&fsa, "bc"));
ASSERT_NOT_NULL(match(&fsa, "bbbbbc")); ASSERT_TRUE(accepts(&fsa, "bbbbbc"));
ASSERT_NULL(match(&fsa, "foo")); ASSERT_FALSE(accepts(&fsa, "foo"));
ASSERT_FALSE(accepts(&fsa, "ba"));
const char *s = "ba";
const char *t = match(&fsa, s);
ASSERT_EQ(s + 1, t);
regex_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);