From 2804638d8440f1d8983077124b82458b0c503268 Mon Sep 17 00:00:00 2001
From: Camden Dixie O'Brien
Date: Sat, 26 Oct 2024 23:32:04 +0100
Subject: [PATCH] Support sequences in construct()

---
Review notes (this section is ignored when the patch is applied):
 - concat_fsas(): the realloc() size is now given in bytes
   (capacity * sizeof(fsa_state_t)) instead of a bare element count.
 - The header names on the #include lines and all leading whitespace
   were missing from the copy this patch was restored from.  <assert.h>,
   <stdlib.h>, <string.h> and tab indentation are assumed; verify them
   against the tree before applying.
 - Hunk sizes and the diffstat account for the added comments.

 lib/construct.c         | 58 ++++++++++++++++++++++++++++++++++++++++-
 tests/construct_tests.c | 53 +++++++++++++++++++++++++++++++++++--
 2 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/lib/construct.c b/lib/construct.c
index e40956a..e54bcc6 100644
--- a/lib/construct.c
+++ b/lib/construct.c
@@ -7,6 +7,7 @@
 
 #include <assert.h>
 #include <stdlib.h>
+#include <string.h>
 
 static void construct_literal(char literal, fsa_t *out)
 {
@@ -34,9 +35,64 @@ static void construct_term(const regex_term_t *term, fsa_t *out)
 	}
 }
 
+/*
+ * Splice `other` in front of `base`: afterwards `base` starts in what
+ * was `other`'s initial state and continues into its own old initial
+ * state wherever `other` used to reach its state 0.
+ *
+ * States 1..count-1 of `other` are appended to `base->states`.  Rule
+ * targets equal to 0 are redirected to the old `base->initial`; every
+ * other target is shifted by the index offset of the appended states.
+ * State 0 of `other` is dropped (presumably it is the final state of
+ * a freshly constructed term -- TODO confirm in construct_literal()).
+ *
+ * `other` is consumed: its state array and state 0's rule list are
+ * freed, and the rule lists of the copied states now belong to `base`.
+ */
+static void concat_fsas(fsa_t *base, const fsa_t *other)
+{
+	const int new_count = base->count + other->count - 1;
+	if (base->capacity < new_count) {
+		do
+			base->capacity *= 2;
+		while (base->capacity < new_count);
+		/* realloc() takes a size in bytes, not a state count. */
+		base->states = realloc(base->states, base->capacity * sizeof(fsa_state_t));
+		assert(base->states);
+	}
+
+	const int copy_size = (other->count - 1) * sizeof(fsa_state_t);
+	memcpy(&base->states[base->count], &other->states[1], copy_size);
+
+	const int id_offset = base->count - 1;
+	for (int i = base->count; i < new_count; ++i) {
+		fsa_state_t *state = &base->states[i];
+		for (int j = 0; j < state->count; ++j) {
+			if (0 == state->rules[j].next)
+				state->rules[j].next = base->initial;
+			else
+				state->rules[j].next += id_offset;
+		}
+	}
+
+	base->count = new_count;
+	base->initial = other->initial + id_offset;
+
+	free(other->states[0].rules);
+	free(other->states);
+}
+
 static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
 {
-	construct_term(&seq->contents[0], out);
+	assert(seq->count > 0);
+
+	fsa_t term_fsa;
+	/* Start from the last term, then splice earlier ones in front. */
+	construct_term(&seq->contents[seq->count - 1], out);
+	for (int i = seq->count - 2; i >= 0; --i) {
+		construct_term(&seq->contents[i], &term_fsa);
+		concat_fsas(out, &term_fsa);
+	}
 }
 
 void construct(const regex_t *regex, fsa_t *out)
diff --git a/tests/construct_tests.c b/tests/construct_tests.c
index 461ce76..83199a0 100644
--- a/tests/construct_tests.c
+++ b/tests/construct_tests.c
@@ -47,8 +47,56 @@ static void test_literal_expression(void)
 	ASSERT_EQ(2, fsa.count);
 	ASSERT_EQ(1, initial->count);
 	ASSERT_EQ('a', initial->rules[0].input);
-	ASSERT_TRUE(fsa.states[initial->rules[0].next].final);
-	ASSERT_EQ(0, fsa.states[initial->rules[0].next].count);
+
+	const int next = initial->rules[0].next;
+	ASSERT_TRUE(fsa.states[next].final);
+	ASSERT_EQ(0, fsa.states[next].count);
+
+	regex_free(&regex);
+	fsa_free(&fsa);
+}
+
+static void test_sequence(void)
+{
+	regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
+	terms[0].type = REGEX_TERM_LITERAL;
+	terms[0].literal = 'a';
+	terms[1].type = REGEX_TERM_LITERAL;
+	terms[1].literal = 'b';
+	terms[2].type = REGEX_TERM_LITERAL;
+	terms[2].literal = 'c';
+	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
+	alternatives[0].count = alternatives[0].capacity = 3;
+	alternatives[0].contents = terms;
+	regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
+
+	fsa_t fsa;
+	construct(&regex, &fsa);
+
+	int next = fsa.initial;
+	const fsa_state_t *state;
+
+	state = &fsa.states[next];
+	ASSERT_FALSE(state->final);
+	ASSERT_EQ(1, state->count);
+	ASSERT_EQ('a', state->rules[0].input);
+	next = state->rules[0].next;
+
+	state = &fsa.states[next];
+	ASSERT_FALSE(state->final);
+	ASSERT_EQ(1, state->count);
+	ASSERT_EQ('b', state->rules[0].input);
+	next = state->rules[0].next;
+
+	state = &fsa.states[next];
+	ASSERT_FALSE(state->final);
+	ASSERT_EQ(1, state->count);
+	ASSERT_EQ('c', state->rules[0].input);
+	next = state->rules[0].next;
+
+	state = &fsa.states[next];
+	ASSERT_TRUE(state->final);
+	ASSERT_EQ(0, state->count);
 
 	regex_free(&regex);
 	fsa_free(&fsa);
@@ -59,5 +107,6 @@ int main(void)
 	TESTING_BEGIN();
 	test_empty_expression();
 	test_literal_expression();
+	test_sequence();
 	return TESTING_END();
 }