From bbecd12c91cc45bcf044f4d4efb90cdf8ac6d49f Mon Sep 17 00:00:00 2001
From: Camden Dixie O'Brien
Date: Sun, 27 Oct 2024 13:24:29 +0000
Subject: [PATCH] Implement star construction

---
 lib/construct.c         | 67 ++++++++++++++++++++++++++++++++++++++++-
 tests/construct_tests.c | 27 +++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/lib/construct.c b/lib/construct.c
index 7f79b15..f66e764 100644
--- a/lib/construct.c
+++ b/lib/construct.c
@@ -17,6 +17,59 @@ static void construct_literal(char literal, fsa_t *out)
 	out->initial = id;
 }
 
+// Apply the star quantifier to fsa in place by merging its initial
+// state into state 0 (the final state). State 0 gains the initial
+// state's rules, the initial state is removed from the states array,
+// and every rule is retargeted to match the new state numbering.
+static void star_fsa(fsa_t *fsa)
+{
+	// If the initial state is already the final state then nothing
+	// needs to be done.
+	if (0 == fsa->initial)
+		return;
+
+	// Copy initial state's rules to final state.
+	fsa_state_t *final = &fsa->states[0];
+	const fsa_state_t *initial = &fsa->states[fsa->initial];
+	if (final->capacity < final->count + initial->count) {
+		do
+			final->capacity *= 2;
+		while (final->capacity < final->count + initial->count);
+		final->rules
+			= realloc(final->rules, final->capacity * sizeof(fsa_rule_t));
+		assert(final->rules);
+	}
+	const int copy_size = initial->count * sizeof(fsa_rule_t);
+	memcpy(&final->rules[final->count], initial->rules, copy_size);
+	final->count += initial->count;
+
+	// The rules have been copied, so release the initial state's array
+	// now: the move below may overwrite the slot that initial points at.
+	free(initial->rules);
+
+	// Move states that come after initial state if there are any.
+	if (fsa->count - 1 > fsa->initial) {
+		const int count = fsa->count - fsa->initial - 1;
+		fsa_state_t *start = &fsa->states[fsa->initial];
+		memmove(start, start + 1, count * sizeof(fsa_state_t));
+	}
+
+	// Retarget all states' rules.
+	for (int i = 0; i < fsa->count - 1; ++i) {
+		for (int j = 0; j < fsa->states[i].count; ++j) {
+			if (fsa->states[i].rules[j].next == fsa->initial)
+				fsa->states[i].rules[j].next = 0;
+			else if (fsa->states[i].rules[j].next > fsa->initial)
+				// All states after the initial state have been moved
+				// down by one position.
+				--fsa->states[i].rules[j].next;
+		}
+	}
+
+	--fsa->count;
+	fsa->initial = 0;
+}
+
 static void construct_term(const regex_term_t *term, fsa_t *out)
 {
 	switch (term->type) {
@@ -28,10 +81,22 @@ static void construct_term(const regex_term_t *term, fsa_t *out)
 		break;
 	case REGEX_TERM_SUBEXPR:
 		return;
-
 	case REGEX_TERM_WILDCARD:
 	case REGEX_TERM_CLASS:
 		assert(false);
+		break;
+	}
+
+	switch (term->quantifier) {
+	case REGEX_QUANTIFIER_NONE:
+		break;
+	case REGEX_QUANTIFIER_STAR:
+		star_fsa(out);
+		break;
+	case REGEX_QUANTIFIER_PLUS:
+	case REGEX_QUANTIFIER_QMARK:
+		assert(false);
+		break;
 	}
 
 	assert(out->states[0].final);
diff --git a/tests/construct_tests.c b/tests/construct_tests.c
index 304cbc9..d81891b 100644
--- a/tests/construct_tests.c
+++ b/tests/construct_tests.c
@@ -132,6 +132,32 @@ static void test_union(void)
 	fsa_free(&fsa);
 }
 
+// For the regex "a*" the initial state should be final and have exactly
+// one rule, on 'a', that loops back to the initial state.
+static void test_star(void)
+{
+	regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
+	terms[0].quantifier = REGEX_QUANTIFIER_STAR;
+	terms[0].type = REGEX_TERM_LITERAL;
+	terms[0].literal = 'a';
+	regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
+	alternatives[0].count = alternatives[0].capacity = 1;
+	alternatives[0].contents = terms;
+	regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
+
+	fsa_t fsa;
+	construct(&regex, &fsa);
+
+	const fsa_state_t *initial = &fsa.states[fsa.initial];
+	ASSERT_TRUE(initial->final);
+	ASSERT_EQ(1, initial->count);
+	ASSERT_EQ('a', initial->rules[0].input);
+	ASSERT_EQ(fsa.initial, initial->rules[0].next);
+
+	regex_free(&regex);
+	fsa_free(&fsa);
+}
+
 int main(void)
 {
 	TESTING_BEGIN();
@@ -139,5 +165,6 @@ int main(void)
 	test_literal_expression();
 	test_sequence();
 	test_union();
+	test_star();
 	return TESTING_END();
 }