Implement union construction

This commit is contained in:
Camden Dixie O'Brien 2024-10-27 01:50:35 +00:00
parent afd4cf928c
commit 852102cab9
2 changed files with 127 additions and 0 deletions

View File

@ -85,7 +85,103 @@ static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
assert(out->states[0].final); assert(out->states[0].final);
} }
// Rewrite the `next` targets of rules that came from an FSA being merged
// into a base FSA, so that they refer to state IDs in the base.
//
//   rules, rules_count  the rules to rewrite in place
//   initial             ID of the initial state in the FSA being merged
//   base_initial        ID of the initial state in the base FSA
//   base_count          number of states in the base before the merge
//
// The merged FSA's final state (ID zero) and its initial state are not
// copied into the base; every other state is appended after the base's
// existing states in its original order.
static void retarget_merged_rules(
    fsa_rule_t *rules, int rules_count, int initial, int base_initial,
    int base_count)
{
    for (int r = 0; r < rules_count; ++r) {
        fsa_rule_t *const rule = &rules[r];

        // Targets of the final state stay as they are: ID zero is
        // shared between both automata.
        if (rule->next == 0)
            continue;

        if (rule->next == initial) {
            // The two initial states are fused into the base's one.
            rule->next = base_initial;
        } else if (rule->next < initial) {
            // Only the final state was skipped before this target.
            rule->next += base_count - 1;
        } else {
            // Both the final and the initial state were skipped.
            rule->next += base_count - 2;
        }
    }
}
// Exchange the initial state of `fsa` with its last state and fix up
// every rule that targeted either of them, so that the initial state
// ends up at index `count - 1`.
static void move_initial_state_last(fsa_t *fsa)
{
    const int last = fsa->count - 1;
    const int old_initial = fsa->initial;

    // Nothing to do if it is already last.  State zero is never moved
    // because it must remain the final state.
    if (old_initial == last || old_initial == 0)
        return;

    fsa_state_t tmp = fsa->states[old_initial];
    fsa->states[old_initial] = fsa->states[last];
    fsa->states[last] = tmp;

    for (int i = 0; i < fsa->count; ++i) {
        fsa_rule_t *rules = fsa->states[i].rules;
        for (int j = 0; j < fsa->states[i].count; ++j) {
            if (rules[j].next == old_initial)
                rules[j].next = last;
            else if (rules[j].next == last)
                rules[j].next = old_initial;
        }
    }
    fsa->initial = last;
}

// Merge `other` into `base` so that `base` accepts the union of both.
//
// The two final states (ID zero in each) and the two initial states are
// fused: the rules of other's initial state are appended to those of
// base's initial state, and every remaining state of `other` is
// appended to base's state array.  All rules that came from `other` are
// retargeted to the new state IDs.
//
// Ownership: the appended states are shallow copies, so their rule
// arrays now belong to `base`.  The rule arrays of other's final and
// initial states, and other's state array itself, are freed here;
// `other` must not be used or freed again by the caller.
//
// On return the initial state of `base` is its last state, provided it
// was not state zero.
static void merge_fsas(fsa_t *base, const fsa_t *other)
{
    // The layout arithmetic below assumes `other` has a final state at
    // ID zero and a distinct initial state.
    assert(other->count >= 2);
    assert(other->initial > 0 && other->initial < other->count);

    const int new_count = base->count + other->count - 2;
    if (base->capacity < new_count) {
        // Start from at least one so the doubling always terminates.
        int capacity = base->capacity > 0 ? base->capacity : 1;
        while (capacity < new_count)
            capacity *= 2;
        fsa_state_t *states
            = realloc(base->states, (size_t)capacity * sizeof(fsa_state_t));
        assert(states);
        base->states = states;
        base->capacity = capacity;
    }

    // Copy rules from the other's initial state into the base's, then
    // retarget them.  The pointer is taken after the realloc above
    // because the state array may have moved.
    fsa_state_t *initial = &base->states[base->initial];
    const fsa_state_t *other_initial = &other->states[other->initial];
    const int new_rule_count = initial->count + other_initial->count;
    if (initial->capacity < new_rule_count) {
        int capacity = initial->capacity > 0 ? initial->capacity : 1;
        while (capacity < new_rule_count)
            capacity *= 2;
        fsa_rule_t *rules
            = realloc(initial->rules, (size_t)capacity * sizeof(fsa_rule_t));
        assert(rules);
        initial->rules = rules;
        initial->capacity = capacity;
    }
    if (other_initial->count > 0) {
        memcpy(
            &initial->rules[initial->count], other_initial->rules,
            (size_t)other_initial->count * sizeof(fsa_rule_t));
        retarget_merged_rules(
            &initial->rules[initial->count], other_initial->count,
            other->initial, base->initial, base->count);
    }
    initial->count = new_rule_count;

    // Copy other states, skipping the final state (ID zero) and the
    // initial state, then retarget their rules.
    int offset = base->count;
    if (1 < other->initial) {
        // States 1 .. initial - 1.
        const int copy_count = other->initial - 1;
        memcpy(
            &base->states[offset], &other->states[1],
            (size_t)copy_count * sizeof(fsa_state_t));
        offset += copy_count;
    }
    if (other->initial < other->count - 1) {
        // States initial + 1 .. count - 1.  The source starts one past
        // the initial state, which must not be copied.
        const int copy_count = other->count - other->initial - 1;
        memcpy(
            &base->states[offset], &other->states[other->initial + 1],
            (size_t)copy_count * sizeof(fsa_state_t));
    }
    for (int i = base->count; i < new_count; ++i) {
        retarget_merged_rules(
            base->states[i].rules, base->states[i].count, other->initial,
            base->initial, base->count);
    }
    base->count = new_count;

    // Release what was not transferred to the base.
    free(other->states[0].rules);
    free(other->states[other->initial].rules);
    free(other->states);

    // New states were appended after the initial state; move it back
    // to the end so callers can rely on `initial == count - 1`.
    move_initial_state_last(base);

    assert(base->states[0].final);
}
// Build into `out` an FSA for the whole regex, i.e. the union of all of
// its alternative sequences.  The regex must contain at least one
// sequence.
void construct(const regex_t *regex, fsa_t *out)
{
    assert(regex->count > 0);

    // The first alternative is constructed directly into the output;
    // each further alternative is built separately and merged in.
    construct_sequence(&regex->contents[0], out);

    int i = 1;
    while (i < regex->count) {
        fsa_t alternative;
        construct_sequence(&regex->contents[i], &alternative);
        merge_fsas(out, &alternative);
        ++i;
    }

    // Invariants of the result: the initial state is the last state and
    // state zero is final.  The first one relies on merge_fsas leaving
    // the initial state at the end of the state array.
    assert(out->initial == out->count - 1);
    assert(out->states[0].final);
}

View File

@ -102,11 +102,42 @@ static void test_sequence(void)
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_union(void)
{
const char *literals = "abc";
regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t));
for (int i = 0; i < 3; ++i) {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = literals[i];
alternatives[i].count = alternatives[i].capacity = 1;
alternatives[i].contents = terms;
}
regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives };
fsa_t fsa;
construct(&regex, &fsa);
const fsa_state_t *initial = &fsa.states[fsa.initial];
ASSERT_EQ(3, initial->count);
for (int i = 0; i < 3; ++i) {
ASSERT_EQ(literals[i], initial->rules[i].input);
const int next = initial->rules[i].next;
ASSERT_TRUE(fsa.states[next].final);
}
regex_free(&regex);
fsa_free(&fsa);
}
// Test runner entry point: runs every construction test in turn and
// returns the value produced by TESTING_END().
int main(void)
{
    TESTING_BEGIN();

    test_empty_expression();
    test_literal_expression();
    test_sequence();
    test_union();

    return TESTING_END();
}