Implement union construction
This commit is contained in:
parent
afd4cf928c
commit
852102cab9
@ -85,7 +85,103 @@ static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
|
||||
assert(out->states[0].final);
|
||||
}
|
||||
|
||||
static void retarget_merged_rules(
|
||||
fsa_rule_t *rules, int rules_count, int initial, int base_initial,
|
||||
int base_count)
|
||||
{
|
||||
for (int i = 0; i < rules_count; ++i) {
|
||||
if (0 == rules[i].next)
|
||||
continue;
|
||||
|
||||
// IDs less than initial have to be offset by one less than
|
||||
// base_count because the final state (ID zero) is not copied.
|
||||
// If they are greater it's two less as the initial state is
|
||||
// also not copied. Finally, if the target is the initial
|
||||
// state then it should be changed to the base's initial
|
||||
// state.
|
||||
if (rules[i].next < initial)
|
||||
rules[i].next += base_count - 1;
|
||||
else if (rules[i].next > initial)
|
||||
rules[i].next += base_count - 2;
|
||||
else
|
||||
rules[i].next = base_initial;
|
||||
}
|
||||
}
|
||||
|
||||
// Merge `other` into `base`, sharing the final state (ID zero) and the
// initial state between the two automata.
//
// The rules of other's initial state are appended to base's initial
// state; every remaining state of `other` (all but its final and initial
// states) is appended after base's existing states, and all copied rules
// are retargeted to the new state IDs.
//
// The copied states are shallow copies, so their rule arrays now belong
// to `base`.  The arrays that were not transferred (the rules of other's
// final and initial states, and other's state array itself) are freed
// here; `other` must not be used or freed again by the caller.
//
// NOTE(review): rules attached to other's final state are discarded, and
// an `other` whose initial state is also state zero is not handled
// specially -- confirm that construct_sequence never produces either.
static void merge_fsas(fsa_t *base, const fsa_t *other)
{
    // The final and the initial state of `other` are not copied.
    const int new_count = base->count + other->count - 2;
    if (base->capacity < new_count) {
        // Start from at least one so that doubling terminates even when
        // the current capacity is zero.
        int capacity = base->capacity > 0 ? base->capacity : 1;
        while (capacity < new_count)
            capacity *= 2;

        fsa_state_t *states
            = realloc(base->states, (size_t)capacity * sizeof(fsa_state_t));
        assert(states);
        base->states = states;
        base->capacity = capacity;
    }

    // Copy rules from the other's initial state into the base's, then
    // retarget them.  The pointer is taken only after the state array
    // has (possibly) been reallocated.
    fsa_state_t *initial = &base->states[base->initial];
    const fsa_state_t *other_initial = &other->states[other->initial];
    const int new_rule_count = initial->count + other_initial->count;
    if (initial->capacity < new_rule_count) {
        int capacity = initial->capacity > 0 ? initial->capacity : 1;
        while (capacity < new_rule_count)
            capacity *= 2;

        fsa_rule_t *rules
            = realloc(initial->rules, (size_t)capacity * sizeof(fsa_rule_t));
        assert(rules);
        initial->rules = rules;
        initial->capacity = capacity;
    }
    if (other_initial->count > 0) {
        // Guarded so memcpy is never handed a null pointer for an empty
        // rule list.
        memcpy(
            &initial->rules[initial->count], other_initial->rules,
            (size_t)other_initial->count * sizeof(fsa_rule_t));
        retarget_merged_rules(
            &initial->rules[initial->count], other_initial->count,
            other->initial, base->initial, base->count);
    }
    initial->count = new_rule_count;

    // Copy other states, skipping the final state (ID zero) and the
    // initial state, then retarget their rules.
    int offset = base->count;

    // States 1 .. initial - 1.
    const int before_count = other->initial - 1;
    if (before_count > 0) {
        memcpy(
            &base->states[offset], &other->states[1],
            (size_t)before_count * sizeof(fsa_state_t));
        offset += before_count;
    }

    // States initial + 1 .. count - 1.  The source must start one past
    // the initial state; starting at the initial state itself would copy
    // the state that is meant to be skipped and drop the last one.
    const int after_count = other->count - other->initial - 1;
    if (after_count > 0) {
        memcpy(
            &base->states[offset], &other->states[other->initial + 1],
            (size_t)after_count * sizeof(fsa_state_t));
    }

    for (int i = base->count; i < new_count; ++i) {
        retarget_merged_rules(
            base->states[i].rules, base->states[i].count, other->initial,
            base->initial, base->count);
    }
    base->count = new_count;

    // Release what was not transferred to `base`: the rule arrays of the
    // two uncopied states and the state array itself.
    free(other->states[0].rules);
    free(other->states[other->initial].rules);
    free(other->states);

    assert(base->states[0].final);
}
|
||||
|
||||
// Build an automaton for `regex` into `out`.
//
// The first entry of regex->contents is constructed directly into `out`;
// every further entry is constructed into a temporary automaton and then
// merged into `out` with merge_fsas, which also releases the temporary's
// leftover storage.
//
// `regex` must contain at least one entry.
void construct(const regex_t *regex, fsa_t *out)
{
    assert(regex->count > 0);

    construct_sequence(&regex->contents[0], out);
    for (int i = 1; i < regex->count; ++i) {
        // Fully written by construct_sequence before merge_fsas reads it.
        fsa_t sequence_fsa;
        construct_sequence(&regex->contents[i], &sequence_fsa);
        merge_fsas(out, &sequence_fsa);
    }

    // NOTE(review): merge_fsas appends the merged states after the
    // existing ones and leaves out->initial unchanged, so this looks like
    // it can only hold while every merged automaton has exactly two
    // states -- confirm against construct_sequence.
    assert(out->initial == out->count - 1);
    assert(out->states[0].final);
}
|
||||
|
@ -102,11 +102,42 @@ static void test_sequence(void)
|
||||
fsa_free(&fsa);
|
||||
}
|
||||
|
||||
static void test_union(void)
|
||||
{
|
||||
const char *literals = "abc";
|
||||
regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t));
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
|
||||
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
||||
terms[0].type = REGEX_TERM_LITERAL;
|
||||
terms[0].literal = literals[i];
|
||||
|
||||
alternatives[i].count = alternatives[i].capacity = 1;
|
||||
alternatives[i].contents = terms;
|
||||
}
|
||||
regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives };
|
||||
|
||||
fsa_t fsa;
|
||||
construct(®ex, &fsa);
|
||||
|
||||
const fsa_state_t *initial = &fsa.states[fsa.initial];
|
||||
ASSERT_EQ(3, initial->count);
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
ASSERT_EQ(literals[i], initial->rules[i].input);
|
||||
const int next = initial->rules[i].next;
|
||||
ASSERT_TRUE(fsa.states[next].final);
|
||||
}
|
||||
|
||||
regex_free(®ex);
|
||||
fsa_free(&fsa);
|
||||
}
|
||||
|
||||
// Test entry point: runs each construction test case between the
// TESTING_BEGIN and TESTING_END harness macros and returns the value
// produced by TESTING_END.
int main(void)
{
    TESTING_BEGIN();

    test_empty_expression();
    test_literal_expression();
    test_sequence();
    test_union();

    const int status = TESTING_END();
    return status;
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user