Implement union construction
This commit is contained in:
parent
afd4cf928c
commit
852102cab9
@ -85,7 +85,103 @@ static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
|
|||||||
assert(out->states[0].final);
|
assert(out->states[0].final);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void retarget_merged_rules(
|
||||||
|
fsa_rule_t *rules, int rules_count, int initial, int base_initial,
|
||||||
|
int base_count)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < rules_count; ++i) {
|
||||||
|
if (0 == rules[i].next)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// IDs less than initial have to be offset by one less than
|
||||||
|
// base_count because the final state (ID zero) is not copied.
|
||||||
|
// If they are greater it's two less as the initial state is
|
||||||
|
// also not copied. Finally, if the target is the initial
|
||||||
|
// state then it should be changed to the base's initial
|
||||||
|
// state.
|
||||||
|
if (rules[i].next < initial)
|
||||||
|
rules[i].next += base_count - 1;
|
||||||
|
else if (rules[i].next > initial)
|
||||||
|
rules[i].next += base_count - 2;
|
||||||
|
else
|
||||||
|
rules[i].next = base_initial;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merges `other` into `base` so that the two automata share one final
// state (ID zero) and one initial state (the base's).
//
// The rules of `other`'s initial state are appended to the base's initial
// state. Every remaining state of `other`, except its final and initial
// states, is appended after the base's existing states, and all copied
// rules are retargeted to the new state IDs.
//
// Ownership: the state structs are copied shallowly, so the appended
// states keep using the rule arrays that were allocated for `other`. The
// rule arrays of the two states that are not copied, and `other`'s state
// array itself, are freed here. `other` must not be used afterwards.
//
// Note: base->initial is not changed, while base->count grows by
// other->count - 2. After merging an `other` with more than two states the
// initial state is therefore no longer the last state of `base`.
static void merge_fsas(fsa_t *base, const fsa_t *other)
{
    // The count arithmetic and the two frees at the end treat the final
    // state (ID zero) and the initial state of `other` as two distinct
    // states, so they must not coincide.
    assert(0 < other->initial && other->initial < other->count);

    const int new_count = base->count + other->count - 2;
    if (base->capacity < new_count) {
        // Doubling would never grow a zero capacity.
        if (base->capacity < 1)
            base->capacity = 1;
        while (base->capacity < new_count)
            base->capacity *= 2;
        fsa_state_t *grown_states
            = realloc(base->states, base->capacity * sizeof(fsa_state_t));
        assert(grown_states);
        base->states = grown_states;
    }

    // Copy rules from the other's initial state into the base's, then
    // retarget them. The pointer is taken only after the state array has
    // been (possibly) reallocated.
    fsa_state_t *initial = &base->states[base->initial];
    const fsa_state_t *other_initial = &other->states[other->initial];
    const int new_rule_count = initial->count + other_initial->count;
    if (initial->capacity < new_rule_count) {
        if (initial->capacity < 1)
            initial->capacity = 1;
        while (initial->capacity < new_rule_count)
            initial->capacity *= 2;
        fsa_rule_t *grown_rules = realloc(
            initial->rules, initial->capacity * sizeof(fsa_rule_t));
        assert(grown_rules);
        initial->rules = grown_rules;
    }
    memcpy(
        &initial->rules[initial->count], other_initial->rules,
        (size_t)other_initial->count * sizeof(fsa_rule_t));
    retarget_merged_rules(
        &initial->rules[initial->count], other_initial->count,
        other->initial, base->initial, base->count);
    initial->count = new_rule_count;

    // Copy other states, skipping the final state (ID zero) and the
    // initial state, then retarget their rules.
    int offset = base->count;

    // States 1 .. initial - 1.
    const int before_initial = other->initial - 1;
    if (before_initial > 0) {
        memcpy(
            &base->states[offset], &other->states[1],
            (size_t)before_initial * sizeof(fsa_state_t));
        offset += before_initial;
    }

    // States initial + 1 .. count - 1. The source must start one past the
    // initial state; starting at the initial state itself would copy a
    // state whose rules are freed below and drop the last state.
    const int after_initial = other->count - other->initial - 1;
    if (after_initial > 0) {
        memcpy(
            &base->states[offset], &other->states[other->initial + 1],
            (size_t)after_initial * sizeof(fsa_state_t));
    }

    for (int i = base->count; i < new_count; ++i) {
        retarget_merged_rules(
            base->states[i].rules, base->states[i].count, other->initial,
            base->initial, base->count);
    }
    base->count = new_count;

    // Only the two states that were not copied still own their rule
    // arrays; the copied states' arrays now belong to `base`.
    free(other->states[0].rules);
    free(other->states[other->initial].rules);
    free(other->states);

    assert(base->states[0].final);
}
|
||||||
|
|
||||||
// Builds an automaton for `regex` into `out`.
//
// The first alternative (regex->contents[0]) is constructed directly into
// `out`. Each further alternative is constructed into a temporary
// automaton and merged into `out`. `regex` must hold at least one
// alternative.
void construct(const regex_t *regex, fsa_t *out)
{
    assert(regex->count > 0);

    construct_sequence(&regex->contents[0], out);
    for (int i = 1; i < regex->count; ++i) {
        // merge_fsas frees the temporary's state array, so nothing is
        // released here.
        fsa_t sequence_fsa;
        construct_sequence(&regex->contents[i], &sequence_fsa);
        merge_fsas(out, &sequence_fsa);
    }

    // NOTE(review): merge_fsas appends states without moving out->initial,
    // so this looks like it can only hold when every merged alternative
    // has exactly two states - confirm for longer alternatives.
    assert(out->initial == out->count - 1);
    assert(out->states[0].final);
}
|
||||||
|
@ -102,11 +102,42 @@ static void test_sequence(void)
|
|||||||
fsa_free(&fsa);
|
fsa_free(&fsa);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_union(void)
|
||||||
|
{
|
||||||
|
const char *literals = "abc";
|
||||||
|
regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t));
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
|
||||||
|
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
||||||
|
terms[0].type = REGEX_TERM_LITERAL;
|
||||||
|
terms[0].literal = literals[i];
|
||||||
|
|
||||||
|
alternatives[i].count = alternatives[i].capacity = 1;
|
||||||
|
alternatives[i].contents = terms;
|
||||||
|
}
|
||||||
|
regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives };
|
||||||
|
|
||||||
|
fsa_t fsa;
|
||||||
|
construct(®ex, &fsa);
|
||||||
|
|
||||||
|
const fsa_state_t *initial = &fsa.states[fsa.initial];
|
||||||
|
ASSERT_EQ(3, initial->count);
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
ASSERT_EQ(literals[i], initial->rules[i].input);
|
||||||
|
const int next = initial->rules[i].next;
|
||||||
|
ASSERT_TRUE(fsa.states[next].final);
|
||||||
|
}
|
||||||
|
|
||||||
|
regex_free(®ex);
|
||||||
|
fsa_free(&fsa);
|
||||||
|
}
|
||||||
|
|
||||||
int main(void)
|
int main(void)
|
||||||
{
|
{
|
||||||
TESTING_BEGIN();
|
TESTING_BEGIN();
|
||||||
test_empty_expression();
|
test_empty_expression();
|
||||||
test_literal_expression();
|
test_literal_expression();
|
||||||
test_sequence();
|
test_sequence();
|
||||||
|
test_union();
|
||||||
return TESTING_END();
|
return TESTING_END();
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user