I'm using this invariant to save work finding the final state so it makes sense to test it.
92 lines
2.0 KiB
C
92 lines
2.0 KiB
C
/*
|
|
* Copyright (c) Camden Dixie O'Brien
|
|
* SPDX-License-Identifier: AGPL-3.0-only
|
|
*/
|
|
|
|
#include "construct.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/*
 * Build into `out` an automaton for a single literal character.
 *
 * `out` is (re)initialised with fsa_init(), one extra state is added,
 * and a rule on `literal` is registered between the new state and the
 * state that was initial straight after initialisation.  The new state
 * then becomes the initial state.
 */
static void construct_literal(char literal, fsa_t *out)
{
	fsa_init(out);

	const int start = fsa_add_state(out);
	/* Whatever fsa_init() + fsa_add_state() left as the initial state. */
	const int target = out->initial;

	/* presumably (fsa, from, to, input) — confirm against fsa.h */
	fsa_add_rule(out, start, target, literal);
	out->initial = start;
}
|
|
|
|
/*
 * Build into `out` the automaton for a single regex term.
 *
 *  - REGEX_TERM_EMPTY:    `out` is just initialised with fsa_init().
 *  - REGEX_TERM_LITERAL:  delegated to construct_literal().
 *  - REGEX_TERM_SUBEXPR:  returns immediately; `out` is NOT written.
 *  - REGEX_TERM_WILDCARD,
 *    REGEX_TERM_CLASS:    not handled; trips assert(false).
 *
 * For every path that does not return early, state 0 of `out` is
 * asserted to be final on exit.
 */
static void construct_term(const regex_term_t *term, fsa_t *out)
{
	/* NOTE(review): sub-expressions leave `out` untouched — callers
	 * that go on to use `out` should be checked. */
	if (REGEX_TERM_SUBEXPR == term->type)
		return;

	if (REGEX_TERM_EMPTY == term->type) {
		fsa_init(out);
	} else if (REGEX_TERM_LITERAL == term->type) {
		construct_literal(term->literal, out);
	} else if (REGEX_TERM_WILDCARD == term->type
		   || REGEX_TERM_CLASS == term->type) {
		assert(false);
	}

	/* Invariant relied on elsewhere in this file: state 0 is final. */
	assert(out->states[0].final);
}
|
|
|
|
/*
 * Append the automaton `other` onto `base`, in place.
 *
 * State 0 of `other` is not copied; it is merged with `base`'s current
 * initial state.  Every remaining state of `other` is copied to the end
 * of `base->states` and its rules are renumbered:
 *
 *   - a rule targeting 0 is redirected to `base`'s (old) initial state;
 *   - any other target is shifted by `base->count - 1`.
 *
 * Afterwards `base->initial` is `other`'s initial state under that same
 * mapping.
 *
 * Ownership: the state array of `other` and the rule list of its state 0
 * are freed here.  The copied states are shallow copies, so their rule
 * lists now belong to `base`.  `other` must not be used after this call.
 *
 * Allocation failure is handled by assert(), as elsewhere in the file.
 */
static void concat_fsas(fsa_t *base, const fsa_t *other)
{
	const int new_count = base->count + other->count - 1;

	if (base->capacity < new_count) {
		do {
			base->capacity *= 2;
		} while (base->capacity < new_count);

		/* realloc() takes a size in BYTES, so scale the element
		 * count; go through a temporary so the old pointer is not
		 * clobbered before the result has been checked. */
		fsa_state_t *grown = realloc(
			base->states, (size_t)base->capacity * sizeof *grown);
		assert(grown);
		base->states = grown;
	}

	/* Copy states 1..count-1 of `other` after the states of `base`. */
	const size_t copy_size
		= (size_t)(other->count - 1) * sizeof(fsa_state_t);
	memcpy(&base->states[base->count], &other->states[1], copy_size);

	/* `other` state k (k > 0) now lives at index k + id_offset. */
	const int id_offset = base->count - 1;
	for (int i = base->count; i < new_count; ++i) {
		fsa_state_t *state = &base->states[i];
		for (int j = 0; j < state->count; ++j) {
			if (0 == state->rules[j].next)
				state->rules[j].next = base->initial;
			else
				state->rules[j].next += id_offset;
		}
	}

	base->count = new_count;

	/* Remap the initial state with the same rule as the transitions:
	 * if `other` started in its state 0 (which was merged away), the
	 * combined automaton starts where `base` used to start. */
	if (0 != other->initial)
		base->initial = other->initial + id_offset;

	/* State 0 of `other` was dropped, so its rule list is released
	 * along with the now-redundant state array. */
	free(other->states[0].rules);
	free(other->states);

	assert(base->states[0].final);
}
|
|
|
|
/*
 * Build into `out` the automaton for a sequence of terms.
 *
 * The sequence must be non-empty.  Terms are processed from last to
 * first: the final term is constructed directly into `out`, then each
 * preceding term is constructed into a temporary and joined onto `out`
 * with concat_fsas().
 */
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
{
	assert(seq->count > 0);

	int idx = seq->count - 1;
	construct_term(&seq->contents[idx], out);

	/* Scratch automaton, rebuilt for each earlier term. */
	fsa_t piece;
	while (--idx >= 0) {
		construct_term(&seq->contents[idx], &piece);
		concat_fsas(out, &piece);
	}

	assert(out->states[0].final);
}
|
|
|
|
/*
 * Construct into `out` the automaton for `regex`.
 *
 * Only the first sequence, `regex->contents[0]`, is used.
 * NOTE(review): any further entries in `regex->contents` are ignored
 * here — confirm whether that is intended.
 */
void construct(const regex_t *regex, fsa_t *out)
{
	construct_sequence(&regex->contents[0], out);
}
|