regex-engine/lib/construct.c
Camden Dixie O'Brien 007cddc292 Add some runtime assertions for state zero being final
I'm using this invariant to save work finding the final state so it
makes sense to test it.
2024-10-27 01:24:43 +01:00

92 lines
2.0 KiB
C

/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "construct.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/*
 * Build an automaton for a single literal character.
 *
 * `out` is (re)initialised with fsa_init(), one extra state is added,
 * and a rule on `literal` is added from that new state to whatever
 * state fsa_init() left as the initial one.  The new state then
 * becomes the initial state.
 *
 * NOTE(review): the argument order of fsa_add_rule() is presumably
 * (fsa, from, to, input) -- confirm against its declaration.
 */
static void construct_literal(char literal, fsa_t *out)
{
	fsa_init(out);

	const int entry = fsa_add_state(out);
	const int target = out->initial;

	fsa_add_rule(out, entry, target, literal);
	out->initial = entry;
}
/*
 * Build an automaton for a single regex term into `out`.
 *
 * - REGEX_TERM_EMPTY:   `out` is just initialised with fsa_init().
 * - REGEX_TERM_LITERAL: delegated to construct_literal().
 * - REGEX_TERM_SUBEXPR, REGEX_TERM_WILDCARD, REGEX_TERM_CLASS:
 *   not supported yet; these trip an assertion.
 *
 * On return, state 0 of `out` is asserted to be final; callers in this
 * file rely on that invariant.
 */
static void construct_term(const regex_term_t *term, fsa_t *out)
{
	switch (term->type) {
	case REGEX_TERM_EMPTY:
		fsa_init(out);
		break;
	case REGEX_TERM_LITERAL:
		construct_literal(term->literal, out);
		break;
	case REGEX_TERM_SUBEXPR:
		/*
		 * Previously this case returned without touching `out`,
		 * handing an uninitialised automaton back to the caller.
		 * Fail loudly like the other unsupported term types.
		 */
	case REGEX_TERM_WILDCARD:
	case REGEX_TERM_CLASS:
		assert(false);
	}

	assert(out->states[0].final);
}
/*
 * Merge `other` into `base` and release `other`'s storage.
 *
 * State 0 of `other` is not copied: every rule in `other` that targets
 * state 0 is redirected to `base->initial`, and the remaining states of
 * `other` are appended after the existing states of `base` with their
 * rule targets shifted accordingly.  The initial state of the result
 * is the (remapped) initial state of `other`.
 *
 * Ownership: the copied states keep their `rules` pointers, so those
 * arrays now belong to `base`.  The rules array of `other`'s state 0
 * and `other`'s state array are freed here; `other` must not be used
 * afterwards.  Any rules leaving `other`'s state 0 are discarded.
 */
static void concat_fsas(fsa_t *base, const fsa_t *other)
{
	assert(base->count >= 1);
	assert(other->count >= 1);

	const int new_count = base->count + other->count - 1;

	if (base->capacity < new_count) {
		/* Start from 1 so a zero capacity cannot loop forever. */
		int new_capacity = base->capacity > 0 ? base->capacity : 1;
		while (new_capacity < new_count)
			new_capacity *= 2;

		/*
		 * Capacity counts states, so scale by the element size.
		 * Keep the old pointer until the new one is known good.
		 */
		fsa_state_t *grown
			= realloc(base->states, (size_t)new_capacity * sizeof *grown);
		assert(grown);
		base->states = grown;
		base->capacity = new_capacity;
	}

	/* Append every state of `other` except state 0. */
	const size_t copied = (size_t)(other->count - 1);
	memcpy(&base->states[base->count], &other->states[1],
	       copied * sizeof(fsa_state_t));

	/* Rewrite rule targets of the appended states into base's ids. */
	const int id_offset = base->count - 1;
	for (int i = base->count; i < new_count; ++i) {
		fsa_state_t *state = &base->states[i];
		for (int j = 0; j < state->count; ++j) {
			if (0 == state->rules[j].next)
				state->rules[j].next = base->initial;
			else
				state->rules[j].next += id_offset;
		}
	}

	base->count = new_count;

	/*
	 * Remap the initial state the same way as rule targets: state 0
	 * of `other` corresponds to base's previous initial state, so in
	 * that case the initial state is left unchanged.
	 */
	if (0 != other->initial)
		base->initial = other->initial + id_offset;

	free(other->states[0].rules);
	free(other->states);

	assert(base->states[0].final);
}
/*
 * Build an automaton for a sequence of terms into `out`.
 *
 * The last term is constructed directly into `out`; the remaining
 * terms are then handled from right to left, each one being built
 * into a temporary and merged into `out` with concat_fsas() (which
 * also releases the temporary's storage).
 *
 * The sequence must contain at least one term.
 */
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
{
	assert(seq->count > 0);

	int index = seq->count - 1;
	construct_term(&seq->contents[index], out);

	while (--index >= 0) {
		fsa_t prefix;
		construct_term(&seq->contents[index], &prefix);
		concat_fsas(out, &prefix);
	}

	assert(out->states[0].final);
}
/*
 * Build an automaton for `regex` into `out`.
 *
 * Only the first sequence in `regex->contents` is used; any further
 * entries are ignored by this function.
 */
void construct(const regex_t *regex, fsa_t *out)
{
	const regex_sequence_t *first = &regex->contents[0];

	construct_sequence(first, out);
}