277 lines
6.6 KiB
C
277 lines
6.6 KiB
C
/*
 * Copyright (c) Camden Dixie O'Brien
 * SPDX-License-Identifier: AGPL-3.0-only
 */
|
|
|
|
#include "construct.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// Append every state of o to the end of f's state array.
//
// The states are copied shallowly, so the rule buffers of the copied
// states are shared with (and from now on reachable through) f; only
// o's state buffer itself is freed here.  o must be a different
// automaton from f.
//
// If init_out is non-NULL it receives the index within f of o's
// initial state.  If final_out is non-NULL it receives the index
// within f of o's state zero (its final state), which is marked
// non-final in f.
static void add_fsa(fsa_t *f, const fsa_t *o, int *init_out, int *final_out)
{
	assert(f != o);

	// Index in f at which o's state zero will land; also the amount
	// by which every copied rule target has to be shifted.
	const int offset = f->count;
	const int total = offset + o->count;

	// Double f's capacity until both sets of states fit.
	if (f->capacity < total) {
		int capacity = f->capacity;
		do
			capacity *= 2;
		while (capacity < total);
		f->capacity = capacity;
		f->states = realloc(f->states, f->capacity * sizeof(fsa_state_t));
		assert(f->states);
	}

	fsa_state_t *copied = f->states + offset;
	memcpy(copied, o->states, o->count * sizeof(fsa_state_t));

	// o's state zero was its final state; it is an ordinary state in f.
	copied[0].final = false;

	// Shift the rule targets of the copied states so that they refer
	// to the states' new positions in f.
	for (fsa_state_t *state = copied; state < copied + o->count; ++state) {
		for (int r = 0; r < state->count; ++r)
			state->rules[r].next += offset;
	}

	// The states themselves now live in f, so only o's state buffer
	// is left to release.
	free(o->states);

	if (NULL != init_out)
		*init_out = o->initial + offset;
	if (NULL != final_out)
		*final_out = offset;
	f->count = total;
}
|
|
|
|
static void retarget_prepended_rules(
|
|
fsa_rule_t *rules, int n, int idx_offset, int init_idx)
|
|
{
|
|
for (fsa_rule_t *r = rules; r < rules + n; ++r) {
|
|
if (0 == r->next)
|
|
r->next = init_idx;
|
|
else
|
|
r->next += idx_offset;
|
|
}
|
|
}
|
|
|
|
// Prepend the automaton o to f.
//
// o's state zero (its final state) is merged into f's initial state:
// its rules are appended to the rules of f's initial state.  All of
// o's other states are appended to f's state array (shallow copies,
// so their rule buffers are from now on reachable through f).  Rules
// that targeted o's state zero are redirected to f's old initial
// state, and f's initial state becomes o's initial state unless that
// was state zero.
//
// The parts of o that were not transferred (state zero's rule buffer
// and the state buffer) are freed, so o must not be used afterwards.
// o must be a different automaton from f.
static void prepend_fsa(fsa_t *f, const fsa_t *o)
{
	assert(NULL != f);
	assert(f != o);

	// Ensure f's initial state has enough space for the rules from
	// o's final state.
	fsa_state_t *f_init = &f->states[f->initial];
	const fsa_state_t *o_final = &o->states[0];
	const int rule_count = f_init->count + o_final->count;
	if (f_init->capacity < rule_count) {
		do
			f_init->capacity *= 2;
		while (f_init->capacity < rule_count);
		f_init->rules
			= realloc(f_init->rules, f_init->capacity * sizeof(fsa_rule_t));
		assert(f_init->rules);
	}

	// Copy o's final state's rules into f's initial state, then
	// retarget them.  o's non-final states will be placed starting at
	// index f->count, and o's index 1 maps to that slot, hence the
	// offset of f->count - 1.
	fsa_rule_t *start = f_init->rules + f_init->count;
	memcpy(start, o_final->rules, o_final->count * sizeof(fsa_rule_t));
	retarget_prepended_rules(
		start, o_final->count, f->count - 1, f->initial);

	// Account for the rules that were just appended; without this
	// they would sit beyond the state's rule count and be ignored.
	// This must happen before f->states is reallocated below, since
	// that may invalidate f_init.
	f_init->count = rule_count;

	// Ensure f has enough space for the new states.
	const int count = f->count + o->count - 1;
	if (f->capacity < count) {
		do
			f->capacity *= 2;
		while (f->capacity < count);
		f->states = realloc(f->states, f->capacity * sizeof(fsa_state_t));
		assert(f->states);
	}

	// Copy o's states into f, skipping index zero (the final state).
	fsa_state_t *dst = f->states + f->count;
	const fsa_state_t *src = o->states + 1;
	const int copy_count = o->count - 1;
	memcpy(dst, src, copy_count * sizeof(fsa_state_t));

	// Retarget the rules of all the newly-copied states.
	for (int i = f->count; i < count; ++i) {
		retarget_prepended_rules(
			f->states[i].rules, f->states[i].count, f->count - 1,
			f->initial);
	}

	// Clean up o's remaining resources.  The final state was not
	// copied to f, so that must be cleaned up along with the states
	// buffer.
	free(o->states[0].rules);
	free(o->states);

	// If o's initial state was its state zero, that state has been
	// merged into f's initial state, so f->initial is already right.
	if (0 != o->initial)
		f->initial = o->initial + f->count - 1;
	f->count = count;
}
|
|
|
|
// Initialise out as a two-state skeleton: state zero is marked final
// and a freshly added state becomes the initial state.  No rules are
// added here.
static void construct_base(fsa_t *out)
{
	fsa_init(out);

	// State zero plays the role of the final state.
	out->states[0].final = true;

	// The newly added state is where the automaton starts.
	out->initial = fsa_add_state(out);
}
|
|
|
|
// Build an automaton with a single rule on the given symbol leading
// from the initial state to state zero (the final state).
static void construct_symbol(fsa_t *out, int symbol)
{
	construct_base(out);

	const int final_state = 0;
	fsa_add_rule(out, out->initial, final_state, symbol);
}
|
|
|
|
static bool in_class(const regex_class_t *class, char c)
|
|
{
|
|
for (int i = 0; i < class->count; ++i) {
|
|
if (class->contents[i] == c)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Build an automaton for a character class: one rule from the initial
// state to state zero for each accepted character.  For a negated
// class the accepted characters are all values in [0, CHAR_COUNT)
// that are not listed in the class.
static void construct_class(fsa_t *out, const regex_class_t *class)
{
	construct_base(out);

	if (!class->negated) {
		// Plain class: one rule per listed character.
		for (int i = 0; i < class->count; ++i)
			fsa_add_rule(out, out->initial, 0, class->contents[i]);
		return;
	}

	// Negated class: one rule per character that is not listed.
	for (int symbol = 0; symbol < CHAR_COUNT; ++symbol) {
		if (in_class(class, symbol))
			continue;
		fsa_add_rule(out, out->initial, 0, symbol);
	}
}
|
|
|
|
// Build an automaton with a rule from the initial state to state zero
// for every symbol value in [0, CHAR_COUNT).
static void construct_wildcard(fsa_t *out)
{
	construct_base(out);

	int symbol = 0;
	while (symbol < CHAR_COUNT) {
		fsa_add_rule(out, out->initial, 0, symbol);
		++symbol;
	}
}
|
|
|
|
// Shared first step of the quantifier constructions.
//
// The automaton currently held in out is moved aside, out is rebuilt
// as a fresh base automaton, and the old automaton is embedded in it.
// Epsilon rules are then added from the new initial state to the
// embedded automaton's initial state, and from the embedded
// automaton's former final state to the new state zero.
//
// init_out and final_out receive the indices of the embedded
// automaton's initial and former final states; both are dereferenced
// here, so neither may be NULL.
static void base_quantify(fsa_t *out, int *init_out, int *final_out)
{
	// Keep a bitwise copy of the sub-automaton before out is reset.
	fsa_t inner = *out;

	construct_base(out);
	add_fsa(out, &inner, init_out, final_out);

	const int inner_init = *init_out;
	const int inner_final = *final_out;
	fsa_add_rule(out, out->initial, inner_init, EPSILON);
	fsa_add_rule(out, inner_final, 0, EPSILON);
}
|
|
|
|
// Apply the star quantifier to the automaton in out: in addition to
// the wiring done by base_quantify, add an epsilon rule looping from
// the sub-automaton's former final state back to its initial state,
// and an epsilon rule from the new initial state straight to state
// zero.
static void construct_star(fsa_t *out)
{
	int inner_init;
	int inner_final;
	base_quantify(out, &inner_init, &inner_final);

	// Loop back for repetition.
	fsa_add_rule(out, inner_final, inner_init, EPSILON);

	// Bypass for zero occurrences.
	fsa_add_rule(out, out->initial, 0, EPSILON);
}
|
|
|
|
// Apply the plus quantifier to the automaton in out: in addition to
// the wiring done by base_quantify, add an epsilon rule looping from
// the sub-automaton's former final state back to its initial state.
static void construct_plus(fsa_t *out)
{
	int inner_init;
	int inner_final;
	base_quantify(out, &inner_init, &inner_final);

	// Loop back for repetition.
	fsa_add_rule(out, inner_final, inner_init, EPSILON);
}
|
|
|
|
// Apply the question-mark quantifier to the automaton in out: in
// addition to the wiring done by base_quantify, add an epsilon rule
// from the new initial state straight to state zero.
static void construct_qmark(fsa_t *out)
{
	// base_quantify requires both output slots even though the
	// indices are not needed afterwards here.
	int inner_init;
	int inner_final;
	base_quantify(out, &inner_init, &inner_final);

	// Bypass for zero occurrences.
	fsa_add_rule(out, out->initial, 0, EPSILON);
}
|
|
|
|
// Build into out the automaton for the term's atom alone, ignoring
// its quantifier.
static void construct_term_atom(const regex_term_t *term, fsa_t *out)
{
	switch (term->type) {
	case REGEX_TERM_EMPTY:
		construct_symbol(out, EPSILON);
		return;
	case REGEX_TERM_LITERAL:
		construct_symbol(out, term->literal);
		return;
	case REGEX_TERM_SUBEXPR:
		construct_nfa(&term->subexpr, out);
		return;
	case REGEX_TERM_CLASS:
		construct_class(out, &term->class);
		return;
	case REGEX_TERM_WILDCARD:
		construct_wildcard(out);
		return;
	}
}

// Wrap the automaton already held in out according to the term's
// quantifier; a term without a quantifier leaves out untouched.
static void apply_term_quantifier(const regex_term_t *term, fsa_t *out)
{
	switch (term->quantifier) {
	case REGEX_QUANTIFIER_NONE:
		return;
	case REGEX_QUANTIFIER_STAR:
		construct_star(out);
		return;
	case REGEX_QUANTIFIER_PLUS:
		construct_plus(out);
		return;
	case REGEX_QUANTIFIER_QMARK:
		construct_qmark(out);
		return;
	}
}

// Build into out the automaton for a single regex term: first the
// atom (empty, literal, sub-expression, class or wildcard), then the
// quantifier applied on top of it.  On return state zero of out is
// asserted to be final.
static void construct_term(const regex_term_t *term, fsa_t *out)
{
	construct_term_atom(term, out);
	apply_term_quantifier(term, out);

	assert(out->states[0].final);
}
|
|
|
|
// Build into out the automaton for a sequence of terms.
//
// The last term is constructed directly into out; the remaining
// terms are then constructed one at a time, from right to left, and
// prepended to out.  The sequence must contain at least one term.
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
{
	assert(seq->count > 0);

	fsa_t term_fsa;
	int i = seq->count - 1;

	// Rightmost term goes straight into the output automaton.
	construct_term(&seq->contents[i], out);

	// Walk the remaining terms backwards, prepending each in turn.
	while (--i >= 0) {
		construct_term(&seq->contents[i], &term_fsa);
		prepend_fsa(out, &term_fsa);
	}

	assert(out->states[0].final);
}
|
|
|
|
// Embed branch into f as one alternative: its states are appended to
// f, then epsilon rules are added from f's initial state to the
// branch's initial state and from the branch's former final state to
// f's state zero.
static void union_add_branch(fsa_t *f, const fsa_t *branch)
{
	int branch_init;
	int branch_final;

	add_fsa(f, branch, &branch_init, &branch_final);
	fsa_add_rule(f, f->initial, branch_init, EPSILON);
	fsa_add_rule(f, branch_final, 0, EPSILON);
}

// Replace f with the union of f and o.
//
// The automaton currently held in f is moved aside, f is rebuilt as a
// fresh base automaton (state zero final, plus a new initial state),
// and then the old f and o are embedded in it as two alternatives, in
// that order.  o's state buffer is released as part of the embedding.
static void construct_union(fsa_t *f, const fsa_t *o)
{
	// Keep a bitwise copy of the left-hand automaton before f is reset.
	fsa_t left = *f;

	construct_base(f);

	union_add_branch(f, &left);
	union_add_branch(f, o);
}
|
|
|
|
// Build into out the automaton for a whole regex.
//
// The regex is a list of alternative sequences.  The first sequence
// is constructed directly into out; each further sequence is
// constructed separately and merged into out as a union.  The regex
// must contain at least one sequence.  On return state zero of out is
// asserted to be final.
void construct_nfa(const regex_t *regex, fsa_t *out)
{
	assert(regex->count > 0);

	fsa_t sequence_fsa;
	construct_sequence(&regex->contents[0], out);
	for (int i = 1; i < regex->count; ++i) {
		// construct_union takes over sequence_fsa's contents, so the
		// same variable can be reused for the next alternative.
		construct_sequence(&regex->contents[i], &sequence_fsa);
		construct_union(out, &sequence_fsa);
	}

	assert(out->states[0].final);
}
|