/*
 * Copyright (c) Camden Dixie O'Brien
 * SPDX-License-Identifier: AGPL-3.0-only
 */

#include "construct.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>

/*
 * Conventions used throughout this file: every automaton built here
 * keeps its final state at index 0 (construct_base marks it and the
 * construct_* functions assert it), and `initial` holds the index of
 * the start state.
 */

/*
 * Grow f's state buffer, by repeated doubling of its capacity, until it
 * can hold at least `count` states.  Does nothing if it is already big
 * enough.  Allocation failure is caught by assert().
 */
static void reserve_states(fsa_t *f, int count)
{
	if (f->capacity >= count)
		return;

	do
		f->capacity *= 2;
	while (f->capacity < count);

	/* Keep the old pointer until the reallocation is known good. */
	fsa_state_t *grown
		= realloc(f->states, f->capacity * sizeof(fsa_state_t));
	assert(grown);
	f->states = grown;
}

/*
 * Grow a state's rule buffer, by repeated doubling of its capacity,
 * until it can hold at least `count` rules.  Does nothing if it is
 * already big enough.  Allocation failure is caught by assert().
 */
static void reserve_rules(fsa_state_t *state, int count)
{
	if (state->capacity >= count)
		return;

	do
		state->capacity *= 2;
	while (state->capacity < count);

	fsa_rule_t *grown
		= realloc(state->rules, state->capacity * sizeof(fsa_rule_t));
	assert(grown);
	state->rules = grown;
}

/*
 * Append all of o's states to f, leaving f's existing states, rules and
 * initial index untouched.
 *
 * The states are copied bitwise (so their rule buffers and flags move
 * over unchanged) and the `next` index of each copied rule is shifted
 * by the number of states f had before the call.
 *
 * o's states buffer is freed: after this call o must not be used
 * again.  f and o must be distinct objects.
 *
 * init_out, if non-NULL, receives the index within f of o's initial
 * state.  final_out, if non-NULL, receives the index within f of o's
 * state 0.
 */
static void add_fsa(fsa_t *f, const fsa_t *o, int *init_out, int *final_out)
{
	assert(f != o);

	const int base = f->count;
	const int count = base + o->count;

	/* Make room for o's states, then copy them in after f's own. */
	reserve_states(f, count);
	memcpy(f->states + base, o->states, o->count * sizeof(fsa_state_t));

	/* Retarget the copied rules to refer to the new state indices. */
	for (int i = base; i < count; ++i) {
		fsa_state_t *state = &f->states[i];
		for (int j = 0; j < state->count; ++j)
			state->rules[j].next += base;
	}

	/*
	 * Every state (including its rules pointer) now lives in f, so
	 * only o's states buffer itself is left to release.
	 */
	free(o->states);

	if (NULL != init_out)
		*init_out = o->initial + base;
	if (NULL != final_out)
		*final_out = base;
	f->count = count;
}

/*
 * Fix up the targets of `n` rules that came from an automaton being
 * prepended: a rule that pointed at that automaton's state 0 is
 * redirected to init_idx, and every other target is shifted by
 * idx_offset.
 */
static void retarget_prepended_rules(
	fsa_rule_t *rules, int n, int idx_offset, int init_idx)
{
	for (int i = 0; i < n; ++i) {
		if (0 == rules[i].next)
			rules[i].next = init_idx;
		else
			rules[i].next += idx_offset;
	}
}

/*
 * Prepend o to f, merging o's state 0 (its final state) into f's
 * initial state so that the result first runs o and then f.
 *
 * o's state 0 is not copied: its rules are appended to f's initial
 * state and its rule buffer is freed.  The remaining states of o are
 * copied to the end of f, so o's state i (i >= 1) ends up at index
 * f->count + i - 1.  Unless o's initial state is its state 0, f's
 * initial index is moved to the copied initial state of o.
 *
 * o's buffers are freed: after this call o must not be used again.
 * f and o must be distinct objects.
 */
static void prepend_fsa(fsa_t *f, const fsa_t *o)
{
	assert(NULL != f);
	assert(f != o);

	const int old_count = f->count;
	const int old_initial = f->initial;
	const int offset = old_count - 1;

	/*
	 * Move the rules of o's final state onto f's initial state.
	 * Skipped entirely when there are none, which also avoids handing
	 * memcpy() a possibly-null source pointer.
	 */
	const fsa_state_t *o_final = &o->states[0];
	if (o_final->count > 0) {
		fsa_state_t *f_init = &f->states[old_initial];
		const int rule_count = f_init->count + o_final->count;
		reserve_rules(f_init, rule_count);

		fsa_rule_t *start = f_init->rules + f_init->count;
		memcpy(start, o_final->rules,
		       o_final->count * sizeof(fsa_rule_t));
		retarget_prepended_rules(
			start, o_final->count, offset, old_initial);

		/* Record the new rules; without this they would be lost. */
		f_init->count = rule_count;
	}

	/*
	 * Copy o's states into f, skipping index zero (the final state).
	 * Pointers into f->states taken before this point may dangle once
	 * the buffer has been grown.
	 */
	const int count = old_count + o->count - 1;
	reserve_states(f, count);
	memcpy(f->states + old_count, o->states + 1,
	       (o->count - 1) * sizeof(fsa_state_t));

	/* Retarget the rules of all the newly-copied states. */
	for (int i = old_count; i < count; ++i) {
		retarget_prepended_rules(
			f->states[i].rules, f->states[i].count, offset,
			old_initial);
	}

	/*
	 * Clean up what is left of o.  Its final state was not copied to
	 * f, so that state's rule buffer must be released along with the
	 * states buffer.
	 */
	free(o->states[0].rules);
	free(o->states);

	if (0 != o->initial)
		f->initial = o->initial + offset;
	f->count = count;
}

/*
 * Initialise `out` as a two-state automaton: state 0 is marked final, a
 * newly added state becomes the initial state, and a single rule on
 * `symbol` is added between them (argument order of fsa_add_rule is
 * presumably fsa, from, to, symbol -- confirm against its declaration).
 */
static void construct_base(fsa_t *out, int symbol)
{
	fsa_init(out);
	const int id = fsa_add_state(out);
	fsa_add_rule(out, id, out->initial, symbol);
	out->initial = id;
	out->states[0].final = true;
}

/*
 * Replace the automaton in `out` with one that wraps it for the `*`
 * quantifier: a fresh epsilon base automaton is built in `out`, the
 * previous automaton is appended to it, and epsilon rules are added
 * from the new initial state to the old initial state, from the old
 * final state back to the old initial state, and from the old final
 * state to state 0.
 */
static void construct_star(fsa_t *out)
{
	/* Shallow copy: the buffers it points at are handed to add_fsa. */
	const fsa_t inner = *out;
	construct_base(out, EPSILON);

	int inner_initial, inner_final;
	add_fsa(out, &inner, &inner_initial, &inner_final);

	fsa_add_rule(out, out->initial, inner_initial, EPSILON);
	fsa_add_rule(out, inner_final, inner_initial, EPSILON);
	fsa_add_rule(out, inner_final, 0, EPSILON);
}

/*
 * Build the automaton for a single term, including its quantifier, into
 * `out`.  Wildcards, classes and the `+` and `?` quantifiers are not
 * supported here and trip an assertion.
 */
static void construct_term(const regex_term_t *term, fsa_t *out)
{
	switch (term->type) {
	case REGEX_TERM_EMPTY:
		construct_base(out, EPSILON);
		break;
	case REGEX_TERM_LITERAL:
		construct_base(out, term->literal);
		break;
	case REGEX_TERM_SUBEXPR:
		construct_nfa(&term->subexpr, out);
		break;
	case REGEX_TERM_WILDCARD:
	case REGEX_TERM_CLASS:
		assert(false);
		break;
	}

	switch (term->quantifier) {
	case REGEX_QUANTIFIER_NONE:
		break;
	case REGEX_QUANTIFIER_STAR:
		construct_star(out);
		break;
	case REGEX_QUANTIFIER_PLUS:
	case REGEX_QUANTIFIER_QMARK:
		assert(false);
		break;
	}

	assert(out->states[0].final);
}

/*
 * Build the automaton for a non-empty sequence of terms into `out`.
 * The last term is constructed first and each earlier term is then
 * prepended in turn, working backwards through the sequence.
 */
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
{
	assert(seq->count > 0);

	construct_term(&seq->contents[seq->count - 1], out);
	for (int i = seq->count - 2; i >= 0; --i) {
		/* prepend_fsa releases term_fsa's buffers. */
		fsa_t term_fsa;
		construct_term(&seq->contents[i], &term_fsa);
		prepend_fsa(out, &term_fsa);
	}

	assert(out->states[0].final);
}

/*
 * Replace f with the union of f and o.  f is re-initialised with a
 * final state 0 and a fresh initial state; the previous f and then o
 * are appended, each linked in by an epsilon rule from the new initial
 * state to its initial state and an epsilon rule from its old final
 * state to state 0.
 *
 * o's states buffer is freed by add_fsa: o must not be used afterwards.
 */
static void construct_union(fsa_t *f, const fsa_t *o)
{
	/* Shallow copy: the buffers it points at are handed to add_fsa. */
	const fsa_t lhs = *f;

	fsa_init(f);
	f->states[0].final = true;
	f->initial = fsa_add_state(f);

	int init, final;

	add_fsa(f, &lhs, &init, &final);
	fsa_add_rule(f, f->initial, init, EPSILON);
	fsa_add_rule(f, final, 0, EPSILON);

	add_fsa(f, o, &init, &final);
	fsa_add_rule(f, f->initial, init, EPSILON);
	fsa_add_rule(f, final, 0, EPSILON);
}

/*
 * Build an automaton for `regex` into `out`.  The regex must contain at
 * least one sequence; the first sequence is constructed directly and
 * each further sequence is combined with the result as a union.
 */
void construct_nfa(const regex_t *regex, fsa_t *out)
{
	assert(regex->count > 0);

	construct_sequence(&regex->contents[0], out);
	for (int i = 1; i < regex->count; ++i) {
		/* construct_union releases sequence_fsa's states buffer. */
		fsa_t sequence_fsa;
		construct_sequence(&regex->contents[i], &sequence_fsa);
		construct_union(out, &sequence_fsa);
	}

	assert(out->states[0].final);
}