/*
 * Copyright (c) Camden Dixie O'Brien
 * SPDX-License-Identifier: AGPL-3.0-only
 */

#include "construct.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>

/*
 * Automata in this file are built back-to-front: state 0 is the final
 * state and `initial` names the state matching starts from.  Every
 * helper below asserts or relies on `states[0].final` holding.
 */

/*
 * Initialise `out` as an automaton for a single literal character.
 *
 * A fresh state is added, given one rule on `literal` that targets the
 * state that was initial after fsa_init(), and then made the new
 * initial state.
 */
static void construct_literal(char literal, fsa_t *out)
{
	fsa_init(out);
	const int id = fsa_add_state(out);
	fsa_add_rule(out, id, out->initial, literal);
	out->initial = id;
}

/*
 * Initialise `out` as an automaton for a single regex term.
 *
 * Only empty and literal terms are implemented.  Wildcard and class
 * terms trip an assertion.
 *
 * NOTE(review): REGEX_TERM_SUBEXPR returns without touching `out`, so
 * the caller is handed an uninitialised automaton.  The subexpression
 * payload is not visible from this file, so the case is left as-is.
 */
static void construct_term(const regex_term_t *term, fsa_t *out)
{
	switch (term->type) {
	case REGEX_TERM_EMPTY:
		fsa_init(out);
		break;
	case REGEX_TERM_LITERAL:
		construct_literal(term->literal, out);
		break;
	case REGEX_TERM_SUBEXPR:
		return;
	case REGEX_TERM_WILDCARD:
	case REGEX_TERM_CLASS:
		assert(false);
	}

	assert(out->states[0].final);
}

/*
 * Prepend `other` to `base`, consuming `other`.
 *
 * Every state of `other` except its final state (ID 0) is appended to
 * `base`.  Rules that targeted other's final state are redirected to
 * base's current initial state; all other targets are shifted by the
 * ID offset.  Other's initial state becomes base's initial state.
 *
 * States are copied shallowly, so the rule arrays of the copied states
 * are now owned by `base`.  The rule array of other's final state and
 * other's state array are freed; `other` must not be used afterwards.
 */
static void concat_fsas(fsa_t *base, const fsa_t *other)
{
	const int new_count = base->count + other->count - 1;
	if (base->capacity < new_count) {
		do
			base->capacity *= 2;
		while (base->capacity < new_count);
		// realloc() takes a size in bytes, not a number of states.
		base->states = realloc(
			base->states, base->capacity * sizeof(fsa_state_t));
		assert(base->states);
	}

	// Copy everything but other's final state onto the end of base.
	const size_t copy_size
		= (size_t)(other->count - 1) * sizeof(fsa_state_t);
	memcpy(&base->states[base->count], &other->states[1], copy_size);

	// Other's state N now lives at N + id_offset in base.
	const int id_offset = base->count - 1;
	for (int i = base->count; i < new_count; ++i) {
		fsa_state_t *state = &base->states[i];
		for (int j = 0; j < state->count; ++j) {
			if (0 == state->rules[j].next)
				state->rules[j].next = base->initial;
			else
				state->rules[j].next += id_offset;
		}
	}

	base->count = new_count;
	base->initial = other->initial + id_offset;

	free(other->states[0].rules);
	free(other->states);

	assert(base->states[0].final);
}

/*
 * Initialise `out` as an automaton for a sequence of terms.
 *
 * The last term is constructed directly into `out`; the remaining
 * terms are then constructed from last to first and prepended with
 * concat_fsas().  The sequence must contain at least one term.
 */
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
{
	assert(seq->count > 0);

	fsa_t term_fsa;
	construct_term(&seq->contents[seq->count - 1], out);
	for (int i = seq->count - 2; i >= 0; --i) {
		construct_term(&seq->contents[i], &term_fsa);
		concat_fsas(out, &term_fsa);
	}

	assert(out->states[0].final);
}

/*
 * Rewrite the targets of `rules_count` rules that were copied out of an
 * automaton being merged into a base automaton.
 *
 * `initial` is the initial state ID of the automaton the rules came
 * from; `base_initial` and `base_count` are the initial state ID and
 * the pre-merge state count of the base.  Rules targeting the final
 * state (ID 0) are left untouched.
 */
static void retarget_merged_rules(
	fsa_rule_t *rules, int rules_count, int initial, int base_initial,
	int base_count)
{
	for (int i = 0; i < rules_count; ++i) {
		if (0 == rules[i].next)
			continue;

		// IDs less than initial have to be offset by one less than
		// base_count because the final state (ID zero) is not copied.
		// If they are greater it's two less as the initial state is
		// also not copied.  Finally, if the target is the initial
		// state then it should be changed to the base's initial
		// state.
		if (rules[i].next < initial)
			rules[i].next += base_count - 1;
		else if (rules[i].next > initial)
			rules[i].next += base_count - 2;
		else
			rules[i].next = base_initial;
	}
}

/*
 * Merge `other` into `base` as an alternative, consuming `other`.
 *
 * The two automata end up sharing base's final and initial states: the
 * rules of other's initial state are appended to base's initial state,
 * and every remaining state of `other` (neither final nor initial) is
 * appended to `base` with its rules retargeted.
 *
 * States are copied shallowly, so their rule arrays are now owned by
 * `base`.  The rule arrays of other's final and initial states and
 * other's state array are freed; `other` must not be used afterwards.
 *
 * NOTE(review): this assumes other's initial state is distinct from
 * its final state (other->initial != 0).  An automaton built from an
 * empty term would make new_count smaller than base->count and pass
 * the same pointer to free() twice -- confirm callers never do this.
 */
static void merge_fsas(fsa_t *base, const fsa_t *other)
{
	const int new_count = base->count + other->count - 2;
	if (base->capacity < new_count) {
		do
			base->capacity *= 2;
		while (base->capacity < new_count);
		base->states = realloc(
			base->states, base->capacity * sizeof(fsa_state_t));
		assert(base->states);
	}

	// Copy rules from the other's initial state into the base's, then
	// retarget them.
	fsa_state_t *initial = &base->states[base->initial];
	const fsa_state_t *other_initial = &other->states[other->initial];
	const int new_rule_count = initial->count + other_initial->count;
	if (initial->capacity < new_rule_count) {
		do
			initial->capacity *= 2;
		while (initial->capacity < new_rule_count);
		initial->rules = realloc(
			initial->rules, initial->capacity * sizeof(fsa_rule_t));
		assert(initial->rules);
	}
	memcpy(
		&initial->rules[initial->count], other_initial->rules,
		other_initial->count * sizeof(fsa_rule_t));
	retarget_merged_rules(
		&initial->rules[initial->count], other_initial->count,
		other->initial, base->initial, base->count);
	initial->count = new_rule_count;

	// Copy other states, skipping the initial state, then retarget
	// their rules.
	int offset = base->count;
	if (1 < other->initial) {
		// States between the final state and the initial state.
		const int copy_count = other->initial - 1;
		const size_t copy_size
			= (size_t)copy_count * sizeof(fsa_state_t);
		memcpy(&base->states[offset], &other->states[1], copy_size);
		offset += copy_count;
	}
	if (other->initial < other->count - 1) {
		// States after the initial state.  The source starts one
		// past the initial state so that it really is skipped.
		const int copy_count = other->count - other->initial - 1;
		const size_t copy_size
			= (size_t)copy_count * sizeof(fsa_state_t);
		memcpy(
			&base->states[offset],
			&other->states[other->initial + 1], copy_size);
	}
	for (int i = base->count; i < new_count; ++i) {
		retarget_merged_rules(
			base->states[i].rules, base->states[i].count,
			other->initial, base->initial, base->count);
	}

	base->count = new_count;

	free(other->states[0].rules);
	free(other->states[other->initial].rules);
	free(other->states);

	assert(base->states[0].final);
}

/*
 * Initialise `out` as an automaton for `regex`.
 *
 * The first alternative sequence is constructed directly into `out`;
 * every further sequence is constructed separately and merged in with
 * merge_fsas().  The regex must contain at least one sequence.
 *
 * NOTE(review): merge_fsas() appends states without moving
 * out->initial, so the `initial == count - 1` assertion below looks
 * like it can only hold when every merged alternative contributes no
 * extra states -- confirm this is the intended invariant.
 */
void construct(const regex_t *regex, fsa_t *out)
{
	assert(regex->count > 0);

	fsa_t sequence_fsa;
	construct_sequence(&regex->contents[0], out);
	for (int i = 1; i < regex->count; ++i) {
		construct_sequence(&regex->contents[i], &sequence_fsa);
		merge_fsas(out, &sequence_fsa);
	}

	assert(out->initial == out->count - 1);
	assert(out->states[0].final);
}