/*
 * Copyright (c) Camden Dixie O'Brien
 * SPDX-License-Identifier: AGPL-3.0-only
 */

#include "construct.h"

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

// Throughout this file the state at index zero of an automaton is its
// final state; every constructor below asserts this on exit.

// Build an automaton for a single literal character: a new state is
// added with one rule on `literal` leading to the state that was
// initial after fsa_init(), and the new state becomes the initial
// state.
static void construct_literal(char literal, fsa_t *out)
{
	fsa_init(out);
	const int id = fsa_add_state(out);
	fsa_add_rule(out, id, out->initial, literal);
	out->initial = id;
}

// Build an automaton for one regex term into `out`.
//
// Empty terms yield a freshly initialised automaton and literal terms
// are delegated to construct_literal().  Wildcards and classes are not
// supported and trip an assertion.
//
// NOTE(review): the REGEX_TERM_SUBEXPR case returns without writing to
// `out` at all, so callers that go on to read `out` (for example
// construct_sequence) would be using an uninitialised automaton.  This
// looks like an unfinished feature -- confirm and implement or assert.
static void construct_term(const regex_term_t *term, fsa_t *out)
{
	switch (term->type) {
	case REGEX_TERM_EMPTY:
		fsa_init(out);
		break;
	case REGEX_TERM_LITERAL:
		construct_literal(term->literal, out);
		break;
	case REGEX_TERM_SUBEXPR:
		return;
	case REGEX_TERM_WILDCARD:
	case REGEX_TERM_CLASS:
		assert(false);
	}
	assert(out->states[0].final);
}

// Prepend `other` to `base`, so that the result first runs `other` and
// then continues into what `base` was.
//
// The final state of `other` (index zero) is identified with the
// current initial state of `base`; all of the other's remaining states
// are appended to the base's state array and their rules retargeted.
// The other's state array and its final state's rule array are freed;
// the rule arrays of the copied states are now owned by `base`.
static void concat_fsas(fsa_t *base, const fsa_t *other)
{
	// TODO: Handle the other's final state having transition rules.
	assert(0 == other->states[0].count);

	// Copy states other than the final state (index zero) to base.
	const int new_count = base->count + other->count - 1;
	if (base->capacity < new_count) {
		do {
			base->capacity *= 2;
		} while (base->capacity < new_count);
		// The capacity is a number of states, not bytes, so it must
		// be scaled by the element size.
		base->states = realloc(
			base->states, base->capacity * sizeof(fsa_state_t));
		assert(base->states);
	}
	const size_t copy_size
		= (size_t)(other->count - 1) * sizeof(fsa_state_t);
	memcpy(&base->states[base->count], &other->states[1], copy_size);

	// Retarget new states' rules.
	for (int i = base->count; i < new_count; ++i) {
		fsa_state_t *state = &base->states[i];
		for (int j = 0; j < state->count; ++j) {
			if (0 == state->rules[j].next) {
				// The other's final state is the base's (old)
				// initial state.
				state->rules[j].next = base->initial;
			} else {
				// States' indices have increased by one less
				// than the base count, as the final state came
				// before them and was not copied.
				state->rules[j].next += base->count - 1;
			}
		}
	}

	// The other's initial state becomes the initial state of the
	// result.  If that state is the other's final state (index zero)
	// it was not copied but identified with the base's initial state,
	// which therefore stays where it is.
	if (0 != other->initial)
		base->initial = other->initial + base->count - 1;
	base->count = new_count;

	free(other->states[0].rules);
	free(other->states);

	assert(base->states[0].final);
}

// Build an automaton for a sequence of terms into `out`.
//
// The last term is constructed first and each preceding term is then
// prepended with concat_fsas(), working backwards through the
// sequence.  The sequence must contain at least one term.
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
{
	assert(seq->count > 0);

	fsa_t term_fsa;
	construct_term(&seq->contents[seq->count - 1], out);
	for (int i = seq->count - 2; i >= 0; --i) {
		construct_term(&seq->contents[i], &term_fsa);
		concat_fsas(out, &term_fsa);
	}

	assert(out->states[0].final);
}

// Rewrite the targets of `rules_count` rules that originated in an
// automaton being merged into a base automaton (see merge_fsas).
//
// `initial` is the index of the merged automaton's initial state, and
// `base_initial` and `base_count` are the base's initial state index
// and state count from before the merge.  Rules targeting the final
// state (index zero) are left alone, since both automata share index
// zero as the final state.
static void retarget_merged_rules(
	fsa_rule_t *rules, int rules_count, int initial, int base_initial,
	int base_count)
{
	// If the state came before the initial state it should be offset
	// by one less than base_count, because the final state (index
	// zero) came before it and was not copied into the base.
	const int before_offset = base_count - 1;

	// If it came after the initial state it must be offset by two
	// less than base_count because both the final state and the
	// initial state came before it and were not copied -- unless the
	// initial state is the same state as the final state, in which
	// case the offset is still only one less than base_count.
	const int after_offset = base_count - (0 != initial ? 2 : 1);

	for (int i = 0; i < rules_count; ++i) {
		if (0 == rules[i].next)
			continue;

		if (rules[i].next < initial) {
			rules[i].next += before_offset;
		} else if (rules[i].next > initial) {
			rules[i].next += after_offset;
		} else {
			// The rule targets the merged automaton's initial
			// state, which has been folded into the base's.
			rules[i].next = base_initial;
		}
	}
}

// Merge `other` into `base` as an alternative.
//
// The rules of the other's initial state are appended to those of the
// base's initial state, and every state of `other` apart from its
// final state (index zero) and its initial state is appended to the
// base's state array with its rules retargeted.  The other's state
// array and the rule arrays of its final and initial states are freed;
// the rule arrays of the copied states are now owned by `base`.
static void merge_fsas(fsa_t *base, const fsa_t *other)
{
	// Copy rules from the other's initial state into the base's
	// initial state.
	fsa_state_t *initial = &base->states[base->initial];
	const fsa_state_t *other_initial = &other->states[other->initial];
	const int new_rule_count = initial->count + other_initial->count;
	if (initial->capacity < new_rule_count) {
		do {
			initial->capacity *= 2;
		} while (initial->capacity < new_rule_count);
		initial->rules = realloc(
			initial->rules, initial->capacity * sizeof(fsa_rule_t));
		assert(initial->rules);
	}
	memcpy(
		&initial->rules[initial->count], other_initial->rules,
		other_initial->count * sizeof(fsa_rule_t));

	// Retarget the copied rules.
	retarget_merged_rules(
		&initial->rules[initial->count], other_initial->count,
		other->initial, base->initial, base->count);

	// Commit the new rule count now: `initial` points into
	// base->states, which may be moved by the reallocation below, so
	// it must not be used after that point.
	initial->count = new_rule_count;

	// Copy other states, skipping the final and initial states (which
	// may be one and the same).
	const int skipped_states = other->initial != 0 ? 2 : 1;
	const int new_count = base->count + other->count - skipped_states;
	if (base->capacity < new_count) {
		do {
			base->capacity *= 2;
		} while (base->capacity < new_count);
		base->states = realloc(
			base->states, base->capacity * sizeof(fsa_state_t));
		assert(base->states);
	}

	int offset = base->count;
	if (1 < other->initial) {
		// States strictly between the final and initial states.
		const int copy_count = other->initial - 1;
		const size_t copy_size
			= (size_t)copy_count * sizeof(fsa_state_t);
		memcpy(&base->states[offset], &other->states[1], copy_size);
		offset += copy_count;
	}
	if (other->initial < other->count - 1) {
		// States after the initial state; the source starts one past
		// the initial state so that it is the one left out.
		const int copy_count = other->count - other->initial - 1;
		const size_t copy_size
			= (size_t)copy_count * sizeof(fsa_state_t);
		memcpy(
			&base->states[offset],
			&other->states[other->initial + 1], copy_size);
	}

	// Retarget the copied states' rules.
	for (int i = base->count; i < new_count; ++i) {
		retarget_merged_rules(
			base->states[i].rules, base->states[i].count,
			other->initial, base->initial, base->count);
	}

	base->count = new_count;

	free(other->states[0].rules);
	if (other->initial != 0)
		free(other->states[other->initial].rules);
	free(other->states);

	assert(base->states[0].final);
}

// Construct an automaton for `regex` into `out`.
//
// The first sequence of the regex is built directly into `out`; every
// further sequence is built separately and merged in as an
// alternative.  The regex must contain at least one sequence.
void construct(const regex_t *regex, fsa_t *out)
{
	assert(regex->count > 0);

	fsa_t sequence_fsa;
	construct_sequence(&regex->contents[0], out);
	for (int i = 1; i < regex->count; ++i) {
		construct_sequence(&regex->contents[i], &sequence_fsa);
		merge_fsas(out, &sequence_fsa);
	}

	assert(out->states[0].final);
}