/*
 * Copyright (c) Camden Dixie O'Brien
 * SPDX-License-Identifier: AGPL-3.0-only
 */

#include "construct.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>

/*
 * Conventions relied on throughout this file (asserted where possible):
 *
 *   - State index zero is the final state of every FSA.
 *   - `fsa->initial` is the index of the initial state.
 *
 * NOTE(review): this file assumes fsa_init() produces a single state
 * at index zero that is both final and initial -- confirm against the
 * fsa module.
 */

/*
 * Return the capacity to grow to so that at least `required` elements
 * fit, by repeated doubling of `capacity`.  A non-positive capacity is
 * treated as one so that the doubling always terminates.
 */
static int grown_capacity(int capacity, int required)
{
	if (capacity < 1)
		capacity = 1;
	while (capacity < required)
		capacity *= 2;
	return capacity;
}

/*
 * Initialise `out` as an FSA for a single literal character: a new
 * state is added with one rule on `literal` leading to the state that
 * was initial after fsa_init(), and the new state becomes the initial
 * state.
 */
static void construct_literal(char literal, fsa_t *out)
{
	fsa_init(out);
	const int id = fsa_add_state(out);
	fsa_add_rule(out, id, out->initial, literal);
	out->initial = id;
}

/*
 * Apply the star quantifier to `fsa` in place by folding the initial
 * state into the final state (index zero): the initial state's rules
 * are appended to the final state's, the initial state is removed
 * from the state array, and every rule is retargeted accordingly.
 * Afterwards the final state is also the initial state.
 */
static void star_fsa(fsa_t *fsa)
{
	// If the initial state is already the final state then nothing
	// needs to be done.
	if (0 == fsa->initial)
		return;

	const int removed = fsa->initial;
	fsa_state_t *final = &fsa->states[0];
	fsa_state_t *initial = &fsa->states[removed];

	// Copy initial state's rules to final state.
	const int needed = final->count + initial->count;
	if (final->capacity < needed) {
		final->capacity = grown_capacity(final->capacity, needed);
		final->rules = realloc(
			final->rules, final->capacity * sizeof(fsa_rule_t));
		assert(final->rules);
	}
	if (initial->count > 0) {
		memcpy(
			&final->rules[final->count], initial->rules,
			initial->count * sizeof(fsa_rule_t));
	}
	final->count = needed;

	// The initial state's rules have been copied, so release them
	// now.  This must happen before the states are shifted down:
	// afterwards this slot holds the following state, whose rules
	// are still in use.
	free(initial->rules);

	// Move states that come after initial state if there are any.
	if (fsa->count - 1 > removed) {
		const int trailing = fsa->count - removed - 1;
		memmove(initial, initial + 1, trailing * sizeof(fsa_state_t));
	}
	--fsa->count;

	// Retarget all remaining states' rules.
	for (int i = 0; i < fsa->count; ++i) {
		fsa_state_t *state = &fsa->states[i];
		for (int j = 0; j < state->count; ++j) {
			if (state->rules[j].next == removed) {
				state->rules[j].next = 0;
			} else if (state->rules[j].next > removed) {
				// All states after the initial state have
				// been moved down by one position.
				--state->rules[j].next;
			}
		}
	}

	fsa->initial = 0;
}

/*
 * Construct the FSA for a single term, including its quantifier, into
 * `out`.  Wildcards, classes, and the plus and question-mark
 * quantifiers are not handled here and trip an assertion.
 */
static void construct_term(const regex_term_t *term, fsa_t *out)
{
	switch (term->type) {
	case REGEX_TERM_EMPTY:
		fsa_init(out);
		break;
	case REGEX_TERM_LITERAL:
		construct_literal(term->literal, out);
		break;
	case REGEX_TERM_SUBEXPR:
		construct(&term->subexpr, out);
		break;
	case REGEX_TERM_WILDCARD:
	case REGEX_TERM_CLASS:
		assert(false);
		break;
	}

	switch (term->quantifier) {
	case REGEX_QUANTIFIER_NONE:
		break;
	case REGEX_QUANTIFIER_STAR:
		star_fsa(out);
		break;
	case REGEX_QUANTIFIER_PLUS:
	case REGEX_QUANTIFIER_QMARK:
		assert(false);
		break;
	}

	assert(out->states[0].final);
}

/*
 * Prepend `other` to `base`: every state of `other` except its final
 * state is moved into `base`, rules that led to the other's final
 * state are pointed at the base's (old) initial state, and the other's
 * initial state becomes the base's initial state.
 *
 * The moved states keep their rule arrays, so ownership of those
 * passes to `base`; the other's state array and its final state's
 * rules are freed.  `other` must not be used afterwards.
 */
static void concat_fsas(fsa_t *base, const fsa_t *other)
{
	// TODO: Handle the other's final state having transition rules.
	assert(0 == other->states[0].count);

	// Copy states other than the final state (index zero) to base.
	const int added = other->count - 1;
	const int new_count = base->count + added;
	if (base->capacity < new_count) {
		base->capacity = grown_capacity(base->capacity, new_count);
		// The size passed to realloc is in bytes, not elements.
		base->states = realloc(
			base->states, base->capacity * sizeof(fsa_state_t));
		assert(base->states);
	}
	if (added > 0) {
		memcpy(
			&base->states[base->count], &other->states[1],
			added * sizeof(fsa_state_t));
	}

	// States' indices have increased by one less than the base
	// count, as the final state came before them and was not
	// copied.
	const int shift = base->count - 1;

	// Retarget new states' rules.
	for (int i = base->count; i < new_count; ++i) {
		fsa_state_t *state = &base->states[i];
		for (int j = 0; j < state->count; ++j) {
			if (0 == state->rules[j].next)
				state->rules[j].next = base->initial;
			else
				state->rules[j].next += shift;
		}
	}

	// If the other's initial state is its final state (which was not
	// copied, and has no rules) there is nothing to prepend, so the
	// base's initial state stays where it is.
	if (0 != other->initial)
		base->initial = other->initial + shift;
	base->count = new_count;

	free(other->states[0].rules);
	free(other->states);

	assert(base->states[0].final);
}

/*
 * Construct the FSA for a sequence of terms into `out`.  The last
 * term is constructed first and each preceding term is then prepended
 * with concat_fsas(), working backwards through the sequence.
 */
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
{
	assert(seq->count > 0);

	fsa_t term_fsa;
	construct_term(&seq->contents[seq->count - 1], out);
	for (int i = seq->count - 2; i >= 0; --i) {
		construct_term(&seq->contents[i], &term_fsa);
		concat_fsas(out, &term_fsa);
	}

	assert(out->states[0].final);
}

/*
 * Rewrite the targets of `rules_count` rules that originated in an
 * FSA being merged into a base FSA, so that they are valid indices in
 * the base.
 *
 *   initial       index of the initial state in the merged-in FSA
 *   base_initial  index of the initial state in the base
 *   base_count    number of states the base had before the merge
 *
 * Rules leading to the final state (index zero) are left alone, as
 * the final state is index zero in the base too.
 */
static void retarget_merged_rules(
	fsa_rule_t *rules, int rules_count, int initial, int base_initial,
	int base_count)
{
	// If the state came before the initial state it should be
	// offset by one less than base_count, because the final state
	// (index zero) came before it and was not copied into the
	// base.
	const int before_offset = base_count - 1;

	// If it came after the initial state it must be offset by two
	// less than base_count because both the final state and the
	// initial state came before it and were not copied -- unless
	// the initial state is the same state as the final state, in
	// which case the offset is still only one less than
	// base_count.
	const int after_offset = base_count - (0 != initial ? 2 : 1);

	for (int i = 0; i < rules_count; ++i) {
		const int next = rules[i].next;
		if (0 == next)
			continue;

		if (next < initial)
			rules[i].next = next + before_offset;
		else if (next > initial)
			rules[i].next = next + after_offset;
		else
			rules[i].next = base_initial;
	}
}

/*
 * Merge `other` into `base` as an alternative: the other's initial
 * state's rules are appended to the base's initial state, and every
 * other state apart from the other's final and initial states is
 * moved into `base` with its rules retargeted.
 *
 * The moved states keep their rule arrays, so ownership of those
 * passes to `base`; the other's state array and the rules of its
 * final and initial states are freed.  `other` must not be used
 * afterwards.
 *
 * NOTE(review): any rules on the other's final state are discarded
 * unless it is also the other's initial state -- confirm this is
 * intended.
 */
static void merge_fsas(fsa_t *base, const fsa_t *other)
{
	const int base_count = base->count;

	// Copy rules from the other's initial state into the base's
	// initial state.
	fsa_state_t *initial = &base->states[base->initial];
	const fsa_state_t *other_initial = &other->states[other->initial];
	const int new_rule_count = initial->count + other_initial->count;
	if (initial->capacity < new_rule_count) {
		initial->capacity
			= grown_capacity(initial->capacity, new_rule_count);
		initial->rules = realloc(
			initial->rules, initial->capacity * sizeof(fsa_rule_t));
		assert(initial->rules);
	}
	if (other_initial->count > 0) {
		memcpy(
			&initial->rules[initial->count], other_initial->rules,
			other_initial->count * sizeof(fsa_rule_t));

		// Retarget the copied rules.
		retarget_merged_rules(
			&initial->rules[initial->count], other_initial->count,
			other->initial, base->initial, base_count);
	}

	// Record the new rule count now: growing the state array below
	// may move it, after which `initial` must not be used.
	initial->count = new_rule_count;
	initial = NULL;

	// Copy other states, skipping the final state and the initial
	// state (which may be one and the same).
	const int skipped_states = other->initial != 0 ? 2 : 1;
	const int new_count = base_count + other->count - skipped_states;
	if (base->capacity < new_count) {
		base->capacity = grown_capacity(base->capacity, new_count);
		base->states = realloc(
			base->states, base->capacity * sizeof(fsa_state_t));
		assert(base->states);
	}

	int offset = base_count;
	if (1 < other->initial) {
		// States strictly between the final and initial states.
		const int copy_count = other->initial - 1;
		memcpy(
			&base->states[offset], &other->states[1],
			copy_count * sizeof(fsa_state_t));
		offset += copy_count;
	}
	if (other->initial < other->count - 1) {
		// States strictly after the initial state.
		const int copy_count = other->count - other->initial - 1;
		memcpy(
			&base->states[offset],
			&other->states[other->initial + 1],
			copy_count * sizeof(fsa_state_t));
	}

	// Retarget the copied states' rules.
	for (int i = base_count; i < new_count; ++i) {
		retarget_merged_rules(
			base->states[i].rules, base->states[i].count,
			other->initial, base->initial, base_count);
	}

	base->count = new_count;

	free(other->states[0].rules);
	if (other->initial != 0)
		free(other->states[other->initial].rules);
	free(other->states);

	assert(base->states[0].final);
}

/*
 * Construct an FSA for `regex` into `out`.  The first sequence is
 * constructed directly into `out`; each further sequence is
 * constructed separately and merged in as an alternative.
 */
void construct(const regex_t *regex, fsa_t *out)
{
	assert(regex->count > 0);

	fsa_t sequence_fsa;
	construct_sequence(&regex->contents[0], out);
	for (int i = 1; i < regex->count; ++i) {
		construct_sequence(&regex->contents[i], &sequence_fsa);
		merge_fsas(out, &sequence_fsa);
	}

	assert(out->states[0].final);
}