Add non-base-case construct tests and fix construct logic
This commit is contained in:
parent
3eb782f59f
commit
55e4e4f5ee
313
lib/construct.c
313
lib/construct.c
@ -9,69 +9,138 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
static void construct_literal(char literal, fsa_t *out)
|
static void add_fsa(fsa_t *f, const fsa_t *o, int *init_out, int *final_out)
|
||||||
|
{
|
||||||
|
assert(f != o);
|
||||||
|
|
||||||
|
// Ensure f has enough space for o's states, then copy o's states
|
||||||
|
// into f.
|
||||||
|
const int count = f->count + o->count;
|
||||||
|
if (f->capacity < count) {
|
||||||
|
do
|
||||||
|
f->capacity *= 2;
|
||||||
|
while (f->capacity < count);
|
||||||
|
f->states = realloc(f->states, f->capacity * sizeof(fsa_state_t));
|
||||||
|
assert(f->states);
|
||||||
|
}
|
||||||
|
memcpy(f->states + f->count, o->states, o->count * sizeof(fsa_state_t));
|
||||||
|
|
||||||
|
// Retarget the rules of the copied states to refer to the new
|
||||||
|
// state indices.
|
||||||
|
for (int i = f->count; i < count; ++i) {
|
||||||
|
for (int j = 0; j < f->states[i].count; ++j)
|
||||||
|
f->states[i].rules[j].next += f->count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up o's remaining resources. All of the states have been
|
||||||
|
// copied to f so we just need to free its states buffer.
|
||||||
|
free(o->states);
|
||||||
|
|
||||||
|
if (NULL != init_out)
|
||||||
|
*init_out = o->initial + f->count;
|
||||||
|
if (NULL != final_out)
|
||||||
|
*final_out = f->count;
|
||||||
|
f->count = count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void retarget_prepended_rules(
|
||||||
|
fsa_rule_t *rules, int n, int idx_offset, int init_idx)
|
||||||
|
{
|
||||||
|
for (fsa_rule_t *r = rules; r < rules + n; ++r) {
|
||||||
|
if (0 == r->next)
|
||||||
|
r->next = init_idx;
|
||||||
|
else
|
||||||
|
r->next += idx_offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void prepend_fsa(fsa_t *f, const fsa_t *o)
|
||||||
|
{
|
||||||
|
assert(f != 0);
|
||||||
|
|
||||||
|
// Ensure f's initial state has enough space for the rules from
|
||||||
|
// o's final state.
|
||||||
|
fsa_state_t *f_init = &f->states[f->initial];
|
||||||
|
const fsa_state_t *o_final = &o->states[0];
|
||||||
|
const int rule_count = f_init->count + o_final->count;
|
||||||
|
if (f_init->capacity < rule_count) {
|
||||||
|
do
|
||||||
|
f_init->capacity *= 2;
|
||||||
|
while (f_init->capacity < rule_count);
|
||||||
|
f_init->rules
|
||||||
|
= realloc(f_init->rules, f_init->capacity * sizeof(fsa_rule_t));
|
||||||
|
assert(f_init->rules);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy o's final state's rules into f's intial state, then
|
||||||
|
// retarget them.
|
||||||
|
fsa_rule_t *start = f_init->rules + f_init->count;
|
||||||
|
memcpy(start, o_final->rules, o_final->count * sizeof(fsa_rule_t));
|
||||||
|
retarget_prepended_rules(
|
||||||
|
start, o_final->count, f->count - 1, f->initial);
|
||||||
|
|
||||||
|
// Ensure f has enough space for the new states.
|
||||||
|
const int count = f->count + o->count - 1;
|
||||||
|
if (f->capacity < count) {
|
||||||
|
do
|
||||||
|
f->capacity *= 2;
|
||||||
|
while (f->capacity < count);
|
||||||
|
f->states = realloc(f->states, f->capacity * sizeof(fsa_state_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy o's states into f, skipping index zero (the final state).
|
||||||
|
fsa_state_t *dst = f->states + f->count;
|
||||||
|
const fsa_state_t *src = o->states + 1;
|
||||||
|
const int copy_count = o->count - 1;
|
||||||
|
memcpy(dst, src, copy_count * sizeof(fsa_state_t));
|
||||||
|
|
||||||
|
// Retarget the rules of all the newly-copied states.
|
||||||
|
for (int i = f->count; i < count; ++i) {
|
||||||
|
retarget_prepended_rules(
|
||||||
|
f->states[i].rules, f->states[i].count, f->count - 1,
|
||||||
|
f->initial);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up o's remaining resources. The final state was not
|
||||||
|
// copied to f, so that must be cleaned up along with the states
|
||||||
|
// buffer.
|
||||||
|
free(o->states[0].rules);
|
||||||
|
free(o->states);
|
||||||
|
|
||||||
|
if (0 != o->initial)
|
||||||
|
f->initial = o->initial + f->count - 1;
|
||||||
|
f->count = count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void construct_base(fsa_t *out, int symbol)
|
||||||
{
|
{
|
||||||
fsa_init(out);
|
fsa_init(out);
|
||||||
const int id = fsa_add_state(out);
|
const int id = fsa_add_state(out);
|
||||||
fsa_add_rule(out, id, out->initial, literal);
|
fsa_add_rule(out, id, out->initial, symbol);
|
||||||
out->initial = id;
|
out->initial = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void star_fsa(fsa_t *fsa)
|
static void construct_star(fsa_t *out)
|
||||||
{
|
{
|
||||||
// If the initial state is already the final state then nothing
|
fsa_t f;
|
||||||
// needs to be done.
|
memcpy(&f, out, sizeof(fsa_t));
|
||||||
if (0 == fsa->initial)
|
|
||||||
return;
|
|
||||||
|
|
||||||
// Copy inital state's rules to final state.
|
construct_base(out, EPSILON);
|
||||||
fsa_state_t *final = &fsa->states[0];
|
int f_initial, f_final;
|
||||||
const fsa_state_t *initial = &fsa->states[fsa->initial];
|
add_fsa(out, &f, &f_initial, &f_final);
|
||||||
if (final->capacity < final->count + initial->count) {
|
fsa_add_rule(out, out->initial, f_initial, EPSILON);
|
||||||
do
|
fsa_add_rule(out, f_final, f_initial, EPSILON);
|
||||||
final->capacity *= 2;
|
fsa_add_rule(out, f_final, 0, EPSILON);
|
||||||
while (final->capacity < final->count + initial->count);
|
|
||||||
final->rules
|
|
||||||
= realloc(final->rules, final->capacity * sizeof(fsa_rule_t));
|
|
||||||
assert(final->rules);
|
|
||||||
}
|
|
||||||
const int copy_size = initial->count * sizeof(fsa_rule_t);
|
|
||||||
memcpy(&final->rules[final->count], initial->rules, copy_size);
|
|
||||||
final->count += initial->count;
|
|
||||||
|
|
||||||
// Move states that come after initial state if there are any.
|
|
||||||
if (fsa->count - 1 > fsa->initial) {
|
|
||||||
const int count = fsa->count - fsa->initial - 1;
|
|
||||||
fsa_state_t *start = &fsa->states[fsa->initial];
|
|
||||||
memmove(start, start + 1, count * sizeof(fsa_state_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Retarget all states' rules.
|
|
||||||
for (int i = 0; i < fsa->count - 1; ++i) {
|
|
||||||
for (int j = 0; j < fsa->states[i].count; ++j) {
|
|
||||||
if (fsa->states[i].rules[j].next == fsa->initial)
|
|
||||||
fsa->states[i].rules[j].next = 0;
|
|
||||||
else if (fsa->states[i].rules[j].next > fsa->initial)
|
|
||||||
// All states after the initial state have been moved
|
|
||||||
// down by one position.
|
|
||||||
--fsa->states[i].rules[j].next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
--fsa->count;
|
|
||||||
fsa->initial = 0;
|
|
||||||
|
|
||||||
free(initial->rules);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void construct_term(const regex_term_t *term, fsa_t *out)
|
static void construct_term(const regex_term_t *term, fsa_t *out)
|
||||||
{
|
{
|
||||||
switch (term->type) {
|
switch (term->type) {
|
||||||
case REGEX_TERM_EMPTY:
|
case REGEX_TERM_EMPTY:
|
||||||
fsa_init(out);
|
construct_base(out, EPSILON);
|
||||||
break;
|
break;
|
||||||
case REGEX_TERM_LITERAL:
|
case REGEX_TERM_LITERAL:
|
||||||
construct_literal(term->literal, out);
|
construct_base(out, term->literal);
|
||||||
break;
|
break;
|
||||||
case REGEX_TERM_SUBEXPR:
|
case REGEX_TERM_SUBEXPR:
|
||||||
construct(&term->subexpr, out);
|
construct(&term->subexpr, out);
|
||||||
@ -86,7 +155,7 @@ static void construct_term(const regex_term_t *term, fsa_t *out)
|
|||||||
case REGEX_QUANTIFIER_NONE:
|
case REGEX_QUANTIFIER_NONE:
|
||||||
break;
|
break;
|
||||||
case REGEX_QUANTIFIER_STAR:
|
case REGEX_QUANTIFIER_STAR:
|
||||||
star_fsa(out);
|
construct_star(out);
|
||||||
break;
|
break;
|
||||||
case REGEX_QUANTIFIER_PLUS:
|
case REGEX_QUANTIFIER_PLUS:
|
||||||
case REGEX_QUANTIFIER_QMARK:
|
case REGEX_QUANTIFIER_QMARK:
|
||||||
@ -97,46 +166,6 @@ static void construct_term(const regex_term_t *term, fsa_t *out)
|
|||||||
assert(out->states[0].final);
|
assert(out->states[0].final);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void concat_fsas(fsa_t *base, const fsa_t *other)
|
|
||||||
{
|
|
||||||
// TODO: Handle the other's final state having transition rules.
|
|
||||||
assert(0 == other->states[0].count);
|
|
||||||
|
|
||||||
// Copy states other than the final state (index zero) to base.
|
|
||||||
const int new_count = base->count + other->count - 1;
|
|
||||||
if (base->capacity < new_count) {
|
|
||||||
do
|
|
||||||
base->capacity *= 2;
|
|
||||||
while (base->capacity < new_count);
|
|
||||||
base->states = realloc(base->states, base->capacity);
|
|
||||||
assert(base->states);
|
|
||||||
}
|
|
||||||
const int copy_size = (other->count - 1) * sizeof(fsa_state_t);
|
|
||||||
memcpy(&base->states[base->count], &other->states[1], copy_size);
|
|
||||||
|
|
||||||
// Retarget new states' rules.
|
|
||||||
for (int i = base->count; i < new_count; ++i) {
|
|
||||||
fsa_state_t *state = &base->states[i];
|
|
||||||
for (int j = 0; j < state->count; ++j) {
|
|
||||||
if (0 == state->rules[j].next)
|
|
||||||
state->rules[j].next = base->initial;
|
|
||||||
else
|
|
||||||
// States' indices have increased by one less than the
|
|
||||||
// base count, as the final state came before them and
|
|
||||||
// was not copied.
|
|
||||||
state->rules[j].next += base->count - 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
base->initial = other->initial + base->count - 1;
|
|
||||||
base->count = new_count;
|
|
||||||
|
|
||||||
free(other->states[0].rules);
|
|
||||||
free(other->states);
|
|
||||||
|
|
||||||
assert(base->states[0].final);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
|
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
|
||||||
{
|
{
|
||||||
assert(seq->count > 0);
|
assert(seq->count > 0);
|
||||||
@ -145,109 +174,29 @@ static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
|
|||||||
construct_term(&seq->contents[seq->count - 1], out);
|
construct_term(&seq->contents[seq->count - 1], out);
|
||||||
for (int i = seq->count - 2; i >= 0; --i) {
|
for (int i = seq->count - 2; i >= 0; --i) {
|
||||||
construct_term(&seq->contents[i], &term_fsa);
|
construct_term(&seq->contents[i], &term_fsa);
|
||||||
concat_fsas(out, &term_fsa);
|
prepend_fsa(out, &term_fsa);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(out->states[0].final);
|
assert(out->states[0].final);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void retarget_merged_rules(
|
static void construct_union(fsa_t *f, const fsa_t *o)
|
||||||
fsa_rule_t *rules, int rules_count, int initial, int base_initial,
|
|
||||||
int base_count)
|
|
||||||
{
|
{
|
||||||
for (int i = 0; i < rules_count; ++i) {
|
fsa_t g;
|
||||||
if (0 == rules[i].next)
|
memcpy(&g, f, sizeof(fsa_t));
|
||||||
continue;
|
|
||||||
|
|
||||||
// If the state came before the initial state it should be
|
fsa_init(f);
|
||||||
// offset by one less than base_count, because the final state
|
f->initial = fsa_add_state(f);
|
||||||
// (index zero) came before it and was not copied into the
|
|
||||||
// base.
|
|
||||||
const int before_offset = base_count - 1;
|
|
||||||
|
|
||||||
// If it came after the initial state it must be offset by two
|
int init, final;
|
||||||
// less than base_count because both the final state and the
|
|
||||||
// initial state came before it and were not copied -- unless
|
|
||||||
// the initial state is the same state as the final state, in
|
|
||||||
// which case the offset is still only one less than
|
|
||||||
// base_count.
|
|
||||||
const int after_offset = base_count - (0 != initial ? 2 : 1);
|
|
||||||
|
|
||||||
if (rules[i].next < initial)
|
add_fsa(f, &g, &init, &final);
|
||||||
rules[i].next += before_offset;
|
fsa_add_rule(f, f->initial, init, EPSILON);
|
||||||
else if (rules[i].next > initial)
|
fsa_add_rule(f, final, 0, EPSILON);
|
||||||
rules[i].next += after_offset;
|
|
||||||
else if (rules[i].next == initial)
|
|
||||||
rules[i].next = base_initial;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void merge_fsas(fsa_t *base, const fsa_t *other)
|
add_fsa(f, o, &init, &final);
|
||||||
{
|
fsa_add_rule(f, f->initial, init, EPSILON);
|
||||||
// Copy rules from the other's initial state into the base's
|
fsa_add_rule(f, final, 0, EPSILON);
|
||||||
// initial state.
|
|
||||||
fsa_state_t *initial = &base->states[base->initial];
|
|
||||||
const fsa_state_t *other_initial = &other->states[other->initial];
|
|
||||||
const int new_rule_count = initial->count + other_initial->count;
|
|
||||||
if (initial->capacity < new_rule_count) {
|
|
||||||
do
|
|
||||||
initial->capacity *= 2;
|
|
||||||
while (initial->capacity < new_rule_count);
|
|
||||||
initial->rules = realloc(
|
|
||||||
initial->rules, initial->capacity * sizeof(fsa_rule_t));
|
|
||||||
assert(initial->rules);
|
|
||||||
}
|
|
||||||
memcpy(
|
|
||||||
&initial->rules[initial->count], other_initial->rules,
|
|
||||||
other_initial->count * sizeof(fsa_rule_t));
|
|
||||||
|
|
||||||
// Retarget the copied rules.
|
|
||||||
retarget_merged_rules(
|
|
||||||
&initial->rules[initial->count], other_initial->count,
|
|
||||||
other->initial, base->initial, base->count);
|
|
||||||
|
|
||||||
// Copy other states, skipping the initial state.
|
|
||||||
const int skipped_states = other->initial != 0 ? 2 : 1;
|
|
||||||
const int new_count = base->count + other->count - skipped_states;
|
|
||||||
if (base->capacity < new_count) {
|
|
||||||
do
|
|
||||||
base->capacity *= 2;
|
|
||||||
while (base->capacity < new_count);
|
|
||||||
base->states
|
|
||||||
= realloc(base->states, base->capacity * sizeof(fsa_state_t));
|
|
||||||
assert(base->states);
|
|
||||||
}
|
|
||||||
int offset = base->count;
|
|
||||||
if (1 < other->initial) {
|
|
||||||
const int copy_count = other->initial - 1;
|
|
||||||
const int copy_size = copy_count * sizeof(fsa_state_t);
|
|
||||||
memcpy(&base->states[offset], &other->states[1], copy_size);
|
|
||||||
offset += copy_count;
|
|
||||||
}
|
|
||||||
if (other->initial < other->count - 1) {
|
|
||||||
const int copy_count = other->count - other->initial - 1;
|
|
||||||
const int copy_size = copy_count * sizeof(fsa_state_t);
|
|
||||||
memcpy(
|
|
||||||
&base->states[offset], &other->states[other->initial],
|
|
||||||
copy_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Retarget the copied states' rules.
|
|
||||||
for (int i = base->count; i < new_count; ++i) {
|
|
||||||
retarget_merged_rules(
|
|
||||||
base->states[i].rules, base->states[i].count, other->initial,
|
|
||||||
base->initial, base->count);
|
|
||||||
}
|
|
||||||
|
|
||||||
initial->count = new_rule_count;
|
|
||||||
base->count = new_count;
|
|
||||||
|
|
||||||
free(other->states[0].rules);
|
|
||||||
if (other->initial != 0)
|
|
||||||
free(other->states[other->initial].rules);
|
|
||||||
free(other->states);
|
|
||||||
|
|
||||||
assert(base->states[0].final);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void construct(const regex_t *regex, fsa_t *out)
|
void construct(const regex_t *regex, fsa_t *out)
|
||||||
@ -258,7 +207,7 @@ void construct(const regex_t *regex, fsa_t *out)
|
|||||||
construct_sequence(&regex->contents[0], out);
|
construct_sequence(&regex->contents[0], out);
|
||||||
for (int i = 1; i < regex->count; ++i) {
|
for (int i = 1; i < regex->count; ++i) {
|
||||||
construct_sequence(&regex->contents[i], &sequence_fsa);
|
construct_sequence(&regex->contents[i], &sequence_fsa);
|
||||||
merge_fsas(out, &sequence_fsa);
|
construct_union(out, &sequence_fsa);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(out->states[0].final);
|
assert(out->states[0].final);
|
||||||
|
@ -6,28 +6,34 @@
|
|||||||
#include "construct.h"
|
#include "construct.h"
|
||||||
#include "testing.h"
|
#include "testing.h"
|
||||||
|
|
||||||
static bool
|
static const char *
|
||||||
accepts_from_state(const fsa_t *nfa, int state_id, const char *input)
|
match_from_state(const fsa_t *nfa, int state_id, const char *input)
|
||||||
{
|
{
|
||||||
const fsa_state_t *state = &nfa->states[state_id];
|
const fsa_state_t *state = &nfa->states[state_id];
|
||||||
if ('\0' == *input)
|
|
||||||
return state->final;
|
|
||||||
|
|
||||||
|
const bool final = state->final;
|
||||||
|
const bool end_of_input = '\0' == *input;
|
||||||
for (int i = 0; i < state->count; ++i) {
|
for (int i = 0; i < state->count; ++i) {
|
||||||
if (EPSILON == state->rules[i].input
|
if ((!final || !end_of_input) && EPSILON == state->rules[i].input) {
|
||||||
&& accepts_from_state(nfa, state->rules[i].next, input))
|
const char *s
|
||||||
return true;
|
= match_from_state(nfa, state->rules[i].next, input);
|
||||||
if (*input == state->rules[i].input
|
if (NULL != s)
|
||||||
&& accepts_from_state(nfa, state->rules[i].next, input + 1))
|
return s;
|
||||||
return true;
|
}
|
||||||
|
if (!end_of_input && *input == state->rules[i].input) {
|
||||||
|
const char *s
|
||||||
|
= match_from_state(nfa, state->rules[i].next, input + 1);
|
||||||
|
if (NULL != s)
|
||||||
|
return s;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return final ? input : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool accepts(const fsa_t *nfa, const char *input)
|
static const char *match(const fsa_t *nfa, const char *input)
|
||||||
{
|
{
|
||||||
return accepts_from_state(nfa, nfa->initial, input);
|
return match_from_state(nfa, nfa->initial, input);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_empty_expression(void)
|
static void test_empty_expression(void)
|
||||||
@ -44,7 +50,7 @@ static void test_empty_expression(void)
|
|||||||
fsa_t fsa;
|
fsa_t fsa;
|
||||||
construct(&regex, &fsa);
|
construct(&regex, &fsa);
|
||||||
|
|
||||||
ASSERT_TRUE(accepts(&fsa, ""));
|
ASSERT_NOT_NULL(match(&fsa, ""));
|
||||||
|
|
||||||
regex_free(&regex);
|
regex_free(&regex);
|
||||||
fsa_free(&fsa);
|
fsa_free(&fsa);
|
||||||
@ -65,8 +71,8 @@ static void test_literal_expression(void)
|
|||||||
fsa_t fsa;
|
fsa_t fsa;
|
||||||
construct(&regex, &fsa);
|
construct(&regex, &fsa);
|
||||||
|
|
||||||
ASSERT_TRUE(accepts(&fsa, "a"));
|
ASSERT_NOT_NULL(match(&fsa, "a"));
|
||||||
ASSERT_FALSE(accepts(&fsa, "b"));
|
ASSERT_NULL(match(&fsa, "b"));
|
||||||
|
|
||||||
regex_free(&regex);
|
regex_free(&regex);
|
||||||
fsa_free(&fsa);
|
fsa_free(&fsa);
|
||||||
@ -89,10 +95,14 @@ static void test_sequence(void)
|
|||||||
fsa_t fsa;
|
fsa_t fsa;
|
||||||
construct(&regex, &fsa);
|
construct(&regex, &fsa);
|
||||||
|
|
||||||
ASSERT_TRUE(accepts(&fsa, "abc"));
|
ASSERT_NOT_NULL(match(&fsa, "abc"));
|
||||||
ASSERT_FALSE(accepts(&fsa, "a"));
|
ASSERT_NULL(match(&fsa, "a"));
|
||||||
ASSERT_FALSE(accepts(&fsa, "ab"));
|
ASSERT_NULL(match(&fsa, "ab"));
|
||||||
ASSERT_FALSE(accepts(&fsa, "d"));
|
ASSERT_NULL(match(&fsa, "d"));
|
||||||
|
|
||||||
|
const char *s = "abcd";
|
||||||
|
const char *t = match(&fsa, s);
|
||||||
|
ASSERT_EQ(s + 3, t);
|
||||||
|
|
||||||
regex_free(&regex);
|
regex_free(&regex);
|
||||||
fsa_free(&fsa);
|
fsa_free(&fsa);
|
||||||
@ -116,10 +126,14 @@ static void test_union(void)
|
|||||||
fsa_t fsa;
|
fsa_t fsa;
|
||||||
construct(&regex, &fsa);
|
construct(&regex, &fsa);
|
||||||
|
|
||||||
ASSERT_TRUE(accepts(&fsa, "a"));
|
ASSERT_NOT_NULL(match(&fsa, "a"));
|
||||||
ASSERT_TRUE(accepts(&fsa, "b"));
|
ASSERT_NOT_NULL(match(&fsa, "b"));
|
||||||
ASSERT_TRUE(accepts(&fsa, "c"));
|
ASSERT_NOT_NULL(match(&fsa, "c"));
|
||||||
ASSERT_FALSE(accepts(&fsa, "d"));
|
ASSERT_NULL(match(&fsa, "d"));
|
||||||
|
|
||||||
|
const char *s = "aa";
|
||||||
|
const char *t = match(&fsa, s);
|
||||||
|
ASSERT_EQ(s + 1, t);
|
||||||
|
|
||||||
regex_free(&regex);
|
regex_free(&regex);
|
||||||
fsa_free(&fsa);
|
fsa_free(&fsa);
|
||||||
@ -139,10 +153,13 @@ static void test_star(void)
|
|||||||
fsa_t fsa;
|
fsa_t fsa;
|
||||||
construct(&regex, &fsa);
|
construct(&regex, &fsa);
|
||||||
|
|
||||||
ASSERT_TRUE(accepts(&fsa, ""));
|
ASSERT_NOT_NULL(match(&fsa, ""));
|
||||||
ASSERT_TRUE(accepts(&fsa, "a"));
|
ASSERT_NOT_NULL(match(&fsa, "a"));
|
||||||
ASSERT_TRUE(accepts(&fsa, "aaaaaa"));
|
ASSERT_NOT_NULL(match(&fsa, "aaaaaa"));
|
||||||
ASSERT_FALSE(accepts(&fsa, "b"));
|
|
||||||
|
const char *s = "b";
|
||||||
|
const char *t = match(&fsa, s);
|
||||||
|
ASSERT_EQ(s, t);
|
||||||
|
|
||||||
regex_free(&regex);
|
regex_free(&regex);
|
||||||
fsa_free(&fsa);
|
fsa_free(&fsa);
|
||||||
@ -171,8 +188,97 @@ static void test_subexpression(void)
|
|||||||
fsa_t fsa;
|
fsa_t fsa;
|
||||||
construct(&regex, &fsa);
|
construct(&regex, &fsa);
|
||||||
|
|
||||||
ASSERT_TRUE(accepts(&fsa, "a"));
|
ASSERT_NOT_NULL(match(&fsa, "a"));
|
||||||
ASSERT_FALSE(accepts(&fsa, "b"));
|
ASSERT_NULL(match(&fsa, "b"));
|
||||||
|
|
||||||
|
regex_free(&regex);
|
||||||
|
fsa_free(&fsa);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_sequence_containing_starred_union(void)
|
||||||
|
{
|
||||||
|
// ab(c|d)*
|
||||||
|
regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t));
|
||||||
|
inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
|
||||||
|
inner_terms0[0].type = REGEX_TERM_LITERAL;
|
||||||
|
inner_terms0[0].literal = 'c';
|
||||||
|
regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t));
|
||||||
|
inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE;
|
||||||
|
inner_terms1[0].type = REGEX_TERM_LITERAL;
|
||||||
|
inner_terms1[0].literal = 'd';
|
||||||
|
regex_sequence_t *inner_alternatives
|
||||||
|
= malloc(2 * sizeof(regex_sequence_t));
|
||||||
|
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
|
||||||
|
inner_alternatives[0].contents = inner_terms0;
|
||||||
|
inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
|
||||||
|
inner_alternatives[1].contents = inner_terms1;
|
||||||
|
regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
|
||||||
|
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
||||||
|
terms[0].type = REGEX_TERM_LITERAL;
|
||||||
|
terms[0].literal = 'a';
|
||||||
|
terms[1].quantifier = REGEX_QUANTIFIER_NONE;
|
||||||
|
terms[1].type = REGEX_TERM_LITERAL;
|
||||||
|
terms[1].literal = 'b';
|
||||||
|
terms[2].quantifier = REGEX_QUANTIFIER_NONE;
|
||||||
|
terms[2].type = REGEX_TERM_SUBEXPR;
|
||||||
|
terms[2].subexpr.count = terms[2].subexpr.capacity = 2;
|
||||||
|
terms[2].subexpr.contents = inner_alternatives;
|
||||||
|
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
|
||||||
|
alternatives[0].count = alternatives[0].capacity = 1;
|
||||||
|
alternatives[0].contents = terms;
|
||||||
|
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
|
||||||
|
|
||||||
|
fsa_t fsa;
|
||||||
|
construct(&regex, &fsa);
|
||||||
|
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "ab"));
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "abc"));
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "abccc"));
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "abd"));
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "abddd"));
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "abcddcc"));
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "abddccd"));
|
||||||
|
ASSERT_NULL(match(&fsa, "c"));
|
||||||
|
ASSERT_NULL(match(&fsa, "d"));
|
||||||
|
ASSERT_NULL(match(&fsa, "foo"));
|
||||||
|
|
||||||
|
regex_free(&regex);
|
||||||
|
fsa_free(&fsa);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_union_of_single_term_and_sequence_containing_starred_term(void)
|
||||||
|
{
|
||||||
|
regex_term_t *terms0 = malloc(1 * sizeof(regex_term_t));
|
||||||
|
terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
|
||||||
|
terms0[0].type = REGEX_TERM_LITERAL;
|
||||||
|
terms0[0].literal = 'a';
|
||||||
|
regex_term_t *terms1 = malloc(2 * sizeof(regex_term_t));
|
||||||
|
terms1[0].quantifier = REGEX_QUANTIFIER_STAR;
|
||||||
|
terms1[0].type = REGEX_TERM_LITERAL;
|
||||||
|
terms1[0].literal = 'b';
|
||||||
|
terms1[1].quantifier = REGEX_QUANTIFIER_NONE;
|
||||||
|
terms1[1].type = REGEX_TERM_LITERAL;
|
||||||
|
terms1[1].literal = 'c';
|
||||||
|
regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t));
|
||||||
|
alternatives[0].count = alternatives[0].capacity = 1;
|
||||||
|
alternatives[0].contents = terms0;
|
||||||
|
alternatives[1].count = alternatives[1].capacity = 2;
|
||||||
|
alternatives[1].contents = terms1;
|
||||||
|
regex_t regex = { .count = 2, .capacity = 2, .contents = alternatives };
|
||||||
|
|
||||||
|
fsa_t fsa;
|
||||||
|
construct(&regex, &fsa);
|
||||||
|
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "a"));
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "c"));
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "bc"));
|
||||||
|
ASSERT_NOT_NULL(match(&fsa, "bbbbbc"));
|
||||||
|
ASSERT_NULL(match(&fsa, "foo"));
|
||||||
|
|
||||||
|
const char *s = "ba";
|
||||||
|
const char *t = match(&fsa, s);
|
||||||
|
ASSERT_EQ(s + 1, t);
|
||||||
|
|
||||||
regex_free(&regex);
|
regex_free(&regex);
|
||||||
fsa_free(&fsa);
|
fsa_free(&fsa);
|
||||||
@ -181,11 +287,18 @@ static void test_subexpression(void)
|
|||||||
int main(void)
|
int main(void)
|
||||||
{
|
{
|
||||||
TESTING_BEGIN();
|
TESTING_BEGIN();
|
||||||
|
|
||||||
|
// Base cases
|
||||||
test_empty_expression();
|
test_empty_expression();
|
||||||
test_literal_expression();
|
test_literal_expression();
|
||||||
test_sequence();
|
test_sequence();
|
||||||
test_union();
|
test_union();
|
||||||
test_star();
|
test_star();
|
||||||
test_subexpression();
|
test_subexpression();
|
||||||
|
|
||||||
|
// Compound expressions
|
||||||
|
test_sequence_containing_starred_union();
|
||||||
|
test_union_of_single_term_and_sequence_containing_starred_term();
|
||||||
|
|
||||||
return TESTING_END();
|
return TESTING_END();
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user