305 lines
8.7 KiB
C
305 lines
8.7 KiB
C
/*
 * Copyright (c) Camden Dixie O'Brien
 * SPDX-License-Identifier: AGPL-3.0-only
 */
|
|
|
|
#include "construct.h"
|
|
#include "testing.h"
|
|
|
|
static const char *
|
|
match_from_state(const fsa_t *nfa, int state_id, const char *input)
|
|
{
|
|
const fsa_state_t *state = &nfa->states[state_id];
|
|
|
|
const bool final = state->final;
|
|
const bool end_of_input = '\0' == *input;
|
|
for (int i = 0; i < state->count; ++i) {
|
|
if ((!final || !end_of_input) && EPSILON == state->rules[i].input) {
|
|
const char *s
|
|
= match_from_state(nfa, state->rules[i].next, input);
|
|
if (NULL != s)
|
|
return s;
|
|
}
|
|
if (!end_of_input && *input == state->rules[i].input) {
|
|
const char *s
|
|
= match_from_state(nfa, state->rules[i].next, input + 1);
|
|
if (NULL != s)
|
|
return s;
|
|
}
|
|
}
|
|
|
|
return final ? input : NULL;
|
|
}
|
|
|
|
static const char *match(const fsa_t *nfa, const char *input)
|
|
{
|
|
return match_from_state(nfa, nfa->initial, input);
|
|
}
|
|
|
|
static void test_empty_expression(void)
|
|
{
|
|
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
|
|
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
|
terms[0].type = REGEX_TERM_EMPTY;
|
|
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
|
|
alternatives[0].count = alternatives[0].capacity = 1;
|
|
alternatives[0].contents = terms;
|
|
const regex_t regex
|
|
= { .count = 1, .capacity = 1, .contents = alternatives };
|
|
|
|
fsa_t fsa;
|
|
construct(®ex, &fsa);
|
|
|
|
ASSERT_NOT_NULL(match(&fsa, ""));
|
|
|
|
regex_free(®ex);
|
|
fsa_free(&fsa);
|
|
}
|
|
|
|
static void test_literal_expression(void)
|
|
{
|
|
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
|
|
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
|
terms[0].type = REGEX_TERM_LITERAL;
|
|
terms[0].literal = 'a';
|
|
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
|
|
alternatives[0].count = alternatives[0].capacity = 1;
|
|
alternatives[0].contents = terms;
|
|
const regex_t regex
|
|
= { .count = 1, .capacity = 1, .contents = alternatives };
|
|
|
|
fsa_t fsa;
|
|
construct(®ex, &fsa);
|
|
|
|
ASSERT_NOT_NULL(match(&fsa, "a"));
|
|
ASSERT_NULL(match(&fsa, "b"));
|
|
|
|
regex_free(®ex);
|
|
fsa_free(&fsa);
|
|
}
|
|
|
|
static void test_sequence(void)
|
|
{
|
|
regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
|
|
terms[0].type = REGEX_TERM_LITERAL;
|
|
terms[0].literal = 'a';
|
|
terms[1].type = REGEX_TERM_LITERAL;
|
|
terms[1].literal = 'b';
|
|
terms[2].type = REGEX_TERM_LITERAL;
|
|
terms[2].literal = 'c';
|
|
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
|
|
alternatives[0].count = alternatives[0].capacity = 3;
|
|
alternatives[0].contents = terms;
|
|
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
|
|
|
|
fsa_t fsa;
|
|
construct(®ex, &fsa);
|
|
|
|
ASSERT_NOT_NULL(match(&fsa, "abc"));
|
|
ASSERT_NULL(match(&fsa, "a"));
|
|
ASSERT_NULL(match(&fsa, "ab"));
|
|
ASSERT_NULL(match(&fsa, "d"));
|
|
|
|
const char *s = "abcd";
|
|
const char *t = match(&fsa, s);
|
|
ASSERT_EQ(s + 3, t);
|
|
|
|
regex_free(®ex);
|
|
fsa_free(&fsa);
|
|
}
|
|
|
|
static void test_union(void)
|
|
{
|
|
const char *literals = "abc";
|
|
regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t));
|
|
for (int i = 0; i < 3; ++i) {
|
|
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
|
|
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
|
terms[0].type = REGEX_TERM_LITERAL;
|
|
terms[0].literal = literals[i];
|
|
|
|
alternatives[i].count = alternatives[i].capacity = 1;
|
|
alternatives[i].contents = terms;
|
|
}
|
|
regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives };
|
|
|
|
fsa_t fsa;
|
|
construct(®ex, &fsa);
|
|
|
|
ASSERT_NOT_NULL(match(&fsa, "a"));
|
|
ASSERT_NOT_NULL(match(&fsa, "b"));
|
|
ASSERT_NOT_NULL(match(&fsa, "c"));
|
|
ASSERT_NULL(match(&fsa, "d"));
|
|
|
|
const char *s = "aa";
|
|
const char *t = match(&fsa, s);
|
|
ASSERT_EQ(s + 1, t);
|
|
|
|
regex_free(®ex);
|
|
fsa_free(&fsa);
|
|
}
|
|
|
|
static void test_star(void)
|
|
{
|
|
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
|
|
terms[0].quantifier = REGEX_QUANTIFIER_STAR;
|
|
terms[0].type = REGEX_TERM_LITERAL;
|
|
terms[0].literal = 'a';
|
|
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
|
|
alternatives[0].count = alternatives[0].capacity = 1;
|
|
alternatives[0].contents = terms;
|
|
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
|
|
|
|
fsa_t fsa;
|
|
construct(®ex, &fsa);
|
|
|
|
ASSERT_NOT_NULL(match(&fsa, ""));
|
|
ASSERT_NOT_NULL(match(&fsa, "a"));
|
|
ASSERT_NOT_NULL(match(&fsa, "aaaaaa"));
|
|
|
|
const char *s = "b";
|
|
const char *t = match(&fsa, s);
|
|
ASSERT_EQ(s, t);
|
|
|
|
regex_free(®ex);
|
|
fsa_free(&fsa);
|
|
}
|
|
|
|
static void test_subexpression(void)
|
|
{
|
|
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
|
|
inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
|
inner_terms[0].type = REGEX_TERM_LITERAL;
|
|
inner_terms[0].literal = 'a';
|
|
regex_sequence_t *inner_alternatives
|
|
= malloc(1 * sizeof(regex_sequence_t));
|
|
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
|
|
inner_alternatives[0].contents = inner_terms;
|
|
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
|
|
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
|
terms[0].type = REGEX_TERM_SUBEXPR;
|
|
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
|
|
terms[0].subexpr.contents = inner_alternatives;
|
|
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
|
|
alternatives[0].count = alternatives[0].capacity = 1;
|
|
alternatives[0].contents = terms;
|
|
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
|
|
|
|
fsa_t fsa;
|
|
construct(®ex, &fsa);
|
|
|
|
ASSERT_NOT_NULL(match(&fsa, "a"));
|
|
ASSERT_NULL(match(&fsa, "b"));
|
|
|
|
regex_free(®ex);
|
|
fsa_free(&fsa);
|
|
}
|
|
|
|
static void test_sequence_containing_starred_union(void)
|
|
{
|
|
// ab(c|d)*
|
|
regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t));
|
|
inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
|
|
inner_terms0[0].type = REGEX_TERM_LITERAL;
|
|
inner_terms0[0].literal = 'c';
|
|
regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t));
|
|
inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE;
|
|
inner_terms1[0].type = REGEX_TERM_LITERAL;
|
|
inner_terms1[0].literal = 'd';
|
|
regex_sequence_t *inner_alternatives
|
|
= malloc(2 * sizeof(regex_sequence_t));
|
|
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
|
|
inner_alternatives[0].contents = inner_terms0;
|
|
inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
|
|
inner_alternatives[1].contents = inner_terms1;
|
|
regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
|
|
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
|
terms[0].type = REGEX_TERM_LITERAL;
|
|
terms[0].literal = 'a';
|
|
terms[1].quantifier = REGEX_QUANTIFIER_NONE;
|
|
terms[1].type = REGEX_TERM_LITERAL;
|
|
terms[1].literal = 'b';
|
|
terms[2].quantifier = REGEX_QUANTIFIER_NONE;
|
|
terms[2].type = REGEX_TERM_SUBEXPR;
|
|
terms[2].subexpr.count = terms[2].subexpr.capacity = 2;
|
|
terms[2].subexpr.contents = inner_alternatives;
|
|
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
|
|
alternatives[0].count = alternatives[0].capacity = 1;
|
|
alternatives[0].contents = terms;
|
|
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
|
|
|
|
fsa_t fsa;
|
|
construct(®ex, &fsa);
|
|
|
|
ASSERT_NOT_NULL(match(&fsa, "ab"));
|
|
ASSERT_NOT_NULL(match(&fsa, "abc"));
|
|
ASSERT_NOT_NULL(match(&fsa, "abccc"));
|
|
ASSERT_NOT_NULL(match(&fsa, "abd"));
|
|
ASSERT_NOT_NULL(match(&fsa, "abddd"));
|
|
ASSERT_NOT_NULL(match(&fsa, "abcddcc"));
|
|
ASSERT_NOT_NULL(match(&fsa, "abddccd"));
|
|
ASSERT_NULL(match(&fsa, "c"));
|
|
ASSERT_NULL(match(&fsa, "d"));
|
|
ASSERT_NULL(match(&fsa, "foo"));
|
|
|
|
regex_free(®ex);
|
|
fsa_free(&fsa);
|
|
}
|
|
|
|
static void
|
|
test_union_of_single_term_and_sequence_containing_starred_term(void)
|
|
{
|
|
regex_term_t *terms0 = malloc(1 * sizeof(regex_term_t));
|
|
terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
|
|
terms0[0].type = REGEX_TERM_LITERAL;
|
|
terms0[0].literal = 'a';
|
|
regex_term_t *terms1 = malloc(2 * sizeof(regex_term_t));
|
|
terms1[0].quantifier = REGEX_QUANTIFIER_STAR;
|
|
terms1[0].type = REGEX_TERM_LITERAL;
|
|
terms1[0].literal = 'b';
|
|
terms1[1].quantifier = REGEX_QUANTIFIER_NONE;
|
|
terms1[1].type = REGEX_TERM_LITERAL;
|
|
terms1[1].literal = 'c';
|
|
regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t));
|
|
alternatives[0].count = alternatives[0].capacity = 1;
|
|
alternatives[0].contents = terms0;
|
|
alternatives[1].count = alternatives[1].capacity = 2;
|
|
alternatives[1].contents = terms1;
|
|
regex_t regex = { .count = 2, .capacity = 2, .contents = alternatives };
|
|
|
|
fsa_t fsa;
|
|
construct(®ex, &fsa);
|
|
|
|
ASSERT_NOT_NULL(match(&fsa, "a"));
|
|
ASSERT_NOT_NULL(match(&fsa, "c"));
|
|
ASSERT_NOT_NULL(match(&fsa, "bc"));
|
|
ASSERT_NOT_NULL(match(&fsa, "bbbbbc"));
|
|
ASSERT_NULL(match(&fsa, "foo"));
|
|
|
|
const char *s = "ba";
|
|
const char *t = match(&fsa, s);
|
|
ASSERT_EQ(s + 1, t);
|
|
|
|
regex_free(®ex);
|
|
fsa_free(&fsa);
|
|
}
|
|
|
|
int main(void)
|
|
{
|
|
TESTING_BEGIN();
|
|
|
|
// Base cases
|
|
test_empty_expression();
|
|
test_literal_expression();
|
|
test_sequence();
|
|
test_union();
|
|
test_star();
|
|
test_subexpression();
|
|
|
|
// Compound expressions
|
|
test_sequence_containing_starred_union();
|
|
test_union_of_single_term_and_sequence_containing_starred_term();
|
|
|
|
return TESTING_END();
|
|
}
|