regex-engine/tests/construct_tests.c

429 lines
13 KiB
C

/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "construct.h"
#include "testing.h"
/*
 * Depth-first search for an accepting run of `nfa` over `input`,
 * starting at the state whose index is `state_id`.
 *
 * Returns true when some chain of rules starting from that state
 * consumes exactly the remaining characters of `input` and stops in
 * a final state; returns false otherwise.
 *
 * No record of visited states is kept, so a cycle made purely of
 * EPSILON rules can recurse without bound.
 */
static bool
accepts_from_state(const fsa_t *nfa, int state_id, const char *input)
{
	const fsa_state_t *const current = &nfa->states[state_id];
	const bool exhausted = '\0' == *input;

	// Final state with nothing left to read: already a match, so no
	// rule needs to be explored.
	if (current->final && exhausted)
		return true;

	for (int r = 0; r < current->count; ++r) {
		// Epsilon rule: move on without consuming a character.
		if (EPSILON == current->rules[r].input
			&& accepts_from_state(nfa, current->rules[r].next, input))
			return true;

		// Ordinary rule: must match, and consumes, the next character.
		if (!exhausted && *input == current->rules[r].input
			&& accepts_from_state(
				nfa, current->rules[r].next, input + 1))
			return true;
	}

	return false;
}
/*
 * Report whether `nfa` accepts the whole of `input`, with the search
 * starting from the automaton's initial state.
 */
static bool accepts(const fsa_t *nfa, const char *input)
{
	const bool matched = accepts_from_state(nfa, nfa->initial, input);
	return matched;
}
/*
 * A regex made of one empty term: the constructed automaton must
 * accept "" and reject "a".
 */
static void test_empty_expression(void)
{
	regex_term_t *empty_term = malloc(sizeof *empty_term);
	empty_term->type = REGEX_TERM_EMPTY;
	empty_term->quantifier = REGEX_QUANTIFIER_NONE;

	regex_sequence_t *sequence = malloc(sizeof *sequence);
	sequence->contents = empty_term;
	sequence->capacity = 1;
	sequence->count = 1;

	const regex_t expr
		= { .contents = sequence, .capacity = 1, .count = 1 };

	fsa_t fsa;
	construct_nfa(&expr, &fsa);

	ASSERT_TRUE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "a"));

	regex_free(&expr);
	fsa_free(&fsa);
}
/*
 * A regex made of one wildcard term: the automaton must accept each
 * of the one-character inputs "a" to "d", and reject "" and "aa".
 */
static void test_wildcard(void)
{
	regex_term_t *any_char = malloc(sizeof *any_char);
	any_char->type = REGEX_TERM_WILDCARD;
	any_char->quantifier = REGEX_QUANTIFIER_NONE;

	regex_sequence_t *sequence = malloc(sizeof *sequence);
	sequence->contents = any_char;
	sequence->capacity = 1;
	sequence->count = 1;

	const regex_t expr
		= { .contents = sequence, .capacity = 1, .count = 1 };

	fsa_t fsa;
	construct_nfa(&expr, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "b"));
	ASSERT_TRUE(accepts(&fsa, "c"));
	ASSERT_TRUE(accepts(&fsa, "d"));
	ASSERT_FALSE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "aa"));

	regex_free(&expr);
	fsa_free(&fsa);
}
/*
 * A regex made of the single literal 'a': the automaton must accept
 * "a" and reject "b".
 */
static void test_literal_expression(void)
{
	regex_term_t *letter = malloc(sizeof *letter);
	letter->type = REGEX_TERM_LITERAL;
	letter->literal = 'a';
	letter->quantifier = REGEX_QUANTIFIER_NONE;

	regex_sequence_t *sequence = malloc(sizeof *sequence);
	sequence->contents = letter;
	sequence->capacity = 1;
	sequence->count = 1;

	const regex_t expr
		= { .contents = sequence, .capacity = 1, .count = 1 };

	fsa_t fsa;
	construct_nfa(&expr, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_FALSE(accepts(&fsa, "b"));

	regex_free(&expr);
	fsa_free(&fsa);
}
/*
 * The three-literal sequence "abc": only the exact string "abc" may
 * be accepted; the prefixes "a" and "ab", the unrelated "d" and the
 * over-long "abcd" must all be rejected.
 */
static void test_sequence(void)
{
	enum { TERM_COUNT = 3 };
	static const char letters[] = "abc";

	regex_term_t *letter_terms = malloc(TERM_COUNT * sizeof *letter_terms);
	for (int t = 0; t < TERM_COUNT; ++t) {
		letter_terms[t].type = REGEX_TERM_LITERAL;
		letter_terms[t].literal = letters[t];
		letter_terms[t].quantifier = REGEX_QUANTIFIER_NONE;
	}

	regex_sequence_t *sequence = malloc(sizeof *sequence);
	sequence->contents = letter_terms;
	sequence->capacity = TERM_COUNT;
	sequence->count = TERM_COUNT;

	regex_t expr = { .contents = sequence, .capacity = 1, .count = 1 };

	fsa_t fsa;
	construct_nfa(&expr, &fsa);

	ASSERT_TRUE(accepts(&fsa, "abc"));
	ASSERT_FALSE(accepts(&fsa, "a"));
	ASSERT_FALSE(accepts(&fsa, "ab"));
	ASSERT_FALSE(accepts(&fsa, "d"));
	ASSERT_FALSE(accepts(&fsa, "abcd"));

	regex_free(&expr);
	fsa_free(&fsa);
}
/*
 * The union a|b|c, built as three alternatives of one literal each:
 * "a", "b" and "c" must be accepted; "d" and "aa" must be rejected.
 */
static void test_union(void)
{
	enum { BRANCH_COUNT = 3 };
	const char *const letters = "abc";

	regex_sequence_t *branches = malloc(BRANCH_COUNT * sizeof *branches);
	for (regex_sequence_t *branch = branches;
		 branch < branches + BRANCH_COUNT; ++branch) {
		regex_term_t *letter = malloc(sizeof *letter);
		letter->type = REGEX_TERM_LITERAL;
		letter->literal = letters[branch - branches];
		letter->quantifier = REGEX_QUANTIFIER_NONE;

		branch->contents = letter;
		branch->capacity = 1;
		branch->count = 1;
	}

	regex_t expr = { .contents = branches,
					 .capacity = BRANCH_COUNT,
					 .count = BRANCH_COUNT };

	fsa_t fsa;
	construct_nfa(&expr, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "b"));
	ASSERT_TRUE(accepts(&fsa, "c"));
	ASSERT_FALSE(accepts(&fsa, "d"));
	ASSERT_FALSE(accepts(&fsa, "aa"));

	regex_free(&expr);
	fsa_free(&fsa);
}
/*
 * The starred literal a*: "", "a" and "aaaaaa" must be accepted and
 * "b" must be rejected.
 */
static void test_star(void)
{
	regex_term_t *starred = malloc(sizeof *starred);
	starred->type = REGEX_TERM_LITERAL;
	starred->literal = 'a';
	starred->quantifier = REGEX_QUANTIFIER_STAR;

	regex_sequence_t *sequence = malloc(sizeof *sequence);
	sequence->contents = starred;
	sequence->capacity = 1;
	sequence->count = 1;

	regex_t expr = { .contents = sequence, .capacity = 1, .count = 1 };

	fsa_t fsa;
	construct_nfa(&expr, &fsa);

	ASSERT_TRUE(accepts(&fsa, ""));
	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "aaaaaa"));
	ASSERT_FALSE(accepts(&fsa, "b"));

	regex_free(&expr);
	fsa_free(&fsa);
}
static void test_subexpression(void)
{
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "b"));
regex_free(&regex);
fsa_free(&fsa);
}
/*
 * The non-negated character class [abc]: each of "a", "b" and "c"
 * must be accepted; "", "aa" and "d" must be rejected.
 */
static void test_class(void)
{
	enum { MEMBER_COUNT = 3 };
	static const char members[MEMBER_COUNT] = { 'a', 'b', 'c' };

	// The class contents are heap-allocated, like every other part of
	// the regex built here.
	char *class_chars = malloc(MEMBER_COUNT);
	for (int m = 0; m < MEMBER_COUNT; ++m)
		class_chars[m] = members[m];

	regex_term_t *class_term = malloc(sizeof *class_term);
	class_term->type = REGEX_TERM_CLASS;
	class_term->quantifier = REGEX_QUANTIFIER_NONE;
	class_term->class.contents = class_chars;
	class_term->class.capacity = MEMBER_COUNT;
	class_term->class.count = MEMBER_COUNT;
	class_term->class.negated = false;

	regex_sequence_t *sequence = malloc(sizeof *sequence);
	sequence->contents = class_term;
	sequence->capacity = 1;
	sequence->count = 1;

	const regex_t expr
		= { .contents = sequence, .capacity = 1, .count = 1 };

	fsa_t fsa;
	construct_nfa(&expr, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "b"));
	ASSERT_TRUE(accepts(&fsa, "c"));
	ASSERT_FALSE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "aa"));
	ASSERT_FALSE(accepts(&fsa, "d"));

	regex_free(&expr);
	fsa_free(&fsa);
}
/*
 * The negated character class [^abc]: "d" and "e" must be accepted;
 * the listed members "a", "b", "c", the empty string and "aa" must
 * all be rejected.
 */
static void test_negated_class(void)
{
	enum { MEMBER_COUNT = 3 };
	static const char excluded[MEMBER_COUNT] = { 'a', 'b', 'c' };

	char *class_chars = malloc(MEMBER_COUNT);
	for (int m = 0; m < MEMBER_COUNT; ++m)
		class_chars[m] = excluded[m];

	regex_term_t *class_term = malloc(sizeof *class_term);
	class_term->type = REGEX_TERM_CLASS;
	class_term->quantifier = REGEX_QUANTIFIER_NONE;
	class_term->class.contents = class_chars;
	class_term->class.capacity = MEMBER_COUNT;
	class_term->class.count = MEMBER_COUNT;
	class_term->class.negated = true;

	regex_sequence_t *sequence = malloc(sizeof *sequence);
	sequence->contents = class_term;
	sequence->capacity = 1;
	sequence->count = 1;

	const regex_t expr
		= { .contents = sequence, .capacity = 1, .count = 1 };

	fsa_t fsa;
	construct_nfa(&expr, &fsa);

	ASSERT_TRUE(accepts(&fsa, "d"));
	ASSERT_TRUE(accepts(&fsa, "e"));
	ASSERT_FALSE(accepts(&fsa, "a"));
	ASSERT_FALSE(accepts(&fsa, "b"));
	ASSERT_FALSE(accepts(&fsa, "c"));
	ASSERT_FALSE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "aa"));

	regex_free(&expr);
	fsa_free(&fsa);
}
static void test_sequence_containing_starred_union(void)
{
// ab(c|d)*
regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t));
inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms0[0].type = REGEX_TERM_LITERAL;
inner_terms0[0].literal = 'c';
regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t));
inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms1[0].type = REGEX_TERM_LITERAL;
inner_terms1[0].literal = 'd';
regex_sequence_t *inner_alternatives
= malloc(2 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms0;
inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
inner_alternatives[1].contents = inner_terms1;
regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
terms[1].quantifier = REGEX_QUANTIFIER_NONE;
terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b';
terms[2].quantifier = REGEX_QUANTIFIER_STAR;
terms[2].type = REGEX_TERM_SUBEXPR;
terms[2].subexpr.count = terms[2].subexpr.capacity = 2;
terms[2].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "ab"));
ASSERT_TRUE(accepts(&fsa, "abc"));
ASSERT_TRUE(accepts(&fsa, "abccc"));
ASSERT_TRUE(accepts(&fsa, "abd"));
ASSERT_TRUE(accepts(&fsa, "abddd"));
ASSERT_TRUE(accepts(&fsa, "abcddcc"));
ASSERT_TRUE(accepts(&fsa, "abddccd"));
ASSERT_FALSE(accepts(&fsa, "c"));
ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "foo"));
regex_free(&regex);
fsa_free(&fsa);
}
/*
 * The compound expression a|b*c: "a", "c", "bc" and "bbbbbc" must be
 * accepted; "foo" and "ba" must be rejected.
 */
static void
test_union_of_single_term_and_sequence_containing_starred_term(void)
{
	// First alternative: the lone literal 'a'.
	regex_term_t *lone_a = malloc(sizeof *lone_a);
	lone_a->type = REGEX_TERM_LITERAL;
	lone_a->literal = 'a';
	lone_a->quantifier = REGEX_QUANTIFIER_NONE;

	// Second alternative: starred 'b' followed by a plain 'c'.
	regex_term_t *b_star_c = malloc(2 * sizeof *b_star_c);
	regex_term_t *b_star = &b_star_c[0];
	b_star->type = REGEX_TERM_LITERAL;
	b_star->literal = 'b';
	b_star->quantifier = REGEX_QUANTIFIER_STAR;
	regex_term_t *plain_c = &b_star_c[1];
	plain_c->type = REGEX_TERM_LITERAL;
	plain_c->literal = 'c';
	plain_c->quantifier = REGEX_QUANTIFIER_NONE;

	regex_sequence_t *branches = malloc(2 * sizeof *branches);
	branches[0].contents = lone_a;
	branches[0].capacity = 1;
	branches[0].count = 1;
	branches[1].contents = b_star_c;
	branches[1].capacity = 2;
	branches[1].count = 2;

	regex_t expr = { .contents = branches, .capacity = 2, .count = 2 };

	fsa_t fsa;
	construct_nfa(&expr, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "c"));
	ASSERT_TRUE(accepts(&fsa, "bc"));
	ASSERT_TRUE(accepts(&fsa, "bbbbbc"));
	ASSERT_FALSE(accepts(&fsa, "foo"));
	ASSERT_FALSE(accepts(&fsa, "ba"));

	regex_free(&expr);
	fsa_free(&fsa);
}
static void test_sequence_of_subexpr_a_or_empty_and_b(void)
{
// (a|ε)b
regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t));
inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms0[0].type = REGEX_TERM_LITERAL;
inner_terms0[0].literal = 'a';
regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t));
inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms1[0].type = REGEX_TERM_EMPTY;
regex_sequence_t *inner_alternatives
= malloc(2 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms0;
inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
inner_alternatives[1].contents = inner_terms1;
regex_term_t *terms = malloc(2 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 2;
terms[0].subexpr.contents = inner_alternatives;
terms[1].quantifier = REGEX_QUANTIFIER_NONE;
terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 2;
alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "ab"));
ASSERT_TRUE(accepts(&fsa, "b"));
ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "a"));
regex_free(&regex);
fsa_free(&fsa);
}
int main(void)
{
TESTING_BEGIN();
// Base cases
test_empty_expression();
test_literal_expression();
test_wildcard();
test_sequence();
test_union();
test_star();
test_subexpression();
test_class();
test_negated_class();
// Compound expressions
test_sequence_containing_starred_union();
test_union_of_single_term_and_sequence_containing_starred_term();
test_sequence_of_subexpr_a_or_empty_and_b();
return TESTING_END();
}