Implement NFA construction for empty expression and literals

This commit is contained in:
Camden Dixie O'Brien 2024-10-26 22:24:21 +01:00
parent 5d980cf64b
commit 2ac92f62f1
5 changed files with 128 additions and 1 deletions

45
lib/construct.c Normal file
View File

@ -0,0 +1,45 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "construct.h"
#include <assert.h>
#include <stdlib.h>
/*
 * Build into `out` the automaton for a single literal character.
 *
 * `out` is initialised here with fsa_init().  One further state is
 * then added, a rule on `literal` is registered from that new state
 * to the state fsa_init() left as initial, and the new state takes
 * over as the initial state.
 */
static void construct_literal(char literal, fsa_t *out)
{
	fsa_init(out);

	/* out->initial still names the state set up by fsa_init() when
	 * the rule is added; it is only redirected afterwards. */
	const int start = fsa_add_state(out);
	fsa_add_rule(out, start, out->initial, literal);
	out->initial = start;
}
/*
 * Build into `out` the automaton for a single term, dispatching on
 * the term's type.  The term's quantifier is not consulted.
 *
 * Only empty and literal terms are translated.  There is deliberately
 * no default label, so every enumerator is listed explicitly.
 */
static void construct_term(const regex_term_t *term, fsa_t *out)
{
	switch (term->type) {
	case REGEX_TERM_WILDCARD:
	case REGEX_TERM_CLASS:
		/* Not handled here; trips the assertion.  With NDEBUG
		 * defined the assertion vanishes and `out` is left
		 * untouched. */
		assert(false);
		return;

	case REGEX_TERM_SUBEXPR:
		/* NOTE(review): returns without writing to `out` --
		 * presumably a placeholder until sub-expressions are
		 * supported; confirm callers do not rely on `out`. */
		return;

	case REGEX_TERM_LITERAL:
		construct_literal(term->literal, out);
		return;

	case REGEX_TERM_EMPTY:
		/* A freshly initialised automaton is the whole result. */
		fsa_init(out);
		return;
	}
}
/*
 * Build into `out` the automaton for a sequence of terms.
 *
 * Only the first term of the sequence is translated; any further
 * terms are not looked at.  The sequence's contents must therefore
 * hold at least one term.
 */
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
{
	const regex_term_t *first_term = seq->contents;
	construct_term(first_term, out);
}
/*
 * Build into `out` the automaton for `regex`.
 *
 * Only the first alternative of the expression is translated; any
 * further alternatives are not looked at.  The expression's contents
 * must therefore hold at least one sequence.
 */
void construct(const regex_t *regex, fsa_t *out)
{
	const regex_sequence_t *first_alternative = regex->contents;
	construct_sequence(first_alternative, out);
}

14
lib/construct.h Normal file
View File

@ -0,0 +1,14 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef CONSTRUCT_H
#define CONSTRUCT_H
#include "fsa.h"
#include "regex.h"
/*
 * Build a finite state automaton for `regex` and write it to `out`.
 *
 * `regex` is only read.  `out` may be passed uninitialised; the tests
 * release it with fsa_free() once they are done with it.
 *
 * NOTE(review): at present only the first term of the first
 * alternative is translated, and only empty and literal terms are
 * handled.
 */
void construct(const regex_t *regex, fsa_t *out);
#endif

View File

@@ -13,7 +13,9 @@ clang $CFLAGS -Ilib -c lib/parse.c -o build/parse.o
 clang $CFLAGS -Ilib -c lib/desugar.c -o build/desugar.o
 clang $CFLAGS -Ilib -c lib/regex.c -o build/regex.o
 clang $CFLAGS -Ilib -c lib/fsa.c -o build/fsa.o
-ar -crs build/lib.a build/parse.o build/desugar.o build/regex.o build/fsa.o
+clang $CFLAGS -Ilib -c lib/construct.c -o build/construct.o
+ar -crs build/lib.a build/parse.o build/desugar.o build/regex.o \
+build/fsa.o build/construct.o
 # Build tests
 clang $CFLAGS -Itests -c tests/testing.c -o build/testing.o
@@ -23,3 +25,5 @@ clang $CFLAGS -Ilib -Itests -o build/desugar_tests \
 tests/desugar_tests.c build/testing.o build/lib.a
 clang $CFLAGS -Ilib -Itests -o build/fsa_tests \
 tests/fsa_tests.c build/testing.o build/lib.a
+clang $CFLAGS -Ilib -Itests -o build/construct_tests \
+tests/construct_tests.c build/testing.o build/lib.a

View File

@@ -7,5 +7,6 @@ fails=0
 build/parse_tests || fails=`expr $fails + 1`
 build/desugar_tests || fails=`expr $fails + 1`
 build/fsa_tests || fails=`expr $fails + 1`
+build/construct_tests || fails=`expr $fails + 1`
 if [ $fails -eq 0 ]; then echo Tests OK; fi

63
tests/construct_tests.c Normal file
View File

@ -0,0 +1,63 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "construct.h"
#include "testing.h"
/*
 * An expression made of a single empty term should yield an automaton
 * with exactly one state, which is final and has no outgoing rules.
 */
static void test_empty_expression(void)
{
	/* Hand-build the expression: one alternative holding one term. */
	regex_term_t *term = malloc(sizeof *term);
	term->type = REGEX_TERM_EMPTY;
	term->quantifier = REGEX_QUANTIFIER_NONE;

	regex_sequence_t *sequence = malloc(sizeof *sequence);
	sequence->contents = term;
	sequence->capacity = 1;
	sequence->count = 1;

	const regex_t regex
		= { .contents = sequence, .count = 1, .capacity = 1 };

	fsa_t fsa;
	construct(&regex, &fsa);

	ASSERT_EQ(1, fsa.count);
	ASSERT_TRUE(fsa.states[fsa.initial].final);
	ASSERT_EQ(0, fsa.states[fsa.initial].count);

	/* Both heap allocations above are handed to regex_free() via
	 * the expression. */
	regex_free(&regex);
	fsa_free(&fsa);
}
/*
 * An expression made of the single literal 'a' should yield a
 * two-state automaton: the initial state has one rule on 'a', and the
 * state that rule leads to is final with no outgoing rules.
 */
static void test_literal_expression(void)
{
	/* Hand-build the expression: one alternative holding one term. */
	regex_term_t *term = malloc(sizeof *term);
	term->type = REGEX_TERM_LITERAL;
	term->literal = 'a';
	term->quantifier = REGEX_QUANTIFIER_NONE;

	regex_sequence_t *sequence = malloc(sizeof *sequence);
	sequence->contents = term;
	sequence->capacity = 1;
	sequence->count = 1;

	const regex_t regex
		= { .contents = sequence, .count = 1, .capacity = 1 };

	fsa_t fsa;
	construct(&regex, &fsa);

	const fsa_state_t *initial = &fsa.states[fsa.initial];
	ASSERT_EQ(2, fsa.count);
	ASSERT_EQ(1, initial->count);
	ASSERT_EQ('a', initial->rules[0].input);
	ASSERT_TRUE(fsa.states[initial->rules[0].next].final);
	ASSERT_EQ(0, fsa.states[initial->rules[0].next].count);

	/* Both heap allocations above are handed to regex_free() via
	 * the expression. */
	regex_free(&regex);
	fsa_free(&fsa);
}
/*
 * Test driver: runs every construction test between the harness's
 * begin/end markers and returns whatever TESTING_END() evaluates to.
 */
int main(void)
{
	TESTING_BEGIN();

	test_empty_expression();
	test_literal_expression();

	const int status = TESTING_END();
	return status;
}