From ad6c385f421234d2ee6b2cc7ff9a166414d3b199 Mon Sep 17 00:00:00 2001
From: Camden Dixie O'Brien
Date: Sat, 26 Oct 2024 19:57:10 +0100
Subject: [PATCH] Define FSA data structure

---
Review notes:
- fsa_add_state() and fsa_add_rule() now pass a byte count to
  realloc(); passing the bare capacity requested too few bytes, so
  the first growth of either array overflowed the heap.
- fsa_add_rule() also rejects negative state ids and inputs.
- Added tests that grow both arrays past their start capacity.
- The blob ids on the "index" lines predate these changes.

 lib/fsa.c         |  71 +++++++++++++++++++++++++++
 lib/fsa.h         |  51 +++++++++++++++++++
 scripts/build.sh  |   5 ++-
 scripts/test.sh   |   1 +
 tests/fsa_tests.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 lib/fsa.c
 create mode 100644 lib/fsa.h
 create mode 100644 tests/fsa_tests.c

diff --git a/lib/fsa.c b/lib/fsa.c
new file mode 100644
index 0000000..a4611aa
--- /dev/null
+++ b/lib/fsa.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) Camden Dixie O'Brien
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+#include "fsa.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+#define FSA_START_CAPACITY 16
+#define STATE_START_CAPACITY 16
+
+void fsa_init(fsa_t *fsa)
+{
+	fsa->count = 0;
+	fsa->capacity = FSA_START_CAPACITY;
+	fsa->states = malloc(fsa->capacity * sizeof(fsa_state_t));
+	assert(NULL != fsa->states);
+
+	fsa->initial = fsa_add_state(fsa);
+	fsa->states[fsa->initial].final = true;
+}
+
+void fsa_free(const fsa_t *fsa)
+{
+	for (int i = 0; i < fsa->count; ++i)
+		free(fsa->states[i].rules);
+	free(fsa->states);
+}
+
+int fsa_add_state(fsa_t *fsa)
+{
+	if (fsa->count >= fsa->capacity) {
+		fsa->capacity *= 2;
+		// realloc() takes a size in bytes, not an element count
+		fsa->states = realloc(
+			fsa->states, fsa->capacity * sizeof(fsa_state_t));
+		assert(NULL != fsa->states);
+	}
+
+	fsa_state_t *state = &fsa->states[fsa->count];
+	state->final = false;
+	state->count = 0;
+	state->capacity = STATE_START_CAPACITY;
+	state->rules = malloc(state->capacity * sizeof(fsa_rule_t));
+	assert(state->rules);
+
+	return fsa->count++;
+}
+
+void fsa_add_rule(fsa_t *fsa, int from, int to, int input)
+{
+	assert(0 <= from && from < fsa->count);
+	assert(0 <= to && to < fsa->count);
+	assert(0 <= input && input < ALPHABET_SIZE);
+
+	fsa_state_t *state = &fsa->states[from];
+	if (state->count >= state->capacity) {
+		state->capacity *= 2;
+		// realloc() takes a size in bytes, not an element count
+		state->rules = realloc(
+			state->rules, state->capacity * sizeof(fsa_rule_t));
+		assert(NULL != state->rules);
+	}
+
+	fsa_rule_t *rule = &state->rules[state->count];
+	rule->input = input;
+	rule->next = to;
+	++state->count;
+}
diff --git a/lib/fsa.h b/lib/fsa.h
new file mode 100644
index 0000000..85ee342
--- /dev/null
+++ b/lib/fsa.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) Camden Dixie O'Brien
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+#ifndef FSA_H
+#define FSA_H
+
+#include <stdbool.h>
+
+#define CHAR_COUNT 256
+#define ALPHABET_SIZE (CHAR_COUNT + 1)
+
+// Use one more than any valid char to represent empty string
+#define EPSILON CHAR_COUNT
+
+// A transition: on `input`, move to the state with index `next`
+typedef struct {
+	int input, next;
+} fsa_rule_t;
+
+// A state and its growable array of outgoing rules
+typedef struct {
+	bool final;
+	int count, capacity;
+	fsa_rule_t *rules;
+} fsa_state_t;
+
+// An automaton: a growable array of states, indexed by state id
+typedef struct {
+	int count, capacity, initial;
+	fsa_state_t *states;
+} fsa_t;
+
+// Initialise `fsa` with a single state that is both initial and
+// final.  Allocation failure is caught by assert().
+void fsa_init(fsa_t *fsa);
+
+// Release the memory owned by `fsa` (not the fsa_t itself).
+void fsa_free(const fsa_t *fsa);
+
+// Append a non-final state with no rules and return its id.  This
+// may move the state array, so pointers into `fsa->states` obtained
+// earlier must not be reused.
+int fsa_add_state(fsa_t *fsa);
+
+// Add a rule taking state `from` to state `to` on `input`.  Both
+// states must exist and `input` must be in [0, ALPHABET_SIZE).
+void fsa_add_rule(fsa_t *fsa, int from, int to, int input);
+
+#endif
diff --git a/scripts/build.sh b/scripts/build.sh
index 1993c8c..803dcdd 100644
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -10,7 +10,8 @@ if [ ! -e build ]; then mkdir build; else rm build/*; fi
 clang $CFLAGS -Ilib -c lib/parse.c -o build/parse.o
 clang $CFLAGS -Ilib -c lib/desugar.c -o build/desugar.o
 clang $CFLAGS -Ilib -c lib/regex.c -o build/regex.o
-ar -crs build/lib.a build/parse.o build/desugar.o build/regex.o
+clang $CFLAGS -Ilib -c lib/fsa.c -o build/fsa.o
+ar -crs build/lib.a build/parse.o build/desugar.o build/regex.o build/fsa.o
 
 # Build tests
 clang $CFLAGS -Itests -c tests/testing.c -o build/testing.o
@@ -18,3 +19,5 @@ clang $CFLAGS -Ilib -Itests -o build/parse_tests \
 	tests/parse_tests.c build/testing.o build/lib.a
 clang $CFLAGS -Ilib -Itests -o build/desugar_tests \
 	tests/desugar_tests.c build/testing.o build/lib.a
+clang $CFLAGS -Ilib -Itests -o build/fsa_tests \
+	tests/fsa_tests.c build/testing.o build/lib.a
diff --git a/scripts/test.sh b/scripts/test.sh
index 2e918c9..014e2d0 100644
--- a/scripts/test.sh
+++ b/scripts/test.sh
@@ -4,5 +4,6 @@ fails=0
 
 build/parse_tests || fails=`expr $fails + 1`
 build/desugar_tests || fails=`expr $fails + 1`
+build/fsa_tests || fails=`expr $fails + 1`
 
 if [ $fails -eq 0 ]; then echo Tests OK; fi
diff --git a/tests/fsa_tests.c b/tests/fsa_tests.c
new file mode 100644
index 0000000..89206c8
--- /dev/null
+++ b/tests/fsa_tests.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) Camden Dixie O'Brien
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+#include "fsa.h"
+#include "testing.h"
+
+static void new_fsa_has_single_state_with_no_rules(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	ASSERT_EQ(1, fsa.count);
+	fsa_free(&fsa);
+}
+
+static void new_fsa_has_initial_state_zero(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	ASSERT_EQ(0, fsa.initial);
+	fsa_free(&fsa);
+}
+
+static void new_fsa_initial_state_has_no_rules(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	ASSERT_EQ(0, fsa.states[fsa.initial].count);
+	fsa_free(&fsa);
+}
+
+static void new_fsa_initial_state_is_final(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	ASSERT_TRUE(fsa.states[fsa.initial].final);
+	fsa_free(&fsa);
+}
+
+static void adding_state_increases_count(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	fsa_add_state(&fsa);
+	ASSERT_EQ(2, fsa.count);
+	fsa_free(&fsa);
+}
+
+static void added_state_is_not_final(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	const int id = fsa_add_state(&fsa);
+	ASSERT_FALSE(fsa.states[id].final);
+	fsa_free(&fsa);
+}
+
+static void added_state_has_no_rules(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	const int id = fsa_add_state(&fsa);
+	ASSERT_EQ(0, fsa.states[id].count);
+	fsa_free(&fsa);
+}
+
+static void adding_rule_increases_count_of_state(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	const int id = fsa_add_state(&fsa);
+	fsa_add_rule(&fsa, id, 0, 'a');
+	ASSERT_EQ(1, fsa.states[id].count);
+	fsa_free(&fsa);
+}
+
+static void added_rule_has_correct_input(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	const int id = fsa_add_state(&fsa);
+	fsa_add_rule(&fsa, id, 0, 'a');
+	ASSERT_EQ('a', fsa.states[id].rules[0].input);
+	fsa_free(&fsa);
+}
+
+static void added_rule_has_correct_next(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	const int id = fsa_add_state(&fsa);
+	fsa_add_rule(&fsa, id, 0, 'a');
+	ASSERT_EQ(0, fsa.states[id].rules[0].next);
+	fsa_free(&fsa);
+}
+
+static void growing_past_start_capacity_keeps_states(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	// More than twice the start capacity, so the array doubles twice
+	int last = fsa.initial;
+	for (int i = 0; i < 40; ++i)
+		last = fsa_add_state(&fsa);
+	ASSERT_EQ(41, fsa.count);
+	ASSERT_EQ(40, last);
+	ASSERT_TRUE(fsa.states[fsa.initial].final);
+	ASSERT_EQ(0, fsa.states[last].count);
+	fsa_free(&fsa);
+}
+
+static void growing_past_start_capacity_keeps_rules(void)
+{
+	fsa_t fsa;
+	fsa_init(&fsa);
+	const int id = fsa_add_state(&fsa);
+	// More than twice the start capacity, so the array doubles twice
+	for (int i = 0; i < 40; ++i)
+		fsa_add_rule(&fsa, id, 0, i);
+	ASSERT_EQ(40, fsa.states[id].count);
+	ASSERT_EQ(0, fsa.states[id].rules[0].input);
+	ASSERT_EQ(39, fsa.states[id].rules[39].input);
+	ASSERT_EQ(0, fsa.states[id].rules[39].next);
+	fsa_free(&fsa);
+}
+
+int main(void)
+{
+	TESTING_BEGIN();
+	new_fsa_has_single_state_with_no_rules();
+	new_fsa_has_initial_state_zero();
+	new_fsa_initial_state_has_no_rules();
+	new_fsa_initial_state_is_final();
+	adding_state_increases_count();
+	added_state_is_not_final();
+	added_state_has_no_rules();
+	adding_rule_increases_count_of_state();
+	added_rule_has_correct_input();
+	added_rule_has_correct_next();
+	growing_past_start_capacity_keeps_states();
+	growing_past_start_capacity_keeps_rules();
+	return TESTING_END();
+}