From 584e92c29c6da4cc8a852af4b64a67ded7765a2d Mon Sep 17 00:00:00 2001
From: Camden Dixie O'Brien
Date: Fri, 25 Oct 2024 13:33:31 +0100
Subject: [PATCH] Implement first iteration of parser and write test scripts

---
 lib/parser.c         | 117 ++++++++++++++++++++++++++++++++++++++++++++
 lib/parser.h         |  22 +++++++++
 scripts/build.sh     |   8 ++-
 scripts/entr.sh      |   3 ++
 scripts/test.sh      |  12 +++++
 tests/parser_tests.c |  88 +++++++++++++++++++++++++++++++++
 6 files changed, 249 insertions(+), 1 deletion(-)
 create mode 100644 lib/parser.c
 create mode 100644 lib/parser.h
 create mode 100644 scripts/entr.sh
 create mode 100644 scripts/test.sh
 create mode 100644 tests/parser_tests.c

diff --git a/lib/parser.c b/lib/parser.c
new file mode 100644
--- /dev/null
+++ b/lib/parser.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) Camden Dixie O'Brien
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+#include "parser.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+/* Report whether c is a metacharacter rather than a literal. */
+static bool is_special(char c)
+{
+	switch (c) {
+	case '|':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Parse one literal character from the first rem bytes of input.
+ *
+ * On success, stores the character in *out and returns the number of
+ * bytes consumed (1).  Returns -1 if no input remains or if the next
+ * character is special.
+ */
+static int parse_literal(const char *input, int rem, char *out)
+{
+	/* Test rem first so input[0] is only read when input remains. */
+	if (rem > 0 && !is_special(input[0])) {
+		*out = input[0];
+		return 1;
+	} else {
+		return -1;
+	}
+}
+
+/*
+ * Parse one term, which for now is always an unquantified literal.
+ *
+ * Returns the number of bytes consumed, or -1 on failure.
+ */
+static int parse_term(const char *input, int rem, term_t *out)
+{
+	int result, used = 0;
+
+	result = parse_literal(input + used, rem - used, &out->literal);
+	if (result < 0)
+		return -1;
+	out->quantifier = QUANTIFIER_NONE;
+	out->type = TERM_TYPE_LITERAL;
+	used += result;
+
+	return used;
+}
+
+/*
+ * Parse a sequence, which for now is exactly one term.
+ *
+ * Allocates out->contents; the allocation stays attached to *out even
+ * when parsing fails, so releasing it is left to the caller.
+ *
+ * Returns the number of bytes consumed, or -1 on failure (including
+ * allocation failure).
+ */
+static int parse_sequence(const char *input, int rem, sequence_t *out)
+{
+	int result, used = 0;
+
+	out->len = 0;
+	out->capacity = 0;
+	out->contents = calloc(1, sizeof(term_t));
+	if (!out->contents)
+		return -1;
+	out->capacity = 1;
+
+	result = parse_term(input + used, rem - used, &out->contents[0]);
+	if (result < 0)
+		return -1;
+	++out->len;
+	used += result;
+
+	return used;
+}
+
+/*
+ * Parse a sequence, optionally followed by '|' and another regex,
+ * which is heap-allocated and stored in out->alternative.  See
+ * parser.h for the caller-facing contract.
+ */
+int parse_regex(const char *input, int rem, regex_t *out)
+{
+	int result, used = 0;
+
+	result = parse_sequence(input + used, rem - used, &out->sequence);
+	if (result < 0)
+		return -1;
+	used += result;
+
+	if (used < rem) {
+		if (input[used] != '|')
+			return -1;
+		++used;
+
+		out->alternative = calloc(1, sizeof(regex_t));
+		if (!out->alternative)
+			return -1;
+		result = parse_regex(input + used, rem - used, out->alternative);
+		if (result < 0)
+			return -1;
+		used += result;
+	}
+
+	return used;
+}
diff --git a/lib/parser.h b/lib/parser.h
new file mode 100644
--- /dev/null
+++ b/lib/parser.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) Camden Dixie O'Brien
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "regex.h"
+
+/*
+ * Parse the first rem bytes of input as a regex, filling in *out.
+ *
+ * *out should be zero-initialised by the caller: out->alternative is
+ * only written when the input contains an alternative.  Memory
+ * allocated during parsing is left attached to *out, even on failure.
+ *
+ * Returns the number of bytes consumed, or -1 on failure.
+ */
+int parse_regex(const char *input, int rem, regex_t *out);
+
+#endif
diff --git a/scripts/build.sh b/scripts/build.sh
index 2dd4a01..9ec83c1 100644
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -7,5 +7,11 @@ CFLAGS="$CFLAGS -O0 -ggdb"
 mkdir -p build
 
 # Build library
+clang $CFLAGS -Ilib -c lib/parser.c -o build/parser.o
 clang $CFLAGS -Ilib -c lib/regex.c -o build/regex.o
-ar -crs build/lib.a build/regex.o
+ar -crs build/lib.a build/parser.o build/regex.o
+
+# Build tests
+clang $CFLAGS -Itests -c tests/testing.c -o build/testing.o
+clang $CFLAGS -Ilib -Itests -o build/parser_tests \
+	tests/parser_tests.c build/testing.o build/lib.a
diff --git a/scripts/entr.sh b/scripts/entr.sh
new file mode 100644
index 0000000..69d58ca
--- /dev/null
+++ b/scripts/entr.sh
@@ -0,0 +1,3 @@
+cd "$(git rev-parse --show-toplevel)"
+find . -not \( -path './.git' -prune \) -not \( -path './build' -prune \) \
+	| entr -s 'clear && sh scripts/build.sh && sh scripts/test.sh'
diff --git a/scripts/test.sh b/scripts/test.sh
new file mode 100644
--- /dev/null
+++ b/scripts/test.sh
@@ -0,0 +1,12 @@
+cd "$(git rev-parse --show-toplevel)"
+
+fails=0
+
+build/parser_tests || fails=`expr $fails + 1`
+
+# Exit non-zero on failure so callers can tell the run did not pass.
+if [ $fails -eq 0 ]; then
+	echo Tests OK
+else
+	exit 1
+fi
diff --git a/tests/parser_tests.c b/tests/parser_tests.c
new file mode 100644
--- /dev/null
+++ b/tests/parser_tests.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) Camden Dixie O'Brien
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+#include "parser.h"
+#include "testing.h"
+
+#include <string.h>
+
+/* Parse the NUL-terminated string s into *r, using its length as rem. */
+#define PARSE_REGEX_STRING(s, r) parse_regex((s), (int)strlen(s), (r))
+
+static void a_has_no_alternative(void)
+{
+	regex_t r = { 0 };
+	const int result = PARSE_REGEX_STRING("a", &r);
+	ASSERT_NE(-1, result);
+	ASSERT_NULL(r.alternative);
+	regex_free_children(&r);
+}
+
+static void a_pipe_b_has_alternative(void)
+{
+	regex_t r = { 0 };
+	const int result = PARSE_REGEX_STRING("a|b", &r);
+	ASSERT_NE(-1, result);
+	ASSERT_NOT_NULL(r.alternative);
+	regex_free_children(&r);
+}
+
+static void a_pipe_b_pipe_c_result_alternative_has_alternative(void)
+{
+	regex_t r = { 0 };
+	const int result = PARSE_REGEX_STRING("a|b|c", &r);
+	ASSERT_NE(-1, result);
+	ASSERT_NOT_NULL(r.alternative);
+	ASSERT_NOT_NULL(r.alternative->alternative);
+	regex_free_children(&r);
+}
+
+static void a_is_parsed_as_unquantified_literal(void)
+{
+	regex_t r = { 0 };
+	const int result = PARSE_REGEX_STRING("a", &r);
+	ASSERT_NE(-1, result);
+
+	ASSERT_EQ(1, r.sequence.len);
+	ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
+	ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type);
+	ASSERT_EQ('a', r.sequence.contents[0].literal);
+
+	regex_free_children(&r);
+}
+
+static void b_is_parsed_as_unquantified_literal(void)
+{
+	regex_t r = { 0 };
+	const int result = PARSE_REGEX_STRING("b", &r);
+	ASSERT_NE(-1, result);
+
+	ASSERT_EQ(1, r.sequence.len);
+	ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
+	ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type);
+	ASSERT_EQ('b', r.sequence.contents[0].literal);
+
+	regex_free_children(&r);
+}
+
+static void pipe_is_not_parsed_as_literal(void)
+{
+	regex_t r = { 0 };
+	const int result = PARSE_REGEX_STRING("|", &r);
+	ASSERT_EQ(-1, result);
+	regex_free_children(&r);
+}
+
+int main(void)
+{
+	TESTING_BEGIN();
+	a_has_no_alternative();
+	a_pipe_b_has_alternative();
+	a_pipe_b_pipe_c_result_alternative_has_alternative();
+	a_is_parsed_as_unquantified_literal();
+	b_is_parsed_as_unquantified_literal();
+	pipe_is_not_parsed_as_literal();
+	return TESTING_END();
+}