Implement first iteration of parser and write test scripts

This commit is contained in:
Camden Dixie O'Brien 2024-10-25 13:33:31 +01:00
parent 63facb3954
commit 584e92c29c
6 changed files with 190 additions and 1 deletions

84
lib/parser.c Normal file
View File

@ -0,0 +1,84 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "parser.h"
#include <stdbool.h>
#include <stdlib.h>
/* Report whether c is a regex metacharacter; '|' is the only one so far. */
static bool is_special(char c)
{
	return c == '|';
}
/*
 * Parse a single literal character from the start of input.
 *
 * input: text being parsed.
 * rem:   number of characters of input that may still be read.
 * out:   receives the literal character on success.
 *
 * Returns the number of characters consumed (always 1), or -1 when the
 * input is exhausted or the next character is a special character.
 */
static int parse_literal(const char *input, int rem, char *out)
{
	/*
	 * Both conditions must hold for a literal. rem is tested first so
	 * that input[0] is never read once the input has run out.
	 */
	if (rem <= 0 || is_special(input[0]))
		return -1;

	*out = input[0];
	return 1;
}
/*
 * Parse one term from the start of input into out.
 *
 * A term is currently just a literal character with no quantifier.
 *
 * Returns the number of characters consumed, or -1 if no literal could
 * be parsed (out->quantifier and out->type are not written in that case).
 */
static int parse_term(const char *input, int rem, term_t *out)
{
	const int consumed = parse_literal(input, rem, &out->literal);
	if (consumed < 0)
		return -1;

	out->quantifier = QUANTIFIER_NONE;
	out->type = TERM_TYPE_LITERAL;
	return consumed;
}
/*
 * Parse a sequence of terms from the start of input into out.
 *
 * This first iteration parses exactly one term: storage for a single
 * term is allocated with calloc() and stored in out->contents.
 *
 * Returns the number of characters consumed, or -1 if the allocation
 * fails or the term cannot be parsed.
 *
 * NOTE(review): when the term fails to parse, the allocated buffer stays
 * attached to out (len 0, capacity 1); presumably the caller releases it
 * together with the rest of the regex -- confirm.
 */
static int parse_sequence(const char *input, int rem, sequence_t *out)
{
	out->len = 0;
	out->capacity = 0;
	out->contents = calloc(1, sizeof *out->contents);
	if (out->contents == NULL)
		return -1; /* out of memory: nothing to parse into */
	out->capacity = 1;

	const int consumed = parse_term(input, rem, &out->contents[0]);
	if (consumed < 0)
		return -1;

	out->len = 1;
	return consumed;
}
/*
 * Parse the first rem characters of input as a regex into out.
 *
 * A regex is a sequence, optionally followed by '|' and another regex.
 * The part after the '|' is parsed recursively into a freshly allocated
 * regex_t stored in out->alternative; without a '|', out->alternative is
 * set to NULL.
 *
 * Returns the number of characters consumed, or -1 if the input cannot
 * be parsed or an allocation fails.
 *
 * NOTE(review): on failure, anything already allocated stays attached to
 * out; presumably the caller releases it -- confirm.
 */
int parse_regex(const char *input, int rem, regex_t *out)
{
	/* Do not rely on the caller having zero-initialised out. */
	out->alternative = NULL;

	int used = parse_sequence(input, rem, &out->sequence);
	if (used < 0)
		return -1;

	/* Whole input consumed: there is no alternative. */
	if (used >= rem)
		return used;

	/* Anything left over must start with the alternation operator. */
	if (input[used] != '|')
		return -1;
	++used;

	out->alternative = calloc(1, sizeof *out->alternative);
	if (out->alternative == NULL)
		return -1;

	const int result = parse_regex(input + used, rem - used, out->alternative);
	if (result < 0)
		return -1;

	return used + result;
}

13
lib/parser.h Normal file
View File

@ -0,0 +1,13 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef PARSER_H
#define PARSER_H
#include "regex.h"
/*
 * Parse the first rem characters of input as a regex and store the
 * result in out.
 *
 * Returns the number of characters consumed, or -1 if the input could
 * not be parsed.
 *
 * Parsing allocates memory that is reachable from out.
 * NOTE(review): presumably this is released with regex_free_children(),
 * as the parser tests do -- confirm.
 */
int parse_regex(const char *input, int rem, regex_t *out);
#endif

View File

@ -7,5 +7,11 @@ CFLAGS="$CFLAGS -O0 -ggdb"
mkdir -p build mkdir -p build
# Build library # Build library
clang $CFLAGS -Ilib -c lib/parser.c -o build/parser.o
clang $CFLAGS -Ilib -c lib/regex.c -o build/regex.o clang $CFLAGS -Ilib -c lib/regex.c -o build/regex.o
ar -crs build/lib.a build/regex.o ar -crs build/lib.a build/parser.o build/regex.o
# Build tests
clang $CFLAGS -Itests -c tests/testing.c -o build/testing.o
clang $CFLAGS -Ilib -Itests -o build/parser_tests \
tests/parser_tests.c build/testing.o build/lib.a

3
scripts/entr.sh Normal file
View File

@ -0,0 +1,3 @@
# Rebuild and rerun the tests whenever a file in the repository changes.
# Uses entr(1); the .git and build directories are left out of the watch list.

# Everything below uses repository-relative paths, so stop if the cd fails.
cd "$(git rev-parse --show-toplevel)" || exit 1

find . -not \( -path './.git' -prune \) -not \( -path './build' -prune \) \
	| entr -s 'clear && sh scripts/build.sh && sh scripts/test.sh'

7
scripts/test.sh Normal file
View File

@ -0,0 +1,7 @@
# Run every test binary and report the overall result.
# Exits 0 when all test binaries pass and 1 otherwise.

# The test binaries are addressed relative to the repository root.
cd "$(git rev-parse --show-toplevel)" || exit 1

fails=0
build/parser_tests || fails=$((fails + 1))

if [ "$fails" -eq 0 ]; then
	echo Tests OK
else
	# Propagate the failure so callers (entr, CI, && chains) can see it.
	exit 1
fi

76
tests/parser_tests.c Normal file
View File

@ -0,0 +1,76 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "parser.h"
#include "testing.h"
/*
 * Parse the NUL-terminated string s into the regex_t pointed to by r.
 * Note that s is evaluated twice. The cast makes the size_t -> int
 * conversion for parse_regex()'s rem parameter explicit.
 */
#define PARSE_REGEX_STRING(s, r) parse_regex((s), (int)strlen(s), (r))
/* Parsing "a" must succeed and leave the regex without an alternative. */
static void a_has_no_alternative(void)
{
regex_t r = { 0 };
const int result = PARSE_REGEX_STRING("a", &r);
ASSERT_NE(-1, result);
ASSERT_NULL(r.alternative);
regex_free_children(&r);
}
/* Parsing "a|b" must succeed and attach an alternative to the regex. */
static void a_pipe_b_has_alternative(void)
{
regex_t r = { 0 };
const int result = PARSE_REGEX_STRING("a|b", &r);
ASSERT_NE(-1, result);
ASSERT_NOT_NULL(r.alternative);
regex_free_children(&r);
}
/*
 * Parsing "a|b|c" must succeed and chain the alternatives: the regex has
 * an alternative, which in turn has an alternative of its own.
 */
static void a_pipe_b_pipe_c_result_alternative_has_alternative(void)
{
regex_t r = { 0 };
const int result = PARSE_REGEX_STRING("a|b|c", &r);
ASSERT_NE(-1, result);
ASSERT_NOT_NULL(r.alternative);
ASSERT_NOT_NULL(r.alternative->alternative);
regex_free_children(&r);
}
/*
 * Parsing "a" must yield a sequence of exactly one term: the literal 'a'
 * with no quantifier.
 */
static void a_is_parsed_as_unquantified_literal(void)
{
regex_t r = { 0 };
const int result = PARSE_REGEX_STRING("a", &r);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type);
ASSERT_EQ('a', r.sequence.contents[0].literal);
regex_free_children(&r);
}
/*
 * Parsing "b" must yield a sequence of exactly one term: the literal 'b'
 * with no quantifier. Uses a second character so the stored literal is
 * shown to come from the input.
 */
static void b_is_parsed_as_unquantified_literal(void)
{
regex_t r = { 0 };
const int result = PARSE_REGEX_STRING("b", &r);
ASSERT_NE(-1, result);
ASSERT_EQ(1, r.sequence.len);
ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier);
ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type);
ASSERT_EQ('b', r.sequence.contents[0].literal);
regex_free_children(&r);
}
/*
 * Test runner entry point: runs every parser test case between
 * TESTING_BEGIN() and TESTING_END(), and returns the value of
 * TESTING_END() as the process exit status.
 */
int main(void)
{
TESTING_BEGIN();
/* Alternation structure. */
a_has_no_alternative();
a_pipe_b_has_alternative();
a_pipe_b_pipe_c_result_alternative_has_alternative();
/* Literal terms. */
a_is_parsed_as_unquantified_literal();
b_is_parsed_as_unquantified_literal();
return TESTING_END();
}