diff --git a/lib/parser.c b/lib/parser.c index 2c4d5a4..0b2b56c 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -101,10 +101,10 @@ static int parse_term(const char *input, int rem, term_t *out) } else if ('(' == input[0]) { ++used; - result = parse_regex(input + used, rem - used, &out->regex); + result = parse_expr(input + used, rem - used, &out->subexpr); if (result < 0) return -1; - out->type = TERM_TYPE_REGEX; + out->type = TERM_TYPE_SUBEXPR; used += result; if (')' != input[used]) @@ -178,7 +178,7 @@ static int parse_sequence(const char *input, int rem, sequence_t *out) return out->len > 0 ? used : -1; } -int parse_regex(const char *input, int rem, regex_t *out) +int parse_expr(const char *input, int rem, parse_tree_t *out) { int result, used = 0; @@ -190,10 +190,10 @@ int parse_regex(const char *input, int rem, regex_t *out) if (used < rem && '|' == input[used]) { ++used; - out->alternative = malloc(sizeof(regex_t)); + out->alternative = malloc(sizeof(parse_tree_t)); if (NULL == out->alternative) return -1; - result = parse_regex(input + used, rem - used, out->alternative); + result = parse_expr(input + used, rem - used, out->alternative); if (result < 0) return -1; used += result; @@ -203,3 +203,38 @@ int parse_regex(const char *input, int rem, regex_t *out) return used; } + +static void class_free(class_t *c) +{ + if (NULL != c->contents) + free(c->contents); +} + +static void sequence_free(sequence_t *s) +{ + if (NULL != s->contents) { + for (int i = 0; i < s->len; ++i) { + switch (s->contents[i].type) { + case TERM_TYPE_CLASS: + class_free(&s->contents[i].class); + break; + case TERM_TYPE_SUBEXPR: + parse_tree_free_children(&s->contents[i].subexpr); + break; + case TERM_TYPE_WILDCARD: + case TERM_TYPE_LITERAL: + break; + } + } + free(s->contents); + } +} + +void parse_tree_free_children(parse_tree_t *t) +{ + sequence_free(&t->sequence); + if (NULL != t->alternative) { + parse_tree_free_children(t->alternative); + free(t->alternative); + } +} diff --git 
a/lib/parser.h b/lib/parser.h index b88933c..22e769e 100644 --- a/lib/parser.h +++ b/lib/parser.h @@ -6,8 +6,50 @@ #ifndef PARSER_H #define PARSER_H -#include "regex.h" +#include <stdbool.h> -int parse_regex(const char *input, int rem, regex_t *out); +typedef struct { + bool negated; + int count, capacity; + char *contents; +} class_t; + +typedef enum { + QUANTIFIER_NONE, + QUANTIFIER_ZERO_OR_MORE, + QUANTIFIER_ONE_OR_MORE, + QUANTIFIER_ZERO_OR_ONE, +} quantifier_t; + +typedef enum { + TERM_TYPE_WILDCARD, + TERM_TYPE_CLASS, + TERM_TYPE_LITERAL, + TERM_TYPE_SUBEXPR, +} term_type_t; + +struct _term; +typedef struct { + int len, capacity; + struct _term *contents; +} sequence_t; + +typedef struct _parse_tree { + sequence_t sequence; + struct _parse_tree *alternative; +} parse_tree_t; + +typedef struct _term { + quantifier_t quantifier; + term_type_t type; + union { + class_t class; + char literal; + parse_tree_t subexpr; + }; +} term_t; + +int parse_expr(const char *input, int rem, parse_tree_t *out); +void parse_tree_free_children(parse_tree_t *t); #endif diff --git a/lib/regex.c b/lib/regex.c deleted file mode 100644 index 2100db9..0000000 --- a/lib/regex.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) Camden Dixie O'Brien - * SPDX-License-Identifier: AGPL-3.0-only - */ - -#include "regex.h" - -#include <stdlib.h> - -static void class_free(class_t *c) -{ - if (NULL != c->contents) - free(c->contents); -} - -static void sequence_free(sequence_t *s) -{ - if (NULL != s->contents) { - for (int i = 0; i < s->len; ++i) { - switch (s->contents[i].type) { - case TERM_TYPE_CLASS: - class_free(&s->contents[i].class); - break; - case TERM_TYPE_REGEX: - regex_free_children(&s->contents[i].regex); - break; - case TERM_TYPE_WILDCARD: - case TERM_TYPE_LITERAL: - break; - } - } - free(s->contents); - } -} - -void regex_free_children(regex_t *r) -{ - sequence_free(&r->sequence); - if (NULL != r->alternative) { - regex_free_children(r->alternative); - free(r->alternative); - } -} diff --git 
a/lib/regex.h b/lib/regex.h deleted file mode 100644 index 5d9efe2..0000000 --- a/lib/regex.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) Camden Dixie O'Brien - * SPDX-License-Identifier: AGPL-3.0-only - */ - -#ifndef REGEX_H -#define REGEX_H - -#include <stdbool.h> - -typedef struct { - bool negated; - int count, capacity; - char *contents; -} class_t; - -typedef enum { - QUANTIFIER_NONE, - QUANTIFIER_ZERO_OR_MORE, - QUANTIFIER_ONE_OR_MORE, - QUANTIFIER_ZERO_OR_ONE, -} quantifier_t; - -typedef enum { - TERM_TYPE_WILDCARD, - TERM_TYPE_CLASS, - TERM_TYPE_LITERAL, - TERM_TYPE_REGEX, -} term_type_t; - -struct _term; -typedef struct { - int len, capacity; - struct _term *contents; -} sequence_t; - -typedef struct _regex { - sequence_t sequence; - struct _regex *alternative; -} regex_t; - -typedef struct _term { - quantifier_t quantifier; - term_type_t type; - union { - class_t class; - char literal; - regex_t regex; - }; -} term_t; - -void regex_free_children(regex_t *r); - -#endif diff --git a/scripts/build.sh b/scripts/build.sh index 9ec83c1..5bb1c13 100644 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -8,8 +8,7 @@ mkdir -p build # Build library clang $CFLAGS -Ilib -c lib/parser.c -o build/parser.o -clang $CFLAGS -Ilib -c lib/regex.c -o build/regex.o -ar -crs build/lib.a build/parser.o build/regex.o +ar -crs build/lib.a build/parser.o # Build tests clang $CFLAGS -Itests -c tests/testing.c -o build/testing.o diff --git a/tests/parser_tests.c b/tests/parser_tests.c index 3165eae..7b74890 100644 --- a/tests/parser_tests.c +++ b/tests/parser_tests.c @@ -6,243 +6,243 @@ #include "parser.h" #include "testing.h" -#define PARSE_REGEX_STRING(s, r) parse_regex(s, strlen(s), r) +#define PARSE_EXPR_STRING(s, r) parse_expr(s, strlen(s), r) static void a_has_no_alternative(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("a", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("a", &t); ASSERT_NE(-1, result); - ASSERT_NULL(r.alternative); - 
regex_free_children(&r); + ASSERT_NULL(t.alternative); + parse_tree_free_children(&t); } static void a_pipe_b_has_alternative(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("a|b", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("a|b", &t); ASSERT_NE(-1, result); - ASSERT_NOT_NULL(r.alternative); - regex_free_children(&r); + ASSERT_NOT_NULL(t.alternative); + parse_tree_free_children(&t); } static void a_pipe_b_pipe_c_result_alternative_has_alternative(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("a|b|c", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("a|b|c", &t); ASSERT_NE(-1, result); - ASSERT_NOT_NULL(r.alternative); - ASSERT_NOT_NULL(r.alternative->alternative); - regex_free_children(&r); + ASSERT_NOT_NULL(t.alternative); + ASSERT_NOT_NULL(t.alternative->alternative); + parse_tree_free_children(&t); } static void a_is_parsed_as_unquantified_literal(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("a", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("a", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, r.sequence.len); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type); - ASSERT_EQ('a', r.sequence.contents[0].literal); + ASSERT_EQ(1, t.sequence.len); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[0].type); + ASSERT_EQ('a', t.sequence.contents[0].literal); - regex_free_children(&r); + parse_tree_free_children(&t); } static void b_is_parsed_as_unquantified_literal(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("b", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("b", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, r.sequence.len); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type); - ASSERT_EQ('b', r.sequence.contents[0].literal); + ASSERT_EQ(1, 
t.sequence.len); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[0].type); + ASSERT_EQ('b', t.sequence.contents[0].literal); - regex_free_children(&r); + parse_tree_free_children(&t); } static void abc_is_parsed_as_sequence_of_unquantified_literals(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("abc", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("abc", &t); ASSERT_NE(-1, result); - ASSERT_EQ(3, r.sequence.len); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type); - ASSERT_EQ('a', r.sequence.contents[0].literal); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[1].quantifier); - ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[1].type); - ASSERT_EQ('b', r.sequence.contents[1].literal); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[2].quantifier); - ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[2].type); - ASSERT_EQ('c', r.sequence.contents[2].literal); + ASSERT_EQ(3, t.sequence.len); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[0].type); + ASSERT_EQ('a', t.sequence.contents[0].literal); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[1].quantifier); + ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[1].type); + ASSERT_EQ('b', t.sequence.contents[1].literal); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[2].quantifier); + ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[2].type); + ASSERT_EQ('c', t.sequence.contents[2].literal); - regex_free_children(&r); + parse_tree_free_children(&t); } static void dot_is_parsed_as_unquantified_wildcard_term(void) { - regex_t r; - const int result = PARSE_REGEX_STRING(".", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING(".", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, r.sequence.len); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier); - 
ASSERT_EQ(TERM_TYPE_WILDCARD, r.sequence.contents[0].type); + ASSERT_EQ(1, t.sequence.len); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_WILDCARD, t.sequence.contents[0].type); - regex_free_children(&r); + parse_tree_free_children(&t); } static void backslash_dot_is_parsed_as_unquantified_literal(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("\\.", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("\\.", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, r.sequence.len); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type); - ASSERT_EQ('.', r.sequence.contents[0].literal); + ASSERT_EQ(1, t.sequence.len); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[0].type); + ASSERT_EQ('.', t.sequence.contents[0].literal); - regex_free_children(&r); + parse_tree_free_children(&t); } static void backslash_backslash_is_parsed_as_unquantified_literal(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("\\\\", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("\\\\", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, r.sequence.len); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[0].type); - ASSERT_EQ('\\', r.sequence.contents[0].literal); + ASSERT_EQ(1, t.sequence.len); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[0].type); + ASSERT_EQ('\\', t.sequence.contents[0].literal); - regex_free_children(&r); + parse_tree_free_children(&t); } -static void a_pipe_b_in_parens_is_parsed_as_regex_term(void) +static void a_pipe_b_in_parens_is_parsed_as_subexpr_term(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("(a|b)", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("(a|b)", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, 
r.sequence.len); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_REGEX, r.sequence.contents[0].type); + ASSERT_EQ(1, t.sequence.len); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_SUBEXPR, t.sequence.contents[0].type); - const regex_t *inner = &r.sequence.contents[0].regex; + const parse_tree_t *inner = &t.sequence.contents[0].subexpr; ASSERT_EQ(1, inner->sequence.len); ASSERT_EQ(QUANTIFIER_NONE, inner->sequence.contents[0].quantifier); ASSERT_EQ(TERM_TYPE_LITERAL, inner->sequence.contents[0].type); ASSERT_EQ('a', inner->sequence.contents[0].literal); - const regex_t *inner_alt = inner->alternative; + const parse_tree_t *inner_alt = inner->alternative; ASSERT_EQ(1, inner->sequence.len); ASSERT_EQ(QUANTIFIER_NONE, inner_alt->sequence.contents[0].quantifier); ASSERT_EQ(TERM_TYPE_LITERAL, inner_alt->sequence.contents[0].type); ASSERT_EQ('b', inner_alt->sequence.contents[0].literal); - regex_free_children(&r); + parse_tree_free_children(&t); } -static void a_in_parens_b_is_parsed_as_sequence_with_regex_term(void) +static void a_in_parens_b_is_parsed_as_sequence_with_subexpr_term(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("(a)b", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("(a)b", &t); ASSERT_NE(-1, result); - ASSERT_EQ(2, r.sequence.len); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_REGEX, r.sequence.contents[0].type); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[1].quantifier); - ASSERT_EQ(TERM_TYPE_LITERAL, r.sequence.contents[1].type); - ASSERT_EQ('b', r.sequence.contents[1].literal); + ASSERT_EQ(2, t.sequence.len); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_SUBEXPR, t.sequence.contents[0].type); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[1].quantifier); + ASSERT_EQ(TERM_TYPE_LITERAL, t.sequence.contents[1].type); + ASSERT_EQ('b', 
t.sequence.contents[1].literal); - const regex_t *inner = &r.sequence.contents[0].regex; + const parse_tree_t *inner = &t.sequence.contents[0].subexpr; ASSERT_EQ(1, inner->sequence.len); ASSERT_EQ(QUANTIFIER_NONE, inner->sequence.contents[0].quantifier); ASSERT_EQ(TERM_TYPE_LITERAL, inner->sequence.contents[0].type); ASSERT_EQ('a', inner->sequence.contents[0].literal); - regex_free_children(&r); + parse_tree_free_children(&t); } static void dot_star_is_parsed_as_zero_or_more_wildcard(void) { - regex_t r; - const int result = PARSE_REGEX_STRING(".*", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING(".*", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, r.sequence.len); - ASSERT_EQ(QUANTIFIER_ZERO_OR_MORE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_WILDCARD, r.sequence.contents[0].type); + ASSERT_EQ(1, t.sequence.len); + ASSERT_EQ(QUANTIFIER_ZERO_OR_MORE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_WILDCARD, t.sequence.contents[0].type); - regex_free_children(&r); + parse_tree_free_children(&t); } static void dot_plus_is_parsed_as_one_or_more_wildcard(void) { - regex_t r; - const int result = PARSE_REGEX_STRING(".+", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING(".+", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, r.sequence.len); - ASSERT_EQ(QUANTIFIER_ONE_OR_MORE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_WILDCARD, r.sequence.contents[0].type); + ASSERT_EQ(1, t.sequence.len); + ASSERT_EQ(QUANTIFIER_ONE_OR_MORE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_WILDCARD, t.sequence.contents[0].type); - regex_free_children(&r); + parse_tree_free_children(&t); } static void dot_question_mark_is_parsed_as_zero_or_one_wildcard(void) { - regex_t r; - const int result = PARSE_REGEX_STRING(".?", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING(".?", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, r.sequence.len); - ASSERT_EQ(QUANTIFIER_ZERO_OR_ONE, r.sequence.contents[0].quantifier); - 
ASSERT_EQ(TERM_TYPE_WILDCARD, r.sequence.contents[0].type); + ASSERT_EQ(1, t.sequence.len); + ASSERT_EQ(QUANTIFIER_ZERO_OR_ONE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_WILDCARD, t.sequence.contents[0].type); - regex_free_children(&r); + parse_tree_free_children(&t); } static void a_in_brackets_is_parsed_as_class_containing_only_a(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("[a]", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("[a]", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, r.sequence.len); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_CLASS, r.sequence.contents[0].type); - ASSERT_FALSE(r.sequence.contents[0].class.negated); - ASSERT_EQ(1, r.sequence.contents[0].class.count); - ASSERT_NOT_NULL(r.sequence.contents[0].class.contents); - ASSERT_EQ('a', r.sequence.contents[0].class.contents[0]); + ASSERT_EQ(1, t.sequence.len); + ASSERT_EQ(QUANTIFIER_NONE, t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_CLASS, t.sequence.contents[0].type); + ASSERT_FALSE(t.sequence.contents[0].class.negated); + ASSERT_EQ(1, t.sequence.contents[0].class.count); + ASSERT_NOT_NULL(t.sequence.contents[0].class.contents); + ASSERT_EQ('a', t.sequence.contents[0].class.contents[0]); - regex_free_children(&r); + parse_tree_free_children(&t); } static void caret_a_in_brackets_parses_as_negated_class(void) { - regex_t r; - const int result = PARSE_REGEX_STRING("[^a]", &r); + parse_tree_t t; + const int result = PARSE_EXPR_STRING("[^a]", &t); ASSERT_NE(-1, result); - ASSERT_EQ(1, r.sequence.len); - ASSERT_EQ(QUANTIFIER_NONE, r.sequence.contents[0].quantifier); - ASSERT_EQ(TERM_TYPE_CLASS, r.sequence.contents[0].type); - ASSERT_TRUE(r.sequence.contents[0].class.negated); - ASSERT_EQ(1, r.sequence.contents[0].class.count); - ASSERT_NOT_NULL(r.sequence.contents[0].class.contents); - ASSERT_EQ('a', r.sequence.contents[0].class.contents[0]); + ASSERT_EQ(1, t.sequence.len); + ASSERT_EQ(QUANTIFIER_NONE, 
t.sequence.contents[0].quantifier); + ASSERT_EQ(TERM_TYPE_CLASS, t.sequence.contents[0].type); + ASSERT_TRUE(t.sequence.contents[0].class.negated); + ASSERT_EQ(1, t.sequence.contents[0].class.count); + ASSERT_NOT_NULL(t.sequence.contents[0].class.contents); + ASSERT_EQ('a', t.sequence.contents[0].class.contents[0]); - regex_free_children(&r); + parse_tree_free_children(&t); } int main(void) @@ -257,8 +257,8 @@ int main(void) dot_is_parsed_as_unquantified_wildcard_term(); backslash_dot_is_parsed_as_unquantified_literal(); backslash_backslash_is_parsed_as_unquantified_literal(); - a_pipe_b_in_parens_is_parsed_as_regex_term(); - a_in_parens_b_is_parsed_as_sequence_with_regex_term(); + a_pipe_b_in_parens_is_parsed_as_subexpr_term(); + a_in_parens_b_is_parsed_as_sequence_with_subexpr_term(); dot_star_is_parsed_as_zero_or_more_wildcard(); dot_plus_is_parsed_as_one_or_more_wildcard(); dot_question_mark_is_parsed_as_zero_or_one_wildcard();