From 1f248ad4cdbc457067d26e99114c70b8f4208962 Mon Sep 17 00:00:00 2001 From: Camden Dixie O'Brien Date: Sun, 3 Nov 2024 12:16:52 +0000 Subject: [PATCH] Remove desugaring step --- lib/CMakeLists.txt | 1 - lib/compile.c | 2 - lib/desugar.c | 124 --------------- lib/include/desugar.h | 13 -- tests/CMakeLists.txt | 1 - tests/desugar_tests.c | 362 ------------------------------------------ 6 files changed, 503 deletions(-) delete mode 100644 lib/desugar.c delete mode 100644 lib/include/desugar.h delete mode 100644 tests/desugar_tests.c diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 32729b1..2225096 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -2,7 +2,6 @@ add_library(lib compile.c construct.c convert.c - desugar.c fsa.c min_heap.c parse.c diff --git a/lib/compile.c b/lib/compile.c index 0149497..7615793 100644 --- a/lib/compile.c +++ b/lib/compile.c @@ -7,7 +7,6 @@ #include "construct.h" #include "convert.h" -#include "desugar.h" #include "parse.h" bool compile(const char *regex, int len, fsa_t *dfa_out) @@ -15,7 +14,6 @@ bool compile(const char *regex, int len, fsa_t *dfa_out) regex_t pt; if (-1 == parse_expr(regex, len, &pt)) return false; - desugar_regex(&pt); fsa_t nfa; construct_nfa(&pt, &nfa); diff --git a/lib/desugar.c b/lib/desugar.c deleted file mode 100644 index d051055..0000000 --- a/lib/desugar.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) Camden Dixie O'Brien - * SPDX-License-Identifier: AGPL-3.0-only - */ - -#include "desugar.h" - -#include -#include -#include - -static void deep_copy_term(regex_term_t *dst, regex_term_t *src); - -static void deep_copy_sequence(regex_sequence_t *dst, regex_sequence_t *src) -{ - dst->count = dst->capacity = src->count; - dst->contents = malloc(dst->capacity * sizeof(regex_term_t)); - assert(NULL != dst->contents); - - for (int i = 0; i < dst->count; ++i) - deep_copy_term(&dst->contents[i], &src->contents[i]); -} - -static void deep_copy_term(regex_term_t *dst, regex_term_t *src) -{ - assert(REGEX_TERM_WILDCARD != src->type); - - memcpy(dst, src, sizeof(regex_term_t)); - switch (src->type) { - case REGEX_TERM_SUBEXPR: - dst->subexpr.capacity = src->subexpr.count; - dst->subexpr.contents - = malloc(dst->subexpr.capacity * sizeof(regex_sequence_t)); - assert(NULL != dst->subexpr.contents); - - for (int i = 0; i < dst->subexpr.count; ++i) { - deep_copy_sequence( - &dst->subexpr.contents[i], &src->subexpr.contents[i]); - } - break; - - case REGEX_TERM_CLASS: - dst->class.count = src->class.count; - dst->class.capacity = src->class.capacity; - dst->class.contents - = malloc(dst->class.capacity * sizeof(regex_sequence_t)); - assert(NULL != dst->class.contents); - memcpy(dst->class.contents, src->class.contents, src->class.count); - break; - - default: - break; - } -} - -static void desugar_plus(regex_term_t *term) -{ - regex_sequence_t *alternatives = malloc(sizeof(regex_sequence_t)); - assert(NULL != alternatives); - - alternatives[0].count = alternatives[0].capacity = 2; - alternatives[0].contents = malloc(2 * sizeof(regex_term_t)); - assert(NULL != alternatives[0].contents); - - memcpy(&alternatives[0].contents[0], term, sizeof(regex_term_t)); - deep_copy_term(&alternatives[0].contents[1], term); - alternatives[0].contents[0].quantifier = REGEX_QUANTIFIER_NONE; - alternatives[0].contents[1].quantifier = REGEX_QUANTIFIER_STAR; - - term->quantifier = REGEX_QUANTIFIER_NONE; - term->type = REGEX_TERM_SUBEXPR; - term->subexpr.count = term->subexpr.capacity = 1; - term->subexpr.contents = alternatives; -} - -static void desugar_qmark(regex_term_t *term) -{ - regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t)); - assert(NULL != alternatives); - - alternatives[0].count = alternatives[0].capacity = 1; - alternatives[0].contents = malloc(sizeof(regex_term_t)); - assert(NULL != alternatives[0].contents); - alternatives[0].contents[0].quantifier = REGEX_QUANTIFIER_NONE; - alternatives[0].contents[0].type = REGEX_TERM_EMPTY; - - alternatives[1].count = alternatives[0].capacity = 1; - alternatives[1].contents = malloc(sizeof(regex_term_t)); - assert(NULL != alternatives[1].contents); - memcpy(&alternatives[1].contents[0], term, sizeof(regex_term_t)); - alternatives[1].contents[0].quantifier = REGEX_QUANTIFIER_NONE; - - term->quantifier = REGEX_QUANTIFIER_NONE; - term->type = REGEX_TERM_SUBEXPR; - term->subexpr.count = term->subexpr.capacity = 2; - term->subexpr.contents = alternatives; -} - -static void desugar_term(regex_term_t *term) -{ - if (REGEX_TERM_SUBEXPR == term->type) - desugar_regex(&term->subexpr); - - switch (term->quantifier) { - case REGEX_QUANTIFIER_PLUS: - desugar_plus(term); - break; - case REGEX_QUANTIFIER_QMARK: - desugar_qmark(term); - break; - - case REGEX_QUANTIFIER_NONE: - case REGEX_QUANTIFIER_STAR: - break; - } -} - -void desugar_regex(regex_t *regex) -{ - for (int i = 0; i < regex->count; ++i) { - for (int j = 0; j < regex->contents[i].count; ++j) - desugar_term(®ex->contents[i].contents[j]); - } -} diff --git a/lib/include/desugar.h b/lib/include/desugar.h deleted file mode 100644 index d0b1f66..0000000 --- a/lib/include/desugar.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright (c) Camden Dixie O'Brien - * SPDX-License-Identifier: AGPL-3.0-only - */ - -#ifndef DESUGAR_H -#define DESUGAR_H - -#include "regex.h" - -void desugar_regex(regex_t *regex); - -#endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bdaa7c3..eea7329 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -19,7 +19,6 @@ endfunction() add_test_suites( construct_tests.c convert_tests.c - desugar_tests.c fsa_tests.c integration_tests.c min_heap_tests.c diff --git a/tests/desugar_tests.c b/tests/desugar_tests.c deleted file mode 100644 index 91529cd..0000000 --- a/tests/desugar_tests.c +++ /dev/null @@ -1,362 +0,0 @@ -/* - * Copyright (c) Camden Dixie O'Brien - * SPDX-License-Identifier: AGPL-3.0-only - */ - -#include "desugar.h" -#include "testing.h" - -#include - -static void a_is_unchanged(void) -{ - regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); - terms[0].quantifier = REGEX_QUANTIFIER_NONE; - terms[0].type = REGEX_TERM_LITERAL; - terms[0].literal = 'a'; - regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); - alternatives[0].count = alternatives[0].capacity = 1; - alternatives[0].contents = terms; - regex_t t = { .count = 1, .capacity = 1, .contents = alternatives }; - - desugar_regex(&t); - - ASSERT_EQ(1, t.count); - ASSERT_NOT_NULL(t.contents); - ASSERT_EQ(1, t.contents[0].count); - ASSERT_NOT_NULL(t.contents[0].contents); - ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); - ASSERT_EQ('a', t.contents[0].contents[0].literal); - - regex_free(&t); -} - -static void wildcard_is_unchanged(void) -{ - regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); - terms[0].quantifier = REGEX_QUANTIFIER_NONE; - terms[0].type = REGEX_TERM_WILDCARD; - regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); - alternatives[0].count = alternatives[0].capacity = 1; - alternatives[0].contents = terms; - regex_t t = { .count = 1, .capacity = 1, .contents = alternatives }; - - desugar_regex(&t); - - ASSERT_EQ(1, t.count); - ASSERT_NOT_NULL(t.contents); - ASSERT_EQ(1, t.contents[0].count); - ASSERT_NOT_NULL(t.contents[0].contents); - ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type); - - regex_free(&t); -} - -static void abc_is_unchanged(void) -{ - regex_term_t *terms = malloc(3 * sizeof(regex_term_t)); - terms[0].type = REGEX_TERM_LITERAL; - terms[0].literal = 'a'; - terms[1].type = REGEX_TERM_LITERAL; - terms[1].literal = 'b'; - terms[2].type = REGEX_TERM_LITERAL; - terms[2].literal = 'c'; - regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); - alternatives[0].count = alternatives[0].capacity = 3; - alternatives[0].contents = terms; - regex_t t = { .count = 1, .capacity = 1, .contents = alternatives }; - - desugar_regex(&t); - - ASSERT_EQ(1, t.count); - ASSERT_NOT_NULL(t.contents); - ASSERT_EQ(3, t.contents[0].count); - ASSERT_NOT_NULL(t.contents[0].contents); - ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); - ASSERT_EQ('a', t.contents[0].contents[0].literal); - ASSERT_NOT_NULL(t.contents[0].contents); - ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type); - ASSERT_EQ('b', t.contents[0].contents[1].literal); - ASSERT_NOT_NULL(t.contents[0].contents); - ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[2].type); - ASSERT_EQ('c', t.contents[0].contents[2].literal); - - regex_free(&t); -} - -static void a_star_is_unchanged(void) -{ - regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); - terms[0].quantifier = REGEX_QUANTIFIER_STAR; - terms[0].type = REGEX_TERM_LITERAL; - terms[0].literal = 'a'; - regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); - alternatives[0].count = alternatives[0].capacity = 1; - alternatives[0].contents = terms; - regex_t t = { .count = 1, .capacity = 1, .contents = alternatives }; - - desugar_regex(&t); - - ASSERT_EQ(1, t.count); - ASSERT_NOT_NULL(t.contents); - ASSERT_EQ(1, t.contents[0].count); - ASSERT_NOT_NULL(t.contents[0].contents); - ASSERT_EQ(REGEX_QUANTIFIER_STAR, t.contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); - ASSERT_EQ('a', t.contents[0].contents[0].literal); - - regex_free(&t); -} - -static void a_or_b_or_c_is_unchanged(void) -{ - const char *literals = "abc"; - regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t)); - for (int i = 0; i < 3; ++i) { - regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); - terms[0].quantifier = REGEX_QUANTIFIER_NONE; - terms[0].type = REGEX_TERM_LITERAL; - terms[0].literal = literals[i]; - - alternatives[i].count = alternatives[i].capacity = 1; - alternatives[i].contents = terms; - } - regex_t t = { .count = 3, .capacity = 3, .contents = alternatives }; - - desugar_regex(&t); - - ASSERT_EQ(3, t.count); - ASSERT_NOT_NULL(t.contents); - for (int i = 0; i < 3; ++i) { - ASSERT_EQ(1, t.contents[i].count); - ASSERT_NOT_NULL(t.contents[i].contents); - ASSERT_EQ( - REGEX_QUANTIFIER_NONE, t.contents[i].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[i].contents[0].type); - ASSERT_EQ(literals[i], t.contents[i].contents[0].literal); - } - - regex_free(&t); -} - -static void subexpr_a_is_unchanged(void) -{ - regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t)); - inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE; - inner_terms[0].type = REGEX_TERM_LITERAL; - inner_terms[0].literal = 'a'; - regex_sequence_t *inner_alternatives - = malloc(1 * sizeof(regex_sequence_t)); - inner_alternatives[0].count = inner_alternatives[0].capacity = 1; - inner_alternatives[0].contents = inner_terms; - regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); - terms[0].quantifier = REGEX_QUANTIFIER_NONE; - terms[0].type = REGEX_TERM_SUBEXPR; - terms[0].subexpr.count = terms[0].subexpr.capacity = 1; - terms[0].subexpr.contents = inner_alternatives; - regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); - alternatives[0].count = alternatives[0].capacity = 1; - alternatives[0].contents = terms; - regex_t t = { .count = 1, .capacity = 1, .contents = alternatives }; - - desugar_regex(&t); - - ASSERT_EQ(1, t.count); - ASSERT_NOT_NULL(t.contents); - ASSERT_EQ(1, t.contents[0].count); - ASSERT_NOT_NULL(t.contents[0].contents); - ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type); - - const regex_t *inner = &t.contents[0].contents[0].subexpr; - ASSERT_EQ(1, inner->count); - ASSERT_NOT_NULL(inner->contents); - ASSERT_EQ(1, inner->contents[0].count); - ASSERT_NOT_NULL(inner->contents[0].contents); - ASSERT_EQ( - REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type); - ASSERT_EQ('a', inner->contents[0].contents[0].literal); - - regex_free(&t); -} - -static void a_plus_becomes_subexpr_aa_star(void) -{ - regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); - terms[0].quantifier = REGEX_QUANTIFIER_PLUS; - terms[0].type = REGEX_TERM_LITERAL; - terms[0].literal = 'a'; - regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); - alternatives[0].count = alternatives[0].capacity = 1; - alternatives[0].contents = terms; - regex_t t = { .count = 1, .capacity = 1, .contents = alternatives }; - - desugar_regex(&t); - - ASSERT_EQ(1, t.count); - ASSERT_NOT_NULL(t.contents); - ASSERT_EQ(1, t.contents[0].count); - ASSERT_NOT_NULL(t.contents[0].contents); - ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type); - - const regex_t *inner = &t.contents[0].contents[0].subexpr; - ASSERT_EQ(1, inner->count); - ASSERT_NOT_NULL(inner->contents); - ASSERT_EQ(2, inner->contents[0].count); - ASSERT_NOT_NULL(inner->contents[0].contents); - ASSERT_EQ( - REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type); - ASSERT_EQ('a', inner->contents[0].contents[0].literal); - ASSERT_EQ( - REGEX_QUANTIFIER_STAR, inner->contents[0].contents[1].quantifier); - ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[1].type); - ASSERT_EQ('a', inner->contents[0].contents[1].literal); - - regex_free(&t); -} - -static void a_qmark_becomes_subexpr_empty_or_a(void) -{ - regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); - terms[0].quantifier = REGEX_QUANTIFIER_QMARK; - terms[0].type = REGEX_TERM_LITERAL; - terms[0].literal = 'a'; - regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); - alternatives[0].count = alternatives[0].capacity = 1; - alternatives[0].contents = terms; - regex_t t = { .count = 1, .capacity = 1, .contents = alternatives }; - - desugar_regex(&t); - - ASSERT_EQ(1, t.count); - ASSERT_NOT_NULL(t.contents); - ASSERT_EQ(1, t.contents[0].count); - ASSERT_NOT_NULL(t.contents[0].contents); - ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type); - - const regex_t *inner = &t.contents[0].contents[0].subexpr; - ASSERT_EQ(2, inner->count); - ASSERT_NOT_NULL(inner->contents); - ASSERT_EQ(1, inner->contents[0].count); - ASSERT_NOT_NULL(inner->contents[0].contents); - ASSERT_EQ( - REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type); - ASSERT_EQ(1, inner->contents[1].count); - ASSERT_NOT_NULL(inner->contents[1].contents); - ASSERT_EQ( - REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type); - ASSERT_EQ('a', inner->contents[1].contents[0].literal); - - regex_free(&t); -} - -static void class_abc_is_unchanged(void) -{ - char *options = malloc(3 * sizeof(char)); - options[0] = 'a'; - options[1] = 'b'; - options[2] = 'c'; - regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); - terms[0].quantifier = REGEX_QUANTIFIER_NONE; - terms[0].type = REGEX_TERM_CLASS; - terms[0].class.negated = false; - terms[0].class.count = terms[0].class.capacity = 3; - terms[0].class.contents = options; - regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); - alternatives[0].count = alternatives[0].capacity = 1; - alternatives[0].contents = terms; - regex_t t = { .count = 1, .capacity = 1, .contents = alternatives }; - - desugar_regex(&t); - - ASSERT_EQ(1, t.count); - ASSERT_EQ(1, t.contents[0].count); - ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_CLASS, t.contents[0].contents[0].type); - - const regex_class_t *class = &t.contents[0].contents[0].class; - ASSERT_EQ(3, class->count); - ASSERT_EQ('a', class->contents[0]); - ASSERT_EQ('b', class->contents[1]); - ASSERT_EQ('c', class->contents[2]); - - regex_free(&t); -} - -static void subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a(void) -{ - regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t)); - inner_terms[0].quantifier = REGEX_QUANTIFIER_QMARK; - inner_terms[0].type = REGEX_TERM_LITERAL; - inner_terms[0].literal = 'a'; - regex_sequence_t *inner_alternatives - = malloc(1 * sizeof(regex_sequence_t)); - inner_alternatives[0].count = inner_alternatives[0].capacity = 1; - inner_alternatives[0].contents = inner_terms; - regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); - terms[0].quantifier = REGEX_QUANTIFIER_NONE; - terms[0].type = REGEX_TERM_SUBEXPR; - terms[0].subexpr.count = terms[0].subexpr.capacity = 1; - terms[0].subexpr.contents = inner_alternatives; - regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); - alternatives[0].count = alternatives[0].capacity = 1; - alternatives[0].contents = terms; - regex_t t = { .count = 1, .capacity = 1, .contents = alternatives }; - - desugar_regex(&t); - - ASSERT_EQ(1, t.count); - ASSERT_EQ(1, t.contents[0].count); - ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type); - - const regex_t *inner; - - inner = &t.contents[0].contents[0].subexpr; - ASSERT_EQ(1, inner->count); - ASSERT_EQ(1, inner->contents[0].count); - ASSERT_EQ( - REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_SUBEXPR, inner->contents[0].contents[0].type); - - inner = &inner->contents[0].contents[0].subexpr; - ASSERT_EQ(2, inner->count); - ASSERT_NOT_NULL(inner->contents); - ASSERT_EQ(1, inner->contents[0].count); - ASSERT_NOT_NULL(inner->contents[0].contents); - ASSERT_EQ( - REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type); - ASSERT_EQ(1, inner->contents[1].count); - ASSERT_NOT_NULL(inner->contents[1].contents); - ASSERT_EQ( - REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier); - ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type); - ASSERT_EQ('a', inner->contents[1].contents[0].literal); - - regex_free(&t); -} - -int main(void) -{ - TESTING_BEGIN(); - a_is_unchanged(); - wildcard_is_unchanged(); - abc_is_unchanged(); - a_star_is_unchanged(); - a_or_b_or_c_is_unchanged(); - subexpr_a_is_unchanged(); - a_plus_becomes_subexpr_aa_star(); - a_qmark_becomes_subexpr_empty_or_a(); - class_abc_is_unchanged(); - subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a(); - return TESTING_END(); -}