Remove desugaring step

This commit is contained in:
Camden Dixie O'Brien 2024-11-03 12:16:52 +00:00
parent e283fd2c52
commit 1f248ad4cd
6 changed files with 0 additions and 503 deletions

View File

@ -2,7 +2,6 @@ add_library(lib
compile.c
construct.c
convert.c
desugar.c
fsa.c
min_heap.c
parse.c

View File

@ -7,7 +7,6 @@
#include "construct.h"
#include "convert.h"
#include "desugar.h"
#include "parse.h"
bool compile(const char *regex, int len, fsa_t *dfa_out)
@ -15,7 +14,6 @@ bool compile(const char *regex, int len, fsa_t *dfa_out)
regex_t pt;
if (-1 == parse_expr(regex, len, &pt))
return false;
desugar_regex(&pt);
fsa_t nfa;
construct_nfa(&pt, &nfa);

View File

@ -1,124 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "desugar.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
static void deep_copy_term(regex_term_t *dst, regex_term_t *src);

/*
 * Make dst an independent copy of src.
 *
 * dst receives a freshly allocated term array sized to exactly src->count
 * (so its capacity equals its count), and every term of src is deep-copied
 * into it. The allocation is checked with assert.
 */
static void deep_copy_sequence(regex_sequence_t *dst, regex_sequence_t *src)
{
    dst->capacity = src->count;
    dst->count = dst->capacity;

    dst->contents = malloc(dst->capacity * sizeof(regex_term_t));
    assert(NULL != dst->contents);

    int i = 0;
    while (i < dst->count) {
        deep_copy_term(&dst->contents[i], &src->contents[i]);
        ++i;
    }
}
/*
 * Copy *src into *dst so that dst owns its own heap storage.
 *
 * All fields are first copied bitwise; for subexpression and class terms
 * the heap-backed payload is then re-allocated and copied, so dst and src
 * never share a buffer. Other term types carry no heap payload here and
 * need nothing beyond the bitwise copy.
 *
 * Wildcard terms are rejected by the assert at the top of the function.
 * Allocation results are likewise checked with assert.
 */
static void deep_copy_term(regex_term_t *dst, regex_term_t *src)
{
    assert(REGEX_TERM_WILDCARD != src->type);

    memcpy(dst, src, sizeof(regex_term_t));

    switch (src->type) {
    case REGEX_TERM_SUBEXPR:
        /* count came across in the memcpy; the copy is trimmed to fit. */
        dst->subexpr.capacity = src->subexpr.count;
        dst->subexpr.contents
            = malloc(dst->subexpr.capacity * sizeof(regex_sequence_t));
        assert(NULL != dst->subexpr.contents);
        for (int i = 0; i < dst->subexpr.count; ++i) {
            deep_copy_sequence(
                &dst->subexpr.contents[i], &src->subexpr.contents[i]);
        }
        break;

    case REGEX_TERM_CLASS:
        dst->class.count = src->class.count;
        dst->class.capacity = src->class.capacity;
        /*
         * Size the buffer by the class's own element type. It used to be
         * sized by regex_sequence_t, which over-allocated every class
         * copy by a large factor.
         */
        dst->class.contents
            = malloc(dst->class.capacity * sizeof *dst->class.contents);
        assert(NULL != dst->class.contents);
        memcpy(
            dst->class.contents, src->class.contents,
            src->class.count * sizeof *dst->class.contents);
        break;

    default:
        break;
    }
}
/*
 * Rewrite a `+`-quantified term in place as the subexpression `(x x*)`.
 *
 * A new one-alternative subexpression is built whose single sequence holds
 * two terms: the original term (bitwise copy, so it keeps any heap payload
 * the original owned) with no quantifier, followed by a deep copy of it
 * with a star quantifier. *term is then overwritten to be that
 * unquantified subexpression.
 */
static void desugar_plus(regex_term_t *term)
{
    regex_sequence_t *alternatives = malloc(sizeof(regex_sequence_t));
    assert(NULL != alternatives);

    alternatives[0].contents = malloc(2 * sizeof(regex_term_t));
    assert(NULL != alternatives[0].contents);
    alternatives[0].capacity = 2;
    alternatives[0].count = alternatives[0].capacity;

    regex_term_t *once = &alternatives[0].contents[0];
    regex_term_t *repeated = &alternatives[0].contents[1];

    /* First occurrence: the original term itself, minus its quantifier. */
    memcpy(once, term, sizeof(regex_term_t));
    once->quantifier = REGEX_QUANTIFIER_NONE;

    /* Zero-or-more further occurrences: an independent starred copy. */
    deep_copy_term(repeated, term);
    repeated->quantifier = REGEX_QUANTIFIER_STAR;

    term->type = REGEX_TERM_SUBEXPR;
    term->quantifier = REGEX_QUANTIFIER_NONE;
    term->subexpr.contents = alternatives;
    term->subexpr.capacity = 1;
    term->subexpr.count = term->subexpr.capacity;
}
/*
 * Rewrite a `?`-quantified term in place as the subexpression `(|x)`.
 *
 * A two-alternative subexpression is built: the first alternative is a
 * single empty term, the second is the original term (bitwise copy, so it
 * keeps any heap payload the original owned) with its quantifier cleared.
 * *term is then overwritten to be that unquantified subexpression.
 *
 * Allocation results are checked with assert.
 */
static void desugar_qmark(regex_term_t *term)
{
    regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t));
    assert(NULL != alternatives);

    /* Alternative 0: match nothing. */
    alternatives[0].count = alternatives[0].capacity = 1;
    alternatives[0].contents = malloc(sizeof(regex_term_t));
    assert(NULL != alternatives[0].contents);
    alternatives[0].contents[0].quantifier = REGEX_QUANTIFIER_NONE;
    alternatives[0].contents[0].type = REGEX_TERM_EMPTY;

    /*
     * Alternative 1: match the term exactly once. The capacity set here
     * must be alternatives[1]'s own; it used to be written to
     * alternatives[0] a second time, leaving this one uninitialised.
     */
    alternatives[1].count = alternatives[1].capacity = 1;
    alternatives[1].contents = malloc(sizeof(regex_term_t));
    assert(NULL != alternatives[1].contents);
    memcpy(&alternatives[1].contents[0], term, sizeof(regex_term_t));
    alternatives[1].contents[0].quantifier = REGEX_QUANTIFIER_NONE;

    term->quantifier = REGEX_QUANTIFIER_NONE;
    term->type = REGEX_TERM_SUBEXPR;
    term->subexpr.count = term->subexpr.capacity = 2;
    term->subexpr.contents = alternatives;
}
/*
 * Desugar a single term in place.
 *
 * If the term is a subexpression its contents are desugared first; then a
 * `+` or `?` quantifier on the term itself is expanded. Terms with no
 * quantifier or a star quantifier are left as they are.
 */
static void desugar_term(regex_term_t *term)
{
    if (REGEX_TERM_SUBEXPR == term->type)
        desugar_regex(&term->subexpr);

    if (REGEX_QUANTIFIER_PLUS == term->quantifier)
        desugar_plus(term);
    else if (REGEX_QUANTIFIER_QMARK == term->quantifier)
        desugar_qmark(term);
}
/*
 * Desugar every term of every alternative of regex, in place.
 */
void desugar_regex(regex_t *regex)
{
    for (int alt = 0; alt < regex->count; ++alt) {
        regex_sequence_t *sequence = &regex->contents[alt];
        for (int pos = 0; pos < sequence->count; ++pos)
            desugar_term(&sequence->contents[pos]);
    }
}

View File

@ -1,13 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef DESUGAR_H
#define DESUGAR_H
#include "regex.h"
/*
 * Expand the `+` and `?` quantifiers of a parsed regex, in place.
 *
 * Every term of every alternative is visited, including terms nested
 * inside subexpressions. A `+`-quantified term is replaced by an
 * unquantified subexpression holding the term followed by a starred copy
 * of it; a `?`-quantified term is replaced by an unquantified
 * subexpression with two alternatives, an empty term and the term itself.
 * Unquantified and starred terms are left unchanged.
 *
 * The replacement nodes are heap-allocated and become part of the tree
 * rooted at regex.
 */
void desugar_regex(regex_t *regex);
#endif

View File

@ -19,7 +19,6 @@ endfunction()
add_test_suites(
construct_tests.c
convert_tests.c
desugar_tests.c
fsa_tests.c
integration_tests.c
min_heap_tests.c

View File

@ -1,362 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "desugar.h"
#include "testing.h"
#include <stddef.h>
/* A lone unquantified literal `a` must come through desugaring untouched. */
static void a_is_unchanged(void)
{
    regex_term_t *term = malloc(sizeof *term);
    term->type = REGEX_TERM_LITERAL;
    term->literal = 'a';
    term->quantifier = REGEX_QUANTIFIER_NONE;

    regex_sequence_t *sequence = malloc(sizeof *sequence);
    sequence->contents = term;
    sequence->capacity = 1;
    sequence->count = 1;

    regex_t t = { .contents = sequence, .count = 1, .capacity = 1 };

    desugar_regex(&t);

    ASSERT_EQ(1, t.count);
    ASSERT_NOT_NULL(t.contents);
    ASSERT_EQ(1, t.contents[0].count);
    ASSERT_NOT_NULL(t.contents[0].contents);
    ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
    ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
    ASSERT_EQ('a', t.contents[0].contents[0].literal);

    regex_free(&t);
}
/* A lone unquantified wildcard must come through desugaring untouched. */
static void wildcard_is_unchanged(void)
{
    regex_term_t *term = malloc(sizeof *term);
    term->type = REGEX_TERM_WILDCARD;
    term->quantifier = REGEX_QUANTIFIER_NONE;

    regex_sequence_t *sequence = malloc(sizeof *sequence);
    sequence->contents = term;
    sequence->capacity = 1;
    sequence->count = 1;

    regex_t t = { .contents = sequence, .count = 1, .capacity = 1 };

    desugar_regex(&t);

    ASSERT_EQ(1, t.count);
    ASSERT_NOT_NULL(t.contents);
    ASSERT_EQ(1, t.contents[0].count);
    ASSERT_NOT_NULL(t.contents[0].contents);
    ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
    ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type);

    regex_free(&t);
}
static void abc_is_unchanged(void)
{
regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b';
terms[2].type = REGEX_TERM_LITERAL;
terms[2].literal = 'c';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(3, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type);
ASSERT_EQ('b', t.contents[0].contents[1].literal);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[2].type);
ASSERT_EQ('c', t.contents[0].contents[2].literal);
regex_free(&t);
}
/* A starred literal `a*` is already in core form and must not be rewritten. */
static void a_star_is_unchanged(void)
{
    regex_term_t *term = malloc(sizeof *term);
    term->type = REGEX_TERM_LITERAL;
    term->literal = 'a';
    term->quantifier = REGEX_QUANTIFIER_STAR;

    regex_sequence_t *sequence = malloc(sizeof *sequence);
    sequence->contents = term;
    sequence->capacity = 1;
    sequence->count = 1;

    regex_t t = { .contents = sequence, .count = 1, .capacity = 1 };

    desugar_regex(&t);

    ASSERT_EQ(1, t.count);
    ASSERT_NOT_NULL(t.contents);
    ASSERT_EQ(1, t.contents[0].count);
    ASSERT_NOT_NULL(t.contents[0].contents);
    ASSERT_EQ(REGEX_QUANTIFIER_STAR, t.contents[0].contents[0].quantifier);
    ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
    ASSERT_EQ('a', t.contents[0].contents[0].literal);

    regex_free(&t);
}
static void a_or_b_or_c_is_unchanged(void)
{
const char *literals = "abc";
regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t));
for (int i = 0; i < 3; ++i) {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = literals[i];
alternatives[i].count = alternatives[i].capacity = 1;
alternatives[i].contents = terms;
}
regex_t t = { .count = 3, .capacity = 3, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(3, t.count);
ASSERT_NOT_NULL(t.contents);
for (int i = 0; i < 3; ++i) {
ASSERT_EQ(1, t.contents[i].count);
ASSERT_NOT_NULL(t.contents[i].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, t.contents[i].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[i].contents[0].type);
ASSERT_EQ(literals[i], t.contents[i].contents[0].literal);
}
regex_free(&t);
}
static void subexpr_a_is_unchanged(void)
{
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal);
regex_free(&t);
}
/*
 * `a+` must be rewritten to an unquantified subexpression with a single
 * alternative `a a*`.
 */
static void a_plus_becomes_subexpr_aa_star(void)
{
    regex_term_t *term = malloc(sizeof *term);
    term->type = REGEX_TERM_LITERAL;
    term->literal = 'a';
    term->quantifier = REGEX_QUANTIFIER_PLUS;

    regex_sequence_t *sequence = malloc(sizeof *sequence);
    sequence->contents = term;
    sequence->capacity = 1;
    sequence->count = 1;

    regex_t t = { .contents = sequence, .count = 1, .capacity = 1 };

    desugar_regex(&t);

    ASSERT_EQ(1, t.count);
    ASSERT_NOT_NULL(t.contents);
    ASSERT_EQ(1, t.contents[0].count);
    ASSERT_NOT_NULL(t.contents[0].contents);
    ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
    ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);

    const regex_t *inner = &t.contents[0].contents[0].subexpr;
    ASSERT_EQ(1, inner->count);
    ASSERT_NOT_NULL(inner->contents);
    ASSERT_EQ(2, inner->contents[0].count);
    ASSERT_NOT_NULL(inner->contents[0].contents);
    ASSERT_EQ(
        REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
    ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
    ASSERT_EQ('a', inner->contents[0].contents[0].literal);
    ASSERT_EQ(
        REGEX_QUANTIFIER_STAR, inner->contents[0].contents[1].quantifier);
    ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[1].type);
    ASSERT_EQ('a', inner->contents[0].contents[1].literal);

    regex_free(&t);
}
/*
 * `a?` must be rewritten to an unquantified subexpression with two
 * alternatives: an empty term, and the literal `a`.
 */
static void a_qmark_becomes_subexpr_empty_or_a(void)
{
    regex_term_t *term = malloc(sizeof *term);
    term->type = REGEX_TERM_LITERAL;
    term->literal = 'a';
    term->quantifier = REGEX_QUANTIFIER_QMARK;

    regex_sequence_t *sequence = malloc(sizeof *sequence);
    sequence->contents = term;
    sequence->capacity = 1;
    sequence->count = 1;

    regex_t t = { .contents = sequence, .count = 1, .capacity = 1 };

    desugar_regex(&t);

    ASSERT_EQ(1, t.count);
    ASSERT_NOT_NULL(t.contents);
    ASSERT_EQ(1, t.contents[0].count);
    ASSERT_NOT_NULL(t.contents[0].contents);
    ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
    ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);

    const regex_t *inner = &t.contents[0].contents[0].subexpr;
    ASSERT_EQ(2, inner->count);
    ASSERT_NOT_NULL(inner->contents);
    ASSERT_EQ(1, inner->contents[0].count);
    ASSERT_NOT_NULL(inner->contents[0].contents);
    ASSERT_EQ(
        REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
    ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type);
    ASSERT_EQ(1, inner->contents[1].count);
    ASSERT_NOT_NULL(inner->contents[1].contents);
    ASSERT_EQ(
        REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
    ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
    ASSERT_EQ('a', inner->contents[1].contents[0].literal);

    regex_free(&t);
}
/*
 * An unquantified, non-negated character class `[abc]` must come through
 * desugaring untouched.
 */
static void class_abc_is_unchanged(void)
{
    char *options = malloc(3 * sizeof *options);
    for (int k = 0; k < 3; ++k)
        options[k] = "abc"[k];

    regex_term_t *term = malloc(sizeof *term);
    term->type = REGEX_TERM_CLASS;
    term->quantifier = REGEX_QUANTIFIER_NONE;
    term->class.contents = options;
    term->class.capacity = 3;
    term->class.count = 3;
    term->class.negated = false;

    regex_sequence_t *sequence = malloc(sizeof *sequence);
    sequence->contents = term;
    sequence->capacity = 1;
    sequence->count = 1;

    regex_t t = { .contents = sequence, .count = 1, .capacity = 1 };

    desugar_regex(&t);

    ASSERT_EQ(1, t.count);
    ASSERT_EQ(1, t.contents[0].count);
    ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
    ASSERT_EQ(REGEX_TERM_CLASS, t.contents[0].contents[0].type);

    const regex_class_t *class = &t.contents[0].contents[0].class;
    ASSERT_EQ(3, class->count);
    ASSERT_EQ('a', class->contents[0]);
    ASSERT_EQ('b', class->contents[1]);
    ASSERT_EQ('c', class->contents[2]);

    regex_free(&t);
}
static void subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a(void)
{
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_QMARK;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner;
inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, inner->contents[0].contents[0].type);
inner = &inner->contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('a', inner->contents[1].contents[0].literal);
regex_free(&t);
}
int main(void)
{
TESTING_BEGIN();
a_is_unchanged();
wildcard_is_unchanged();
abc_is_unchanged();
a_star_is_unchanged();
a_or_b_or_c_is_unchanged();
subexpr_a_is_unchanged();
a_plus_becomes_subexpr_aa_star();
a_qmark_becomes_subexpr_empty_or_a();
class_abc_is_unchanged();
subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a();
return TESTING_END();
}