regex-engine/lib/desugar.c

151 lines
4.1 KiB
C

/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "desugar.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/*
 * Rewrite a character-class term in place as a sub-expression holding
 * one alternative per class member, each alternative being a sequence
 * of exactly one unquantified literal term.
 *
 * The class must not be negated (asserted).  The term's own quantifier
 * is left untouched.  regex_class_free() is called on the class once
 * its members have been copied out.  Allocation failure is treated as
 * fatal via assert, matching the rest of this file.
 */
static void desugar_class(regex_term_t *term)
{
	assert(!term->class.negated);

	const int count = term->class.count;
	regex_sequence_t *alternatives
		= malloc(count * sizeof(regex_sequence_t));
	assert(NULL != alternatives);

	for (int i = 0; i < count; ++i) {
		regex_term_t *terms = malloc(sizeof(regex_term_t));
		/* Check before writing through the pointer below. */
		assert(NULL != terms);

		terms[0].quantifier = REGEX_QUANTIFIER_NONE;
		terms[0].type = REGEX_TERM_LITERAL;
		terms[0].literal = term->class.contents[i];

		alternatives[i].count = alternatives[i].capacity = 1;
		alternatives[i].contents = terms;
	}

	/* All members have been read; only now may the class be freed and
	 * the term's payload be overwritten with the sub-expression. */
	regex_class_free(&term->class);
	term->type = REGEX_TERM_SUBEXPR;
	term->subexpr.count = term->subexpr.capacity = count;
	term->subexpr.contents = alternatives;
}
static void deep_copy_term(regex_term_t *dst, regex_term_t *src);

/*
 * Fill dst with a deep copy of the sequence src.
 *
 * dst receives a freshly allocated term array sized to src->count
 * (capacity is set equal to count), and every term is duplicated with
 * deep_copy_term().  Any previous contents of dst are overwritten
 * without being freed.  Allocation failure trips an assertion.
 */
static void deep_copy_sequence(regex_sequence_t *dst, regex_sequence_t *src)
{
	dst->capacity = src->count;
	dst->count = dst->capacity;

	dst->contents = malloc(dst->capacity * sizeof *dst->contents);
	assert(NULL != dst->contents);

	int k = 0;
	while (k < dst->count) {
		deep_copy_term(&dst->contents[k], &src->contents[k]);
		++k;
	}
}
/*
 * Fill dst with a deep copy of the term src.
 *
 * Wildcard and class terms are rejected by assertion.  All other terms
 * are copied bytewise; for a sub-expression the alternatives array is
 * additionally reallocated (capacity trimmed to count) and each
 * alternative is duplicated with deep_copy_sequence(), so dst shares
 * no sequence storage with src.
 */
static void deep_copy_term(regex_term_t *dst, regex_term_t *src)
{
	assert(REGEX_TERM_WILDCARD != src->type);
	assert(REGEX_TERM_CLASS != src->type);

	/* Bytewise copy first: carries over type, quantifier and payload. */
	memcpy(dst, src, sizeof *dst);

	if (REGEX_TERM_SUBEXPR != src->type)
		return;

	/* The copied contents pointer still aliases src's array; replace
	 * it with a private one before copying the alternatives across. */
	dst->subexpr.capacity = src->subexpr.count;
	dst->subexpr.contents
		= malloc(dst->subexpr.capacity * sizeof *dst->subexpr.contents);
	assert(NULL != dst->subexpr.contents);

	for (int alt = 0; alt < dst->subexpr.count; ++alt) {
		regex_sequence_t *to = &dst->subexpr.contents[alt];
		regex_sequence_t *from = &src->subexpr.contents[alt];
		deep_copy_sequence(to, from);
	}
}
/*
 * Rewrite a '+'-quantified term in place as an unquantified
 * sub-expression with a single alternative of two terms: the original
 * term with no quantifier, followed by a deep copy of it with the
 * star quantifier.
 *
 * The first slot is a shallow copy, so it takes over whatever storage
 * the original term pointed to; the second slot gets its own copy via
 * deep_copy_term().
 */
static void desugar_plus(regex_term_t *term)
{
	regex_sequence_t *seq = malloc(sizeof(regex_sequence_t));
	assert(NULL != seq);

	regex_term_t *pair = malloc(2 * sizeof(regex_term_t));
	assert(NULL != pair);

	/* pair[0]: the term exactly once (shallow move of the original). */
	memcpy(&pair[0], term, sizeof(regex_term_t));
	pair[0].quantifier = REGEX_QUANTIFIER_NONE;

	/* pair[1]: an independent copy, repeated zero or more times. */
	deep_copy_term(&pair[1], term);
	pair[1].quantifier = REGEX_QUANTIFIER_STAR;

	seq->count = seq->capacity = 2;
	seq->contents = pair;

	/* Only now overwrite the original term with the wrapper. */
	term->quantifier = REGEX_QUANTIFIER_NONE;
	term->type = REGEX_TERM_SUBEXPR;
	term->subexpr.count = term->subexpr.capacity = 1;
	term->subexpr.contents = seq;
}
/*
 * Rewrite a '?'-quantified term in place as an unquantified
 * sub-expression with two single-term alternatives: an empty term,
 * and the original term with its quantifier removed.
 *
 * The original term is moved into the second alternative by shallow
 * copy, so any storage it points to is now reachable through that
 * copy.  Allocation failure trips an assertion.
 */
static void desugar_qmark(regex_term_t *term)
{
	regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t));
	assert(NULL != alternatives);

	/* Alternative 0: a lone empty term. */
	alternatives[0].count = alternatives[0].capacity = 1;
	alternatives[0].contents = malloc(sizeof(regex_term_t));
	assert(NULL != alternatives[0].contents);
	alternatives[0].contents[0].quantifier = REGEX_QUANTIFIER_NONE;
	alternatives[0].contents[0].type = REGEX_TERM_EMPTY;

	/* Alternative 1: the original term, unquantified.  Both count and
	 * capacity of *this* alternative must be set; malloc'd memory is
	 * uninitialised, so leaving capacity unset would be a latent read
	 * of an indeterminate value. */
	alternatives[1].count = alternatives[1].capacity = 1;
	alternatives[1].contents = malloc(sizeof(regex_term_t));
	assert(NULL != alternatives[1].contents);
	memcpy(&alternatives[1].contents[0], term, sizeof(regex_term_t));
	alternatives[1].contents[0].quantifier = REGEX_QUANTIFIER_NONE;

	/* Replace the original term with the two-way sub-expression. */
	term->quantifier = REGEX_QUANTIFIER_NONE;
	term->type = REGEX_TERM_SUBEXPR;
	term->subexpr.count = term->subexpr.capacity = 2;
	term->subexpr.contents = alternatives;
}
/*
 * Desugar a single term in place, in two passes.
 *
 * Pass 1 deals with the term's type: classes are expanded by
 * desugar_class(), sub-expressions are desugared recursively with
 * desugar_regex(), literals and empty terms are left alone, and a
 * wildcard trips an assertion.
 *
 * Pass 2 deals with the quantifier: '+' and '?' are rewritten by
 * desugar_plus() and desugar_qmark(); no quantifier and '*' are kept.
 *
 * The type pass runs first, so a quantified class has already become
 * a sub-expression by the time its quantifier is rewritten.
 */
static void desugar_term(regex_term_t *term)
{
	switch (term->type) {
	case REGEX_TERM_EMPTY:
	case REGEX_TERM_LITERAL:
		/* Nothing to do. */
		break;

	case REGEX_TERM_SUBEXPR:
		desugar_regex(&term->subexpr);
		break;

	case REGEX_TERM_CLASS:
		desugar_class(term);
		break;

	case REGEX_TERM_WILDCARD:
		assert(false);
		break;
	}

	switch (term->quantifier) {
	case REGEX_QUANTIFIER_NONE:
	case REGEX_QUANTIFIER_STAR:
		/* Kept as-is. */
		break;

	case REGEX_QUANTIFIER_QMARK:
		desugar_qmark(term);
		break;

	case REGEX_QUANTIFIER_PLUS:
		desugar_plus(term);
		break;
	}
}
/*
 * Desugar a whole regex in place: visit every term of every
 * alternative, in order, and hand it to desugar_term().
 */
void desugar_regex(regex_t *regex)
{
	for (int alt = 0; alt < regex->count; ++alt) {
		for (int pos = 0; pos < regex->contents[alt].count; ++pos)
			desugar_term(&regex->contents[alt].contents[pos]);
	}
}