Compare commits

..

3 Commits

26 changed files with 852 additions and 812 deletions

18
.build.yml Normal file
View File

@@ -0,0 +1,18 @@
# CI build manifest: installs a Clang/CMake/Ninja toolchain on Alpine edge,
# fetches the repository, then runs the configure, build and test tasks.
# NOTE(review): the layout looks like a builds.sr.ht manifest — confirm.
image: alpine/edge
# compiler-rt is presumably required for the sanitizer runtimes that
# -DSANITIZERS=on enables in the configure task — verify.
packages:
- clang
- cmake
- compiler-rt
- ninja
sources:
- https://git.sr.ht/~cdo/regex-engine
# Every task first enters the source directory named regex-engine.
tasks:
- configure: |
cd regex-engine
cmake -GNinja -Bbuild -DSANITIZERS=on -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=clang
- build: |
cd regex-engine
scripts/build.sh
- test: |
cd regex-engine
scripts/test.sh

View File

@@ -8,7 +8,6 @@ enable_testing()
function(set_default_target_options target) function(set_default_target_options target)
set_property(TARGET ${target} PROPERTY C_STANDARD 11) set_property(TARGET ${target} PROPERTY C_STANDARD 11)
set_property(TARGET ${target} PROPERTY C_EXTENSIONS OFF)
target_compile_options(${target} PRIVATE -Wall -Wextra -pedantic) target_compile_options(${target} PRIVATE -Wall -Wextra -pedantic)
if(${SANITIZERS}) if(${SANITIZERS})
target_compile_options(${target} PRIVATE -fsanitize=address,undefined) target_compile_options(${target} PRIVATE -fsanitize=address,undefined)
@@ -19,4 +18,3 @@ endfunction()
add_subdirectory(lib) add_subdirectory(lib)
add_subdirectory(tests) add_subdirectory(tests)
add_subdirectory(demo) add_subdirectory(demo)
add_subdirectory(benchmarks)

6
README
View File

@@ -7,8 +7,8 @@ so here we are.
Grammar Grammar
The engine does not support any specific standard's syntax, unless by This engine is not going to be strictly supporting any standard
coincidence. The grammar I've implemented for expressions is: syntax; the expression syntax I intend to support follows.
regex ::= sequence ( '|' sequence )* regex ::= sequence ( '|' sequence )*
sequence ::= term+ sequence ::= term+
@@ -23,7 +23,7 @@ coincidence. The grammar I've implemented for expressions is:
The build uses CMake. There are two scripts, build.sh and test.sh, The build uses CMake. There are two scripts, build.sh and test.sh,
which will (much to everybody's shock) build the project and run the which will (much to everybody's shock) build the project and run the
tests. I use Clang but the code is ISO C11 so it should compile just tests. I use Clang but the code is ISO C11, it should compile just
fine with GCC. You might need to faff with CMakeLists.txt to get it fine with GCC. You might need to faff with CMakeLists.txt to get it
to work with another compiler due to command-line flag nonsense. to work with another compiler due to command-line flag nonsense.

View File

@@ -1,20 +0,0 @@
# Support library linked into every benchmark executable. Its public
# headers are taken from the "include" directory next to this file.
add_library(benchmarking
  benchmarking.c
)
target_include_directories(benchmarking
  PUBLIC
    include
)
set_default_target_options(benchmarking)
# add_benchmark_suite(<source>)
#
# Creates one benchmark executable from a single C source file. The target
# is named after the source with its ".c" extension removed, receives the
# project's default target options, and links against the regex library
# (lib), the benchmarking support library and libm.
#
# Example: add_benchmark_suite(matching_benchmarks.c)
function(add_benchmark_suite source)
  # The dot is escaped so that only a literal ".c" suffix is stripped; an
  # unescaped "." would match any character followed by "c".
  string(REGEX REPLACE "\\.c$" "" name "${source}")
  add_executable(${name} ${source})
  set_default_target_options(${name})
  target_link_libraries(${name} PRIVATE lib benchmarking m)
endfunction()
# add_benchmark_suites(<source>...)
#
# Registers a benchmark executable for each source file passed as an
# argument by forwarding it to add_benchmark_suite().
function(add_benchmark_suites)
  foreach(suite_source IN ITEMS ${ARGN})
    add_benchmark_suite(${suite_source})
  endforeach()
endfunction()
# Benchmark suites built by this directory; add new source files here.
add_benchmark_suites(matching_benchmarks.c)

View File

@@ -1,85 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "benchmarking.h"
#include <assert.h>
#include <math.h>
#include <stdio.h>
/* Exchange the values held by two double lvalues. */
#define SWAP(x, y)                                                        \
    do {                                                                  \
        const double swap_hold = (x);                                     \
        (x) = (y);                                                        \
        (y) = swap_hold;                                                  \
    } while (0)
/* Storage for the clock() readings bracketing a timed region. */
clock_t benchmark_start, benchmark_end;
/*
 * Sort the first n elements of xs into ascending order, in place.
 *
 * Uses a recursive three-way partition around the middle element: values
 * below the pivot are moved to the front, values above it to the back, and
 * values equal to it are left in between. Only the two outer regions are
 * sorted recursively.
 */
static void sort(double *xs, int n)
{
    if (n < 1)
        return;

    const double pivot = xs[(n - 1) / 2];
    int below = 0;     /* xs[0 .. below) holds values < pivot */
    int cursor = 0;    /* xs[below .. cursor) holds values == pivot */
    int above = n - 1; /* xs(above .. n) holds values > pivot */

    while (cursor <= above) {
        if (xs[cursor] < pivot) {
            SWAP(xs[cursor], xs[below]);
            ++below;
            ++cursor;
            continue;
        }
        if (xs[cursor] > pivot) {
            /* The element swapped in is unexamined, so stay on cursor. */
            SWAP(xs[cursor], xs[above]);
            --above;
            continue;
        }
        ++cursor;
    }

    sort(xs, below);
    sort(xs + above + 1, n - above - 1);
}
/*
 * Compute summary statistics over reps timing samples.
 *
 * res  - the samples; sorted in place into ascending order as a side effect.
 * reps - number of samples; must be greater than zero.
 * out  - receives the sample count, total, median (the element at index
 *        reps / 2 of the sorted samples), mean, minimum, maximum and the
 *        sample standard deviation (n - 1 denominator).
 *
 * With a single sample the n - 1 denominator would be zero, so the standard
 * deviation is reported as 0 rather than NaN.
 */
void benchmark_summarise(double *res, int reps, benchmark_summary_t *out)
{
    assert(reps > 0);

    sort(res, reps);
    const double median = res[reps / 2];

    double sum = 0;
    for (int i = 0; i < reps; ++i)
        sum += res[i];
    const double mean = sum / reps;

    double diff_sum = 0;
    for (int i = 0; i < reps; ++i)
        diff_sum += pow(res[i] - mean, 2);
    /* Avoid 0 / 0 when there is only one sample. */
    const double variance = reps > 1 ? diff_sum / (reps - 1) : 0;

    out->reps = reps;
    out->total = sum;
    out->median = median;
    out->mean = mean;
    /* res is sorted, so the extremes sit at either end. */
    out->min = res[0];
    out->max = res[reps - 1];
    out->stddev = sqrt(variance);
}
/*
 * Print the column titles for the table of benchmark results.
 * NOTE(review): the "(µs)" columns are one wider than the matching data
 * columns, presumably to offset the multi-byte µ — confirm.
 */
void benchmark_print_header(void)
{
    static const char *const titles[] = {
        "benchmark", "median (µs)", "mean (µs)",
        "min (µs)",  "max (µs)",    "stddev",
    };

    printf(
        "%-12s %13s %13s %13s %13s %12s\n", titles[0], titles[1],
        titles[2], titles[3], titles[4], titles[5]);
}
/*
 * Print one table row: the benchmark's name followed by the median, mean,
 * minimum, maximum and standard deviation from its summary, each with two
 * decimal places.
 */
void benchmark_print(const char *name, const benchmark_summary_t *s)
{
    const double median = s->median;
    const double mean = s->mean;
    const double lowest = s->min;
    const double highest = s->max;
    const double spread = s->stddev;

    printf(
        "%-12s %12.2f %12.2f %12.2f %12.2f %12.2f\n", name, median, mean,
        lowest, highest, spread);
}

View File

@@ -1,50 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef BENCHMARKING_H
#define BENCHMARKING_H
#include <time.h>
/* Summary statistics over the timing samples of one benchmark. */
typedef struct {
    int reps;      /* number of samples */
    double total;  /* sum of all samples */
    double median;
    double mean;
    double min;
    double max;
    double stddev; /* standard deviation of the samples */
} benchmark_summary_t;
/*
 * Convert a clock() reading (or a difference of readings) to microseconds.
 * The argument is parenthesised so that the cast applies to the whole
 * expression, e.g. CLOCK_MICROS(end - start).
 */
#define CLOCK_MICROS(c) (1000000 * (double)(c) / CLOCKS_PER_SEC)
/* Call once before any benchmark is run: prints the results table header. */
#define BENCHMARKING_BEGIN() benchmark_print_header()

/* Value for main() to return once every benchmark has been run. */
#define BENCHMARKING_END() 0

/* Record the start of the timed region of a benchmark function. */
#define START_CLOCK() ((void)(benchmark_start = clock()))

/* Record the end of the timed region of a benchmark function. */
#define STOP_CLOCK() ((void)(benchmark_end = clock()))
/*
 * Run fn(__VA_ARGS__) reps times, then summarise and print the timings.
 *
 * fn is expected to bracket its timed region with START_CLOCK() and
 * STOP_CLOCK(); after each call the elapsed time between the two recorded
 * readings is stored, in microseconds, as one sample.
 *
 * reps is parenthesised at every use so that an expression argument is
 * grouped correctly. It is still expanded more than once, so it must not
 * have side effects. The samples live in an automatic array of reps
 * doubles, so very large repetition counts consume stack space.
 */
#define RUN_BENCHMARK(reps, name, fn, ...)                                \
    do {                                                                  \
        double res[(reps)];                                               \
        for (int i = 0; i < (reps); ++i) {                                \
            fn(__VA_ARGS__);                                              \
            res[i] = CLOCK_MICROS(benchmark_end)                          \
                - CLOCK_MICROS(benchmark_start);                          \
        }                                                                 \
        benchmark_summary_t summary;                                      \
        benchmark_summarise(res, (reps), &summary);                       \
        benchmark_print(name, &summary);                                  \
    } while (0)
/* Clock readings written by START_CLOCK() and STOP_CLOCK(). */
extern clock_t benchmark_start, benchmark_end;
/* Fill *out with summary statistics for the reps samples in res. */
void benchmark_summarise(double *res, int reps, benchmark_summary_t *out);
/* Print the column titles of the results table. */
void benchmark_print_header(void);
/* Print one row of the results table for the named benchmark. */
void benchmark_print(const char *name, const benchmark_summary_t *summary);
#endif

View File

@@ -1,54 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "benchmarking.h"
#include "compile.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
/* Length of the random input string matched in each benchmark run. */
#define LEN 1000

/* Inclusive range of characters the random input is drawn from. */
#define RANGE_FIRST 'a'
#define RANGE_LAST 'z'

/*
 * Map a non-negative integer onto a character in [RANGE_FIRST, RANGE_LAST].
 * The argument is parenthesised so that an expression argument is reduced
 * as a whole rather than only its last operand.
 */
#define CLAMP_CHAR(x) (RANGE_FIRST + (x) % (RANGE_LAST - RANGE_FIRST + 1))
/*
 * Compile regex into an automaton, benchmark matching against it reps
 * times under the given name, then free the automaton.
 *
 * The result of compile() is checked: if the expression does not compile,
 * the automaton is never initialised, so it must not be benchmarked or
 * freed. With assertions enabled this aborts; otherwise the benchmark is
 * skipped (break leaves the enclosing do/while).
 */
#define RUN_MATCHING_BENCHMARK(reps, name, regex)                         \
    do {                                                                  \
        fsa_t fsa;                                                        \
        const bool compiled = compile(regex, strlen(regex), &fsa);        \
        assert(compiled);                                                 \
        if (!compiled)                                                    \
            break;                                                        \
        RUN_BENCHMARK(reps, name, matching_benchmark, &fsa);              \
        fsa_free(&fsa);                                                   \
    } while (0)
/*
 * One benchmark repetition: build a random LEN-character input and time a
 * single fsa_accepts() call on it. Input generation happens before the
 * clock is started, so only the match itself is timed.
 */
static void matching_benchmark(const fsa_t *fsa)
{
    char input[LEN];
    int pos = 0;
    while (pos < LEN) {
        input[pos] = CLAMP_CHAR(rand());
        ++pos;
    }

    /* volatile so the otherwise unused result is still stored. */
    volatile bool accepted;
    START_CLOCK();
    accepted = fsa_accepts(fsa, input, LEN);
    STOP_CLOCK();
    (void)accepted;
}
/*
 * Seed the random number generator from the microsecond part of the
 * current time, then run each matching benchmark and print its results.
 */
int main(void)
{
    struct timeval now;
    gettimeofday(&now, NULL);
    srand(now.tv_usec);

    BENCHMARKING_BEGIN();

    RUN_MATCHING_BENCHMARK(10000, "foo or bar", ".*(foo|bar).*");
    RUN_MATCHING_BENCHMARK(10000, "regex #1", ".*(abc!?)*|dd+.*");
    RUN_MATCHING_BENCHMARK(10000, "regex #2", ".*(l|wh)?[aeiou]+.*");

    return BENCHMARKING_END();
}

View File

@@ -12,11 +12,6 @@
#define BUFFER_START_CAPACITY 128 #define BUFFER_START_CAPACITY 128
#define PREFIX ".*("
#define PREFIX_LEN 3
#define SUFFIX ").*"
#define SUFFIX_LEN 3
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
if (argc != 2) { if (argc != 2) {
@@ -24,15 +19,8 @@ int main(int argc, char *argv[])
return EXIT_FAILURE; return EXIT_FAILURE;
} }
const int input_len = strlen(argv[1]);
const int regex_len = input_len + 6;
char *regex = malloc(regex_len);
memcpy(regex, PREFIX, PREFIX_LEN);
memcpy(regex + PREFIX_LEN, argv[1], input_len);
memcpy(regex + PREFIX_LEN + input_len, SUFFIX, SUFFIX_LEN);
fsa_t dfa; fsa_t dfa;
if (!compile(regex, regex_len, &dfa)) { if (!compile(argv[1], strlen(argv[1]), &dfa)) {
fprintf(stderr, "Failed to parse regex\n"); fprintf(stderr, "Failed to parse regex\n");
return EXIT_FAILURE; return EXIT_FAILURE;
} }

View File

@@ -2,9 +2,11 @@ add_library(lib
compile.c compile.c
construct.c construct.c
convert.c convert.c
desugar.c
fsa.c fsa.c
min_heap.c min_heap.c
parse.c parse.c
regex.c
) )
set_default_target_options(lib) set_default_target_options(lib)
target_include_directories(lib PUBLIC include) target_include_directories(lib PUBLIC include)

View File

@@ -5,19 +5,21 @@
#include "compile.h" #include "compile.h"
#include "parse.h"
#include "desugar.h"
#include "construct.h" #include "construct.h"
#include "convert.h" #include "convert.h"
#include "parse.h"
bool compile(const char *regex, int len, fsa_t *dfa_out) bool compile(const char *regex, int len, fsa_t *dfa_out)
{ {
parse_tree_t pt; regex_t pt;
if (-1 == parse_expr(regex, len, &pt)) if (-1 == parse_expr(regex, len, &pt))
return false; return false;
desugar_regex(&pt);
fsa_t nfa; fsa_t nfa;
construct_nfa(&pt, &nfa); construct_nfa(&pt, &nfa);
parse_tree_free(&pt); regex_free(&pt);
convert_to_dfa(&nfa, dfa_out); convert_to_dfa(&nfa, dfa_out);
fsa_free(&nfa); fsa_free(&nfa);

View File

@@ -25,9 +25,6 @@ static void add_fsa(fsa_t *f, const fsa_t *o, int *init_out, int *final_out)
} }
memcpy(f->states + f->count, o->states, o->count * sizeof(fsa_state_t)); memcpy(f->states + f->count, o->states, o->count * sizeof(fsa_state_t));
// Mark o's final state as non-final.
f->states[f->count].final = false;
// Retarget the rules of the copied states to refer to the new // Retarget the rules of the copied states to refer to the new
// state indices. // state indices.
for (int i = f->count; i < count; ++i) { for (int i = f->count; i < count; ++i) {
@@ -115,119 +112,62 @@ static void prepend_fsa(fsa_t *f, const fsa_t *o)
f->count = count; f->count = count;
} }
static void construct_base(fsa_t *out) static void construct_base(fsa_t *out, int symbol)
{ {
fsa_init(out); fsa_init(out);
const int id = fsa_add_state(out);
fsa_add_rule(out, id, out->initial, symbol);
out->initial = id;
out->states[0].final = true; out->states[0].final = true;
out->initial = fsa_add_state(out);
}
static void construct_symbol(fsa_t *out, int symbol)
{
construct_base(out);
fsa_add_rule(out, out->initial, 0, symbol);
}
static bool in_class(const parse_class_t *class, char c)
{
for (int i = 0; i < class->count; ++i) {
if (class->contents[i] == c)
return true;
}
return false;
}
static void construct_class(fsa_t *out, const parse_class_t *class)
{
construct_base(out);
if (class->negated) {
for (int i = 0; i < CHAR_COUNT; ++i) {
if (!in_class(class, i))
fsa_add_rule(out, out->initial, 0, i);
}
} else {
for (int i = 0; i < class->count; ++i)
fsa_add_rule(out, out->initial, 0, class->contents[i]);
}
}
static void construct_wildcard(fsa_t *out)
{
construct_base(out);
for (int i = 0; i < CHAR_COUNT; ++i)
fsa_add_rule(out, out->initial, 0, i);
}
static void base_quantify(fsa_t *out, int *init_out, int *final_out)
{
fsa_t f;
memcpy(&f, out, sizeof(fsa_t));
construct_base(out);
add_fsa(out, &f, init_out, final_out);
fsa_add_rule(out, out->initial, *init_out, EPSILON);
fsa_add_rule(out, *final_out, 0, EPSILON);
} }
static void construct_star(fsa_t *out) static void construct_star(fsa_t *out)
{ {
int sub_init, sub_final; fsa_t f;
base_quantify(out, &sub_init, &sub_final); memcpy(&f, out, sizeof(fsa_t));
fsa_add_rule(out, sub_final, sub_init, EPSILON);
fsa_add_rule(out, out->initial, 0, EPSILON); construct_base(out, EPSILON);
int f_initial, f_final;
add_fsa(out, &f, &f_initial, &f_final);
fsa_add_rule(out, out->initial, f_initial, EPSILON);
fsa_add_rule(out, f_final, f_initial, EPSILON);
fsa_add_rule(out, f_final, 0, EPSILON);
} }
static void construct_plus(fsa_t *out) static void construct_term(const regex_term_t *term, fsa_t *out)
{
int sub_init, sub_final;
base_quantify(out, &sub_init, &sub_final);
fsa_add_rule(out, sub_final, sub_init, EPSILON);
}
static void construct_qmark(fsa_t *out)
{
int sub_init, sub_final;
base_quantify(out, &sub_init, &sub_final);
fsa_add_rule(out, out->initial, 0, EPSILON);
}
static void construct_term(const parse_term_t *term, fsa_t *out)
{ {
switch (term->type) { switch (term->type) {
case PARSE_TERM_EMPTY: case REGEX_TERM_EMPTY:
construct_symbol(out, EPSILON); construct_base(out, EPSILON);
break; break;
case PARSE_TERM_LITERAL: case REGEX_TERM_LITERAL:
construct_symbol(out, term->literal); construct_base(out, term->literal);
break; break;
case PARSE_TERM_SUBEXPR: case REGEX_TERM_SUBEXPR:
construct_nfa(&term->subexpr, out); construct_nfa(&term->subexpr, out);
break; break;
case PARSE_TERM_CLASS: case REGEX_TERM_WILDCARD:
construct_class(out, &term->class); case REGEX_TERM_CLASS:
break; assert(false);
case PARSE_TERM_WILDCARD:
construct_wildcard(out);
break; break;
} }
switch (term->quantifier) { switch (term->quantifier) {
case PARSE_QUANTIFIER_NONE: case REGEX_QUANTIFIER_NONE:
break; break;
case PARSE_QUANTIFIER_STAR: case REGEX_QUANTIFIER_STAR:
construct_star(out); construct_star(out);
break; break;
case PARSE_QUANTIFIER_PLUS: case REGEX_QUANTIFIER_PLUS:
construct_plus(out); case REGEX_QUANTIFIER_QMARK:
break; assert(false);
case PARSE_QUANTIFIER_QMARK:
construct_qmark(out);
break; break;
} }
assert(out->states[0].final); assert(out->states[0].final);
} }
static void construct_sequence(const parse_sequence_t *seq, fsa_t *out) static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
{ {
assert(seq->count > 0); assert(seq->count > 0);
@@ -261,7 +201,7 @@ static void construct_union(fsa_t *f, const fsa_t *o)
fsa_add_rule(f, final, 0, EPSILON); fsa_add_rule(f, final, 0, EPSILON);
} }
void construct_nfa(const parse_tree_t *regex, fsa_t *out) void construct_nfa(const regex_t *regex, fsa_t *out)
{ {
assert(regex->count > 0); assert(regex->count > 0);

View File

@@ -160,17 +160,13 @@ static void insert(table_t *table, int *nfa_states, int count, int dfa_state)
table->entries = calloc(table->capacity, sizeof(table_entry_t)); table->entries = calloc(table->capacity, sizeof(table_entry_t));
assert(NULL != table->entries); assert(NULL != table->entries);
for (int i = 0; i < old_capacity; ++i) { for (int i = 0; i < old_capacity; ++i) {
if (0 != entries[i].nfa_state_count) { if (0 != entries[i].nfa_state_count)
insert( continue;
table, entries[i].nfa_states, entries[i].nfa_state_count, insert(
entries[i].dfa_state); table, entries[i].nfa_states, entries[i].nfa_state_count,
} entries[i].dfa_state);
} }
free(entries); free(entries);
// Recurse to insert the entry now that the table has been
// expanded.
insert(table, nfa_states, count, dfa_state);
} }
static bool lookup_or_create( static bool lookup_or_create(

150
lib/desugar.c Normal file
View File

@@ -0,0 +1,150 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "desugar.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/*
 * Rewrite a character-class term, in place, as a subexpression with one
 * single-literal alternative per character in the class; e.g. [abc]
 * becomes (a|b|c). The term's quantifier is left untouched.
 *
 * Negated classes are not handled and trip an assertion.
 */
static void desugar_class(regex_term_t *term)
{
    assert(!term->class.negated);

    const int count = term->class.count;
    regex_sequence_t *alternatives
        = malloc(count * sizeof(regex_sequence_t));
    assert(NULL != alternatives);

    for (int i = 0; i < count; ++i) {
        regex_term_t *terms = malloc(sizeof(regex_term_t));
        /* Checked like every other allocation in this file. */
        assert(NULL != terms);
        terms[0].quantifier = REGEX_QUANTIFIER_NONE;
        terms[0].type = REGEX_TERM_LITERAL;
        terms[0].literal = term->class.contents[i];
        alternatives[i].count = alternatives[i].capacity = 1;
        alternatives[i].contents = terms;
    }

    /* The class shares a union with subexpr: free it before overwriting. */
    regex_class_free(&term->class);
    term->type = REGEX_TERM_SUBEXPR;
    term->subexpr.count = term->subexpr.capacity = count;
    term->subexpr.contents = alternatives;
}
static void deep_copy_term(regex_term_t *dst, regex_term_t *src);

/*
 * Initialise dst as an independent copy of src: a freshly allocated array
 * of exactly src->count terms, each one deep-copied.
 */
static void deep_copy_sequence(regex_sequence_t *dst, regex_sequence_t *src)
{
    const int n = src->count;

    regex_term_t *copies = malloc(n * sizeof(regex_term_t));
    assert(NULL != copies);
    for (int i = 0; i < n; ++i)
        deep_copy_term(&copies[i], &src->contents[i]);

    dst->count = n;
    dst->capacity = n;
    dst->contents = copies;
}
/*
 * Initialise dst as an independent copy of src. Subexpressions are copied
 * recursively so that dst shares no allocations with src. Wildcard and
 * class terms are not supported and trip an assertion.
 */
static void deep_copy_term(regex_term_t *dst, regex_term_t *src)
{
    assert(REGEX_TERM_WILDCARD != src->type);
    assert(REGEX_TERM_CLASS != src->type);

    /* Copies quantifier, type and payload (including subexpr.count). */
    memcpy(dst, src, sizeof(regex_term_t));
    if (REGEX_TERM_SUBEXPR != src->type)
        return;

    /* Replace the shared alternatives array with a private copy. */
    const int n = src->subexpr.count;
    regex_sequence_t *seqs = malloc(n * sizeof(regex_sequence_t));
    assert(NULL != seqs);
    for (int i = 0; i < n; ++i)
        deep_copy_sequence(&seqs[i], &src->subexpr.contents[i]);

    dst->subexpr.capacity = n;
    dst->subexpr.contents = seqs;
}
/*
 * Rewrite x+ in place as the subexpression (xx*): a single alternative
 * holding the original term without a quantifier, followed by a deep copy
 * of it quantified with a star.
 */
static void desugar_plus(regex_term_t *term)
{
    regex_sequence_t *seq = malloc(sizeof(regex_sequence_t));
    assert(NULL != seq);
    regex_term_t *pair = malloc(2 * sizeof(regex_term_t));
    assert(NULL != pair);

    /* First element takes over the original term's contents... */
    memcpy(&pair[0], term, sizeof(regex_term_t));
    pair[0].quantifier = REGEX_QUANTIFIER_NONE;
    /* ...the second is an independent copy that gets the star. */
    deep_copy_term(&pair[1], term);
    pair[1].quantifier = REGEX_QUANTIFIER_STAR;

    seq->count = 2;
    seq->capacity = 2;
    seq->contents = pair;

    term->quantifier = REGEX_QUANTIFIER_NONE;
    term->type = REGEX_TERM_SUBEXPR;
    term->subexpr.count = 1;
    term->subexpr.capacity = 1;
    term->subexpr.contents = seq;
}
/*
 * Rewrite x? in place as a two-alternative subexpression: the empty term,
 * or the original term without a quantifier.
 */
static void desugar_qmark(regex_term_t *term)
{
    regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t));
    assert(NULL != alternatives);

    /* First alternative: match nothing. */
    alternatives[0].count = alternatives[0].capacity = 1;
    alternatives[0].contents = malloc(sizeof(regex_term_t));
    assert(NULL != alternatives[0].contents);
    alternatives[0].contents[0].quantifier = REGEX_QUANTIFIER_NONE;
    alternatives[0].contents[0].type = REGEX_TERM_EMPTY;

    /*
     * Second alternative: the original term, taking over its contents.
     * Both count and capacity of alternatives[1] are set here.
     */
    alternatives[1].count = alternatives[1].capacity = 1;
    alternatives[1].contents = malloc(sizeof(regex_term_t));
    assert(NULL != alternatives[1].contents);
    memcpy(&alternatives[1].contents[0], term, sizeof(regex_term_t));
    alternatives[1].contents[0].quantifier = REGEX_QUANTIFIER_NONE;

    term->quantifier = REGEX_QUANTIFIER_NONE;
    term->type = REGEX_TERM_SUBEXPR;
    term->subexpr.count = term->subexpr.capacity = 2;
    term->subexpr.contents = alternatives;
}
/*
 * Desugar a single term in place: first its payload (classes are expanded,
 * subexpressions are desugared recursively), then its quantifier (+ and ?
 * are rewritten; none and * are kept). Wildcard terms are not handled here
 * and trip an assertion.
 */
static void desugar_term(regex_term_t *term)
{
    const regex_term_type_t type = term->type;
    if (REGEX_TERM_WILDCARD == type) {
        assert(false);
    } else if (REGEX_TERM_CLASS == type) {
        desugar_class(term);
    } else if (REGEX_TERM_SUBEXPR == type) {
        desugar_regex(&term->subexpr);
    }
    /* Literal and empty terms need no rewriting. */

    const regex_quantifier_t quantifier = term->quantifier;
    if (REGEX_QUANTIFIER_PLUS == quantifier) {
        desugar_plus(term);
    } else if (REGEX_QUANTIFIER_QMARK == quantifier) {
        desugar_qmark(term);
    }
}
/*
 * Desugar every term of every alternative of regex, in place.
 */
void desugar_regex(regex_t *regex)
{
    for (int i = 0; i < regex->count; ++i) {
        regex_sequence_t *seq = &regex->contents[i];
        for (int j = 0; j < seq->count; ++j)
            desugar_term(&seq->contents[j]);
    }
}

View File

@@ -33,8 +33,7 @@ int fsa_add_state(fsa_t *fsa)
{ {
if (fsa->count >= fsa->capacity) { if (fsa->count >= fsa->capacity) {
fsa->capacity *= 2; fsa->capacity *= 2;
fsa->states fsa->states = realloc(fsa->states, fsa->capacity);
= realloc(fsa->states, fsa->capacity * sizeof(fsa_state_t));
assert(NULL != fsa->states); assert(NULL != fsa->states);
} }
@@ -57,8 +56,7 @@ void fsa_add_rule(fsa_t *fsa, int from, int to, int input)
fsa_state_t *state = &fsa->states[from]; fsa_state_t *state = &fsa->states[from];
if (state->count >= state->capacity) { if (state->count >= state->capacity) {
state->capacity *= 2; state->capacity *= 2;
state->rules state->rules = realloc(state->rules, state->capacity);
= realloc(state->rules, state->capacity * sizeof(fsa_rule_t));
assert(NULL != state->rules); assert(NULL != state->rules);
} }

View File

@@ -7,8 +7,8 @@
#define CONSTRUCT_H #define CONSTRUCT_H
#include "fsa.h" #include "fsa.h"
#include "parse.h" #include "regex.h"
void construct_nfa(const parse_tree_t *regex, fsa_t *out); void construct_nfa(const regex_t *regex, fsa_t *out);
#endif #endif

13
lib/include/desugar.h Normal file
View File

@@ -0,0 +1,13 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef DESUGAR_H
#define DESUGAR_H
#include "regex.h"
/*
 * Rewrite regex in place into a reduced form: every term of every
 * alternative is visited, including those nested in subexpressions.
 */
void desugar_regex(regex_t *regex);
#endif

View File

@@ -6,53 +6,10 @@
#ifndef PARSE_H #ifndef PARSE_H
#define PARSE_H #define PARSE_H
#include <stdbool.h> #include "regex.h"
#define PARSE_FAIL (-1) #define PARSE_FAIL (-1)
typedef struct { int parse_expr(const char *input, int rem, regex_t *out);
bool negated;
int count, capacity;
char *contents;
} parse_class_t;
typedef enum {
PARSE_QUANTIFIER_NONE,
PARSE_QUANTIFIER_STAR,
PARSE_QUANTIFIER_PLUS,
PARSE_QUANTIFIER_QMARK,
} parse_quantifier_t;
typedef enum {
PARSE_TERM_WILDCARD,
PARSE_TERM_CLASS,
PARSE_TERM_LITERAL,
PARSE_TERM_SUBEXPR,
PARSE_TERM_EMPTY,
} parse_term_type_t;
struct _parse_term;
typedef struct {
int count, capacity;
struct _parse_term *contents;
} parse_sequence_t;
typedef struct {
int count, capacity;
parse_sequence_t *contents;
} parse_tree_t;
typedef struct _parse_term {
parse_quantifier_t quantifier;
parse_term_type_t type;
union {
parse_class_t class;
char literal;
parse_tree_t subexpr;
};
} parse_term_t;
int parse_expr(const char *input, int rem, parse_tree_t *out);
void parse_tree_free(const parse_tree_t *t);
#endif #endif

56
lib/include/regex.h Normal file
View File

@@ -0,0 +1,56 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef REGEX_H
#define REGEX_H
#include <stdbool.h>
/* A character class: a growable array of characters plus a negation flag. */
typedef struct {
bool negated;
int count, capacity;
char *contents;
} regex_class_t;
/* Quantifier attached to a term. */
typedef enum {
REGEX_QUANTIFIER_NONE,
REGEX_QUANTIFIER_STAR,
REGEX_QUANTIFIER_PLUS,
REGEX_QUANTIFIER_QMARK,
} regex_quantifier_t;
/* Discriminator for the payload union of a term. */
typedef enum {
REGEX_TERM_WILDCARD,
REGEX_TERM_CLASS,
REGEX_TERM_LITERAL,
REGEX_TERM_SUBEXPR,
REGEX_TERM_EMPTY,
} regex_term_type_t;
/* Forward declaration: sequences hold terms, and terms may hold a regex. */
struct _regex_term;
/* A growable array of terms. */
typedef struct {
int count, capacity;
struct _regex_term *contents;
} regex_sequence_t;
/*
 * A growable array of sequences.
 * NOTE(review): POSIX <regex.h> also declares a type named regex_t, and this
 * header shares that file name — confirm the two are never mixed.
 */
typedef struct {
int count, capacity;
regex_sequence_t *contents;
} regex_t;
/*
 * One term: a quantifier, a type tag, and a payload union. The accessors in
 * this project read class for REGEX_TERM_CLASS, literal for
 * REGEX_TERM_LITERAL and subexpr for REGEX_TERM_SUBEXPR.
 */
typedef struct _regex_term {
regex_quantifier_t quantifier;
regex_term_type_t type;
union {
regex_class_t class;
char literal;
regex_t subexpr;
};
} regex_term_t;
/* Release the storage owned by *t (not the regex_t object itself). */
void regex_free(const regex_t *t);
/* Release the contents array of *c (not the regex_class_t object itself). */
void regex_class_free(const regex_class_t *c);
#endif

View File

@@ -45,7 +45,7 @@ static int parse_literal(const char *input, int rem, char *out)
} }
} }
static int parse_class(const char *input, int rem, parse_class_t *out) static int parse_class(const char *input, int rem, regex_class_t *out)
{ {
int result, used = 0; int result, used = 0;
@@ -87,7 +87,7 @@ static int parse_class(const char *input, int rem, parse_class_t *out)
return out->count > 0 ? used : -1; return out->count > 0 ? used : -1;
} }
static int parse_term(const char *input, int rem, parse_term_t *out) static int parse_term(const char *input, int rem, regex_term_t *out)
{ {
int result, used = 0; int result, used = 0;
@@ -95,7 +95,7 @@ static int parse_term(const char *input, int rem, parse_term_t *out)
return PARSE_FAIL; return PARSE_FAIL;
if ('.' == input[0]) { if ('.' == input[0]) {
out->type = PARSE_TERM_WILDCARD; out->type = REGEX_TERM_WILDCARD;
++used; ++used;
} else if ('(' == input[0]) { } else if ('(' == input[0]) {
++used; ++used;
@@ -103,7 +103,7 @@ static int parse_term(const char *input, int rem, parse_term_t *out)
result = parse_expr(input + used, rem - used, &out->subexpr); result = parse_expr(input + used, rem - used, &out->subexpr);
if (PARSE_FAIL == result) if (PARSE_FAIL == result)
return PARSE_FAIL; return PARSE_FAIL;
out->type = PARSE_TERM_SUBEXPR; out->type = REGEX_TERM_SUBEXPR;
used += result; used += result;
if (')' != input[used]) if (')' != input[used])
@@ -113,54 +113,54 @@ static int parse_term(const char *input, int rem, parse_term_t *out)
result = parse_class(input + used, rem - used, &out->class); result = parse_class(input + used, rem - used, &out->class);
if (PARSE_FAIL == result) if (PARSE_FAIL == result)
return PARSE_FAIL; return PARSE_FAIL;
out->type = PARSE_TERM_CLASS; out->type = REGEX_TERM_CLASS;
used += result; used += result;
} else { } else {
result = parse_literal(input + used, rem - used, &out->literal); result = parse_literal(input + used, rem - used, &out->literal);
if (PARSE_FAIL == result) if (PARSE_FAIL == result)
return PARSE_FAIL; return PARSE_FAIL;
out->type = PARSE_TERM_LITERAL; out->type = REGEX_TERM_LITERAL;
used += result; used += result;
} }
if (used < rem) { if (used < rem) {
switch (input[used]) { switch (input[used]) {
case '*': case '*':
out->quantifier = PARSE_QUANTIFIER_STAR; out->quantifier = REGEX_QUANTIFIER_STAR;
++used; ++used;
break; break;
case '+': case '+':
out->quantifier = PARSE_QUANTIFIER_PLUS; out->quantifier = REGEX_QUANTIFIER_PLUS;
++used; ++used;
break; break;
case '?': case '?':
out->quantifier = PARSE_QUANTIFIER_QMARK; out->quantifier = REGEX_QUANTIFIER_QMARK;
++used; ++used;
break; break;
default: default:
out->quantifier = PARSE_QUANTIFIER_NONE; out->quantifier = REGEX_QUANTIFIER_NONE;
} }
} else { } else {
out->quantifier = PARSE_QUANTIFIER_NONE; out->quantifier = REGEX_QUANTIFIER_NONE;
} }
return used; return used;
} }
static int parse_sequence(const char *input, int rem, parse_sequence_t *out) static int parse_sequence(const char *input, int rem, regex_sequence_t *out)
{ {
int result, used = 0; int result, used = 0;
out->count = 0; out->count = 0;
out->capacity = SEQUENCE_START_CAPACITY; out->capacity = SEQUENCE_START_CAPACITY;
out->contents = malloc(out->capacity * sizeof(parse_term_t)); out->contents = malloc(out->capacity * sizeof(regex_term_t));
assert(NULL != out->contents); assert(NULL != out->contents);
while (used < rem) { while (used < rem) {
if (out->count >= out->capacity) { if (out->count >= out->capacity) {
out->capacity *= 2; out->capacity *= 2;
out->contents = realloc( out->contents = realloc(
out->contents, out->capacity * sizeof(parse_term_t)); out->contents, out->capacity * sizeof(regex_term_t));
assert(NULL != out->contents); assert(NULL != out->contents);
} }
@@ -175,13 +175,13 @@ static int parse_sequence(const char *input, int rem, parse_sequence_t *out)
return out->count > 0 ? used : -1; return out->count > 0 ? used : -1;
} }
int parse_expr(const char *input, int rem, parse_tree_t *out) int parse_expr(const char *input, int rem, regex_t *out)
{ {
int result, used = 0; int result, used = 0;
out->count = 0; out->count = 0;
out->capacity = TREE_START_CAPACITY; out->capacity = TREE_START_CAPACITY;
out->contents = malloc(out->capacity * sizeof(parse_sequence_t)); out->contents = malloc(out->capacity * sizeof(regex_sequence_t));
assert(NULL != out->contents); assert(NULL != out->contents);
result = parse_sequence(input + used, rem - used, &out->contents[0]); result = parse_sequence(input + used, rem - used, &out->contents[0]);
@@ -198,7 +198,7 @@ int parse_expr(const char *input, int rem, parse_tree_t *out)
if (out->count >= out->capacity) { if (out->count >= out->capacity) {
out->capacity *= 2; out->capacity *= 2;
out->contents = realloc( out->contents = realloc(
out->contents, out->capacity * sizeof(parse_sequence_t)); out->contents, out->capacity * sizeof(regex_sequence_t));
assert(NULL != out->contents); assert(NULL != out->contents);
} }
@@ -212,37 +212,3 @@ int parse_expr(const char *input, int rem, parse_tree_t *out)
return used; return used;
} }
static void class_free(const parse_class_t *c)
{
if (NULL != c->contents)
free(c->contents);
}
static void sequence_free(const parse_sequence_t *s)
{
if (NULL != s->contents) {
for (int i = 0; i < s->count; ++i) {
switch (s->contents[i].type) {
case PARSE_TERM_CLASS:
class_free(&s->contents[i].class);
break;
case PARSE_TERM_SUBEXPR:
parse_tree_free(&s->contents[i].subexpr);
break;
default:
break;
}
}
free(s->contents);
}
}
void parse_tree_free(const parse_tree_t *t)
{
if (NULL != t->contents) {
for (int i = 0; i < t->count; ++i)
sequence_free(&t->contents[i]);
free(t->contents);
}
}

42
lib/regex.c Normal file
View File

@@ -0,0 +1,42 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "regex.h"
#include <stdlib.h>
static void sequence_free(const regex_sequence_t *s)
{
if (NULL != s->contents) {
for (int i = 0; i < s->count; ++i) {
switch (s->contents[i].type) {
case REGEX_TERM_CLASS:
regex_class_free(&s->contents[i].class);
break;
case REGEX_TERM_SUBEXPR:
regex_free(&s->contents[i].subexpr);
break;
default:
break;
}
}
free(s->contents);
}
}
void regex_free(const regex_t *t)
{
if (NULL != t->contents) {
for (int i = 0; i < t->count; ++i)
sequence_free(&t->contents[i]);
free(t->contents);
}
}
/*
 * Release the character array of a class, if it has one. The
 * regex_class_t object itself is not freed.
 */
void regex_class_free(const regex_class_t *c)
{
    if (NULL == c->contents)
        return;
    free(c->contents);
}

View File

@@ -1,4 +1,4 @@
#!/bin/sh #!/bin/sh
cd "$(git rev-parse --show-toplevel)" cd "$(git rev-parse --show-toplevel)"
find . -not \( -path './.git' -prune \) -not \( -path './build' -prune \) \ find . -not \( -path './.git' -prune \) -not \( -path './build' -prune \) \
| entr -cs 'scripts/build.sh && scripts/test.sh' | entr -s 'clear && scripts/build.sh && scripts/test.sh'

View File

@@ -19,6 +19,7 @@ endfunction()
add_test_suites( add_test_suites(
construct_tests.c construct_tests.c
convert_tests.c convert_tests.c
desugar_tests.c
fsa_tests.c fsa_tests.c
integration_tests.c integration_tests.c
min_heap_tests.c min_heap_tests.c

View File

@@ -34,13 +34,13 @@ static bool accepts(const fsa_t *nfa, const char *input)
static void test_empty_expression(void) static void test_empty_expression(void)
{ {
parse_term_t *terms = malloc(1 * sizeof(parse_term_t)); regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE; terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_EMPTY; terms[0].type = REGEX_TERM_EMPTY;
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t)); regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
const parse_tree_t regex const regex_t regex
= { .count = 1, .capacity = 1, .contents = alternatives }; = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
@@ -49,45 +49,20 @@ static void test_empty_expression(void)
ASSERT_TRUE(accepts(&fsa, "")); ASSERT_TRUE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "a")); ASSERT_FALSE(accepts(&fsa, "a"));
parse_tree_free(&regex); regex_free(&regex);
fsa_free(&fsa);
}
static void test_wildcard(void)
{
parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_WILDCARD;
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
const parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_TRUE(accepts(&fsa, "b"));
ASSERT_TRUE(accepts(&fsa, "c"));
ASSERT_TRUE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "aa"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_literal_expression(void) static void test_literal_expression(void)
{ {
parse_term_t *terms = malloc(1 * sizeof(parse_term_t)); regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE; terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_LITERAL; terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t)); regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
const parse_tree_t regex const regex_t regex
= { .count = 1, .capacity = 1, .contents = alternatives }; = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
@@ -96,27 +71,26 @@ static void test_literal_expression(void)
ASSERT_TRUE(accepts(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
parse_tree_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_sequence(void) static void test_sequence(void)
{ {
parse_term_t *terms = malloc(3 * sizeof(parse_term_t)); regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE; terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_LITERAL; terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
terms[1].quantifier = PARSE_QUANTIFIER_NONE; terms[1].quantifier = REGEX_QUANTIFIER_NONE;
terms[1].type = PARSE_TERM_LITERAL; terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b'; terms[1].literal = 'b';
terms[2].quantifier = PARSE_QUANTIFIER_NONE; terms[2].quantifier = REGEX_QUANTIFIER_NONE;
terms[2].type = PARSE_TERM_LITERAL; terms[2].type = REGEX_TERM_LITERAL;
terms[2].literal = 'c'; terms[2].literal = 'c';
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t)); regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3; alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms; alternatives[0].contents = terms;
parse_tree_t regex regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -127,25 +101,24 @@ static void test_sequence(void)
ASSERT_FALSE(accepts(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "abcd")); ASSERT_FALSE(accepts(&fsa, "abcd"));
parse_tree_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_union(void) static void test_union(void)
{ {
const char *literals = "abc"; const char *literals = "abc";
parse_sequence_t *alternatives = malloc(3 * sizeof(parse_sequence_t)); regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t));
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
parse_term_t *terms = malloc(1 * sizeof(parse_term_t)); regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE; terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_LITERAL; terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = literals[i]; terms[0].literal = literals[i];
alternatives[i].count = alternatives[i].capacity = 1; alternatives[i].count = alternatives[i].capacity = 1;
alternatives[i].contents = terms; alternatives[i].contents = terms;
} }
parse_tree_t regex regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives };
= { .count = 3, .capacity = 3, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -156,21 +129,20 @@ static void test_union(void)
ASSERT_FALSE(accepts(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "aa")); ASSERT_FALSE(accepts(&fsa, "aa"));
parse_tree_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_star(void) static void test_star(void)
{ {
parse_term_t *terms = malloc(1 * sizeof(parse_term_t)); regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_STAR; terms[0].quantifier = REGEX_QUANTIFIER_STAR;
terms[0].type = PARSE_TERM_LITERAL; terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t)); regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
parse_tree_t regex regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -180,78 +152,29 @@ static void test_star(void)
ASSERT_TRUE(accepts(&fsa, "aaaaaa")); ASSERT_TRUE(accepts(&fsa, "aaaaaa"));
ASSERT_FALSE(accepts(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
parse_tree_free(&regex); regex_free(&regex);
fsa_free(&fsa);
}
/*
 * NFA built from the tree for "a+": at least one 'a' is required, so the
 * empty string and any other character are rejected.
 */
static void test_plus(void)
{
	/* One term: literal 'a' carrying the plus quantifier. */
	parse_term_t *term_list = malloc(sizeof *term_list);
	parse_term_t *only = &term_list[0];
	only->quantifier = PARSE_QUANTIFIER_PLUS;
	only->type = PARSE_TERM_LITERAL;
	only->literal = 'a';

	/* One alternative wrapping that single term. */
	parse_sequence_t *seqs = malloc(sizeof *seqs);
	seqs[0].capacity = 1;
	seqs[0].count = 1;
	seqs[0].contents = term_list;

	parse_tree_t regex = { .contents = seqs, .count = 1, .capacity = 1 };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "aaaaaa"));
	ASSERT_FALSE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "b"));

	parse_tree_free(&regex);
	fsa_free(&fsa);
}
static void test_qmark(void)
{
parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_QMARK;
terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a';
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, ""));
ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "aa"));
ASSERT_FALSE(accepts(&fsa, "b"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_subexpression(void) static void test_subexpression(void)
{ {
parse_term_t *inner_terms = malloc(1 * sizeof(parse_term_t)); regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = PARSE_QUANTIFIER_NONE; inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms[0].type = PARSE_TERM_LITERAL; inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a'; inner_terms[0].literal = 'a';
parse_sequence_t *inner_alternatives regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(parse_sequence_t)); = malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1; inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms; inner_alternatives[0].contents = inner_terms;
parse_term_t *terms = malloc(1 * sizeof(parse_term_t)); regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE; terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_SUBEXPR; terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1; terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives; terms[0].subexpr.contents = inner_alternatives;
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t)); regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
parse_tree_t regex regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -259,108 +182,42 @@ static void test_subexpression(void)
ASSERT_TRUE(accepts(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
parse_tree_free(&regex); regex_free(&regex);
fsa_free(&fsa);
}
/*
 * NFA built from the tree for the non-negated class [abc]: exactly one
 * character drawn from the class is accepted, nothing else.
 */
static void test_class(void)
{
	/* The three members of the class. */
	const char *members = "abc";
	char *class_contents = malloc(3);
	for (int k = 0; k < 3; ++k)
		class_contents[k] = members[k];

	/* One unquantified class term that refers to those members. */
	parse_term_t *term_list = malloc(sizeof *term_list);
	parse_term_t *only = &term_list[0];
	only->quantifier = PARSE_QUANTIFIER_NONE;
	only->type = PARSE_TERM_CLASS;
	only->class.negated = false;
	only->class.capacity = 3;
	only->class.count = 3;
	only->class.contents = class_contents;

	/* One alternative wrapping that single term. */
	parse_sequence_t *seqs = malloc(sizeof *seqs);
	seqs[0].capacity = 1;
	seqs[0].count = 1;
	seqs[0].contents = term_list;

	const parse_tree_t regex
		= { .contents = seqs, .count = 1, .capacity = 1 };

	fsa_t fsa;
	construct_nfa(&regex, &fsa);

	ASSERT_TRUE(accepts(&fsa, "a"));
	ASSERT_TRUE(accepts(&fsa, "b"));
	ASSERT_TRUE(accepts(&fsa, "c"));
	ASSERT_FALSE(accepts(&fsa, ""));
	ASSERT_FALSE(accepts(&fsa, "aa"));
	ASSERT_FALSE(accepts(&fsa, "d"));

	parse_tree_free(&regex);
	fsa_free(&fsa);
}
static void test_negated_class(void)
{
char *class_contents = malloc(3);
class_contents[0] = 'a';
class_contents[1] = 'b';
class_contents[2] = 'c';
parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_CLASS;
terms[0].class.negated = true;
terms[0].class.count = terms[0].class.capacity = 3;
terms[0].class.contents = class_contents;
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
const parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "d"));
ASSERT_TRUE(accepts(&fsa, "e"));
ASSERT_FALSE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "b"));
ASSERT_FALSE(accepts(&fsa, "c"));
ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "aa"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_sequence_containing_starred_union(void) static void test_sequence_containing_starred_union(void)
{ {
// ab(c|d)* // ab(c|d)*
parse_term_t *inner_terms0 = malloc(1 * sizeof(parse_term_t)); regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t));
inner_terms0[0].quantifier = PARSE_QUANTIFIER_NONE; inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms0[0].type = PARSE_TERM_LITERAL; inner_terms0[0].type = REGEX_TERM_LITERAL;
inner_terms0[0].literal = 'c'; inner_terms0[0].literal = 'c';
parse_term_t *inner_terms1 = malloc(1 * sizeof(parse_term_t)); regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t));
inner_terms1[0].quantifier = PARSE_QUANTIFIER_NONE; inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms1[0].type = PARSE_TERM_LITERAL; inner_terms1[0].type = REGEX_TERM_LITERAL;
inner_terms1[0].literal = 'd'; inner_terms1[0].literal = 'd';
parse_sequence_t *inner_alternatives regex_sequence_t *inner_alternatives
= malloc(2 * sizeof(parse_sequence_t)); = malloc(2 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1; inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms0; inner_alternatives[0].contents = inner_terms0;
inner_alternatives[1].count = inner_alternatives[1].capacity = 1; inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
inner_alternatives[1].contents = inner_terms1; inner_alternatives[1].contents = inner_terms1;
parse_term_t *terms = malloc(3 * sizeof(parse_term_t)); regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE; terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_LITERAL; terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
terms[1].quantifier = PARSE_QUANTIFIER_NONE; terms[1].quantifier = REGEX_QUANTIFIER_NONE;
terms[1].type = PARSE_TERM_LITERAL; terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b'; terms[1].literal = 'b';
terms[2].quantifier = PARSE_QUANTIFIER_STAR; terms[2].quantifier = REGEX_QUANTIFIER_STAR;
terms[2].type = PARSE_TERM_SUBEXPR; terms[2].type = REGEX_TERM_SUBEXPR;
terms[2].subexpr.count = terms[2].subexpr.capacity = 2; terms[2].subexpr.count = terms[2].subexpr.capacity = 2;
terms[2].subexpr.contents = inner_alternatives; terms[2].subexpr.contents = inner_alternatives;
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t)); regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3; alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms; alternatives[0].contents = terms;
parse_tree_t regex regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives };
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -376,7 +233,7 @@ static void test_sequence_containing_starred_union(void)
ASSERT_FALSE(accepts(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "foo")); ASSERT_FALSE(accepts(&fsa, "foo"));
parse_tree_free(&regex); regex_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
@@ -384,24 +241,23 @@ static void
test_union_of_single_term_and_sequence_containing_starred_term(void) test_union_of_single_term_and_sequence_containing_starred_term(void)
{ {
// a|b*c // a|b*c
parse_term_t *terms0 = malloc(1 * sizeof(parse_term_t)); regex_term_t *terms0 = malloc(1 * sizeof(regex_term_t));
terms0[0].quantifier = PARSE_QUANTIFIER_NONE; terms0[0].quantifier = REGEX_QUANTIFIER_NONE;
terms0[0].type = PARSE_TERM_LITERAL; terms0[0].type = REGEX_TERM_LITERAL;
terms0[0].literal = 'a'; terms0[0].literal = 'a';
parse_term_t *terms1 = malloc(2 * sizeof(parse_term_t)); regex_term_t *terms1 = malloc(2 * sizeof(regex_term_t));
terms1[0].quantifier = PARSE_QUANTIFIER_STAR; terms1[0].quantifier = REGEX_QUANTIFIER_STAR;
terms1[0].type = PARSE_TERM_LITERAL; terms1[0].type = REGEX_TERM_LITERAL;
terms1[0].literal = 'b'; terms1[0].literal = 'b';
terms1[1].quantifier = PARSE_QUANTIFIER_NONE; terms1[1].quantifier = REGEX_QUANTIFIER_NONE;
terms1[1].type = PARSE_TERM_LITERAL; terms1[1].type = REGEX_TERM_LITERAL;
terms1[1].literal = 'c'; terms1[1].literal = 'c';
parse_sequence_t *alternatives = malloc(2 * sizeof(parse_sequence_t)); regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms0; alternatives[0].contents = terms0;
alternatives[1].count = alternatives[1].capacity = 2; alternatives[1].count = alternatives[1].capacity = 2;
alternatives[1].contents = terms1; alternatives[1].contents = terms1;
parse_tree_t regex regex_t regex = { .count = 2, .capacity = 2, .contents = alternatives };
= { .count = 2, .capacity = 2, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -413,49 +269,7 @@ test_union_of_single_term_and_sequence_containing_starred_term(void)
ASSERT_FALSE(accepts(&fsa, "foo")); ASSERT_FALSE(accepts(&fsa, "foo"));
ASSERT_FALSE(accepts(&fsa, "ba")); ASSERT_FALSE(accepts(&fsa, "ba"));
parse_tree_free(&regex); regex_free(&regex);
fsa_free(&fsa);
}
static void test_sequence_of_subexpr_a_or_empty_and_b(void)
{
// (a|ε)b
parse_term_t *inner_terms0 = malloc(1 * sizeof(parse_term_t));
inner_terms0[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms0[0].type = PARSE_TERM_LITERAL;
inner_terms0[0].literal = 'a';
parse_term_t *inner_terms1 = malloc(1 * sizeof(parse_term_t));
inner_terms1[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms1[0].type = PARSE_TERM_EMPTY;
parse_sequence_t *inner_alternatives
= malloc(2 * sizeof(parse_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms0;
inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
inner_alternatives[1].contents = inner_terms1;
parse_term_t *terms = malloc(2 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 2;
terms[0].subexpr.contents = inner_alternatives;
terms[1].quantifier = PARSE_QUANTIFIER_NONE;
terms[1].type = PARSE_TERM_LITERAL;
terms[1].literal = 'b';
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 2;
alternatives[0].contents = terms;
parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "ab"));
ASSERT_TRUE(accepts(&fsa, "b"));
ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "a"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
@@ -466,20 +280,14 @@ int main(void)
// Base cases // Base cases
test_empty_expression(); test_empty_expression();
test_literal_expression(); test_literal_expression();
test_wildcard();
test_sequence(); test_sequence();
test_union(); test_union();
test_star(); test_star();
test_plus();
test_qmark();
test_subexpression(); test_subexpression();
test_class();
test_negated_class();
// Compound expressions // Compound expressions
test_sequence_containing_starred_union(); test_sequence_containing_starred_union();
test_union_of_single_term_and_sequence_containing_starred_term(); test_union_of_single_term_and_sequence_containing_starred_term();
test_sequence_of_subexpr_a_or_empty_and_b();
return TESTING_END(); return TESTING_END();
} }

357
tests/desugar_tests.c Normal file
View File

@@ -0,0 +1,357 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "desugar.h"
#include "testing.h"
#include <stddef.h>
/*
 * "a": a lone unquantified literal.  The tree is expected to come back
 * from desugar_regex() exactly as it was built.
 */
static void a_is_unchanged(void)
{
	/* One term: literal 'a', no quantifier. */
	regex_term_t *term_list = malloc(sizeof *term_list);
	regex_term_t *only = &term_list[0];
	only->quantifier = REGEX_QUANTIFIER_NONE;
	only->type = REGEX_TERM_LITERAL;
	only->literal = 'a';

	/* One alternative wrapping that single term. */
	regex_sequence_t *seqs = malloc(sizeof *seqs);
	seqs[0].capacity = 1;
	seqs[0].count = 1;
	seqs[0].contents = term_list;

	regex_t t = { .contents = seqs, .count = 1, .capacity = 1 };

	desugar_regex(&t);

	ASSERT_EQ(1, t.count);
	ASSERT_NOT_NULL(t.contents);
	ASSERT_EQ(1, t.contents[0].count);
	ASSERT_NOT_NULL(t.contents[0].contents);
	ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
	ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
	ASSERT_EQ('a', t.contents[0].contents[0].literal);

	/* The tree is handed to regex_free() once the checks are done. */
	regex_free(&t);
}
static void abc_is_unchanged(void)
{
regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b';
terms[2].type = REGEX_TERM_LITERAL;
terms[2].literal = 'c';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(3, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type);
ASSERT_EQ('b', t.contents[0].contents[1].literal);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[2].type);
ASSERT_EQ('c', t.contents[0].contents[2].literal);
regex_free(&t);
}
/*
 * "a*": the star quantifier is expected to survive desugar_regex()
 * untouched, still attached to the literal 'a'.
 */
static void a_star_is_unchanged(void)
{
	/* One term: literal 'a' carrying the star quantifier. */
	regex_term_t *term_list = malloc(sizeof *term_list);
	regex_term_t *only = &term_list[0];
	only->quantifier = REGEX_QUANTIFIER_STAR;
	only->type = REGEX_TERM_LITERAL;
	only->literal = 'a';

	/* One alternative wrapping that single term. */
	regex_sequence_t *seqs = malloc(sizeof *seqs);
	seqs[0].capacity = 1;
	seqs[0].count = 1;
	seqs[0].contents = term_list;

	regex_t t = { .contents = seqs, .count = 1, .capacity = 1 };

	desugar_regex(&t);

	ASSERT_EQ(1, t.count);
	ASSERT_NOT_NULL(t.contents);
	ASSERT_EQ(1, t.contents[0].count);
	ASSERT_NOT_NULL(t.contents[0].contents);
	ASSERT_EQ(REGEX_QUANTIFIER_STAR, t.contents[0].contents[0].quantifier);
	ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
	ASSERT_EQ('a', t.contents[0].contents[0].literal);

	regex_free(&t);
}
/*
 * "a|b|c": three alternatives, each a single unquantified literal.  The
 * union is expected to pass through desugar_regex() unchanged.
 */
static void a_or_b_or_c_is_unchanged(void)
{
	const char *literals = "abc";

	/* Build one single-term alternative per character of `literals`. */
	regex_sequence_t *seqs = malloc(3 * sizeof *seqs);
	for (int k = 0; k < 3; ++k) {
		regex_term_t *single = malloc(sizeof *single);
		single->quantifier = REGEX_QUANTIFIER_NONE;
		single->type = REGEX_TERM_LITERAL;
		single->literal = literals[k];

		regex_sequence_t *slot = &seqs[k];
		slot->capacity = 1;
		slot->count = 1;
		slot->contents = single;
	}

	regex_t t = { .contents = seqs, .count = 3, .capacity = 3 };

	desugar_regex(&t);

	ASSERT_EQ(3, t.count);
	ASSERT_NOT_NULL(t.contents);
	for (int i = 0; i < 3; ++i) {
		ASSERT_EQ(1, t.contents[i].count);
		ASSERT_NOT_NULL(t.contents[i].contents);
		ASSERT_EQ(
			REGEX_QUANTIFIER_NONE, t.contents[i].contents[0].quantifier);
		ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[i].contents[0].type);
		ASSERT_EQ(literals[i], t.contents[i].contents[0].literal);
	}

	regex_free(&t);
}
static void subexpr_a_is_unchanged(void)
{
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal);
regex_free(&t);
}
static void a_plus_becomes_subexpr_aa_star(void)
{
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_PLUS;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(2, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal);
ASSERT_EQ(
REGEX_QUANTIFIER_STAR, inner->contents[0].contents[1].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[1].type);
ASSERT_EQ('a', inner->contents[0].contents[1].literal);
regex_free(&t);
}
/*
 * "a?" is expected to be rewritten to an unquantified subexpression with
 * two alternatives: the empty term first, then the literal 'a'.
 */
static void a_qmark_becomes_subexpr_empty_or_a(void)
{
	/* One term: literal 'a' carrying the question-mark quantifier. */
	regex_term_t *term_list = malloc(sizeof *term_list);
	regex_term_t *only = &term_list[0];
	only->quantifier = REGEX_QUANTIFIER_QMARK;
	only->type = REGEX_TERM_LITERAL;
	only->literal = 'a';

	/* One alternative wrapping that single term. */
	regex_sequence_t *seqs = malloc(sizeof *seqs);
	seqs[0].capacity = 1;
	seqs[0].count = 1;
	seqs[0].contents = term_list;

	regex_t t = { .contents = seqs, .count = 1, .capacity = 1 };

	desugar_regex(&t);

	/* The top level now holds a single unquantified subexpression. */
	ASSERT_EQ(1, t.count);
	ASSERT_NOT_NULL(t.contents);
	ASSERT_EQ(1, t.contents[0].count);
	ASSERT_NOT_NULL(t.contents[0].contents);
	ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
	ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);

	/* Inside it: alternative 0 is empty, alternative 1 is 'a'. */
	const regex_t *inner = &t.contents[0].contents[0].subexpr;
	ASSERT_EQ(2, inner->count);
	ASSERT_NOT_NULL(inner->contents);
	ASSERT_EQ(1, inner->contents[0].count);
	ASSERT_NOT_NULL(inner->contents[0].contents);
	ASSERT_EQ(
		REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
	ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type);
	ASSERT_EQ(1, inner->contents[1].count);
	ASSERT_NOT_NULL(inner->contents[1].contents);
	ASSERT_EQ(
		REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
	ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
	ASSERT_EQ('a', inner->contents[1].contents[0].literal);

	regex_free(&t);
}
static void class_abc_becomes_subexpr_a_or_b_or_c(void)
{
char *options = malloc(3 * sizeof(char));
options[0] = 'a';
options[1] = 'b';
options[2] = 'c';
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_CLASS;
terms[0].class.negated = false;
terms[0].class.count = terms[0].class.capacity = 3;
terms[0].class.contents = options;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(3, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('b', inner->contents[1].contents[0].literal);
ASSERT_EQ(1, inner->contents[2].count);
ASSERT_NOT_NULL(inner->contents[2].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[2].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[2].contents[0].type);
ASSERT_EQ('c', inner->contents[2].contents[0].literal);
regex_free(&t);
}
static void subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a(void)
{
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_QMARK;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner;
inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, inner->contents[0].contents[0].type);
inner = &inner->contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('a', inner->contents[1].contents[0].literal);
regex_free(&t);
}
int main(void)
{
TESTING_BEGIN();
a_is_unchanged();
abc_is_unchanged();
a_star_is_unchanged();
a_or_b_or_c_is_unchanged();
subexpr_a_is_unchanged();
a_plus_becomes_subexpr_aa_star();
a_qmark_becomes_subexpr_empty_or_a();
class_abc_becomes_subexpr_a_or_b_or_c();
subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a();
return TESTING_END();
}

View File

@@ -47,54 +47,11 @@ static void test_arbitrary_regex_1(void)
fsa_free(&dfa); fsa_free(&dfa);
} }
/*
 * Compiles "(l|wh)?[aeiou]+" and checks a handful of inputs against the
 * resulting automaton: an optional "l" or "wh" prefix followed by at
 * least one vowel.
 */
static void test_arbitrary_regex_2(void)
{
	const char *pattern = "(l|wh)?[aeiou]+";
	const size_t pattern_len = strlen(pattern);

	fsa_t dfa;
	const bool success = compile(pattern, pattern_len, &dfa);
	ASSERT_TRUE(success);

	/* With or without a prefix, one or more vowels is enough. */
	ASSERT_ACCEPTS(&dfa, "laaaa");
	ASSERT_ACCEPTS(&dfa, "eeeee");
	ASSERT_ACCEPTS(&dfa, "iii");
	ASSERT_ACCEPTS(&dfa, "whooo");
	ASSERT_ACCEPTS(&dfa, "u");

	/* A prefix alone, or non-vowels after it, must not match. */
	ASSERT_REJECTS(&dfa, "wh");
	ASSERT_REJECTS(&dfa, "lxxx");

	fsa_free(&dfa);
}
/*
 * Compiles a pattern for "#include <name.h>" lines, where name is one or
 * more lowercase letters, and checks that a quoted include is rejected.
 */
static void test_system_header_include_regex(void)
{
	const char *pattern = "#include <[abcdefghijklmnopqrstuvwxyz]+\\.h>";
	const size_t pattern_len = strlen(pattern);

	fsa_t dfa;
	const bool success = compile(pattern, pattern_len, &dfa);
	ASSERT_TRUE(success);

	ASSERT_ACCEPTS(&dfa, "#include <stdio.h>");
	ASSERT_REJECTS(&dfa, "#include \"foo.h\"");

	fsa_free(&dfa);
}
/*
 * Compiles a pattern for single-quoted strings.  As the pattern reads,
 * the body is any run of either a backslash followed by a quote, or a
 * character other than a quote.
 */
static void test_quoted_string_regex(void)
{
	const char *pattern = "'(\\\\'|[^'])*'";
	const size_t pattern_len = strlen(pattern);

	fsa_t dfa;
	const bool success = compile(pattern, pattern_len, &dfa);
	ASSERT_TRUE(success);

	ASSERT_ACCEPTS(&dfa, "''");
	ASSERT_ACCEPTS(&dfa, "'foo bar baz'");
	ASSERT_ACCEPTS(&dfa, "'foo \\'bar\\' baz'");

	/* Unescaped quotes inside the string must cause a rejection. */
	ASSERT_REJECTS(&dfa, "'foo 'bar' baz'");

	fsa_free(&dfa);
}
int main(void) int main(void)
{ {
TESTING_BEGIN(); TESTING_BEGIN();
test_foo_or_bar_regex(); test_foo_or_bar_regex();
test_even_number_of_Is_regex(); test_even_number_of_Is_regex();
test_arbitrary_regex_1(); test_arbitrary_regex_1();
test_arbitrary_regex_2();
test_system_header_include_regex();
test_quoted_string_regex();
return TESTING_END(); return TESTING_END();
} }

View File

@@ -10,268 +10,268 @@
static void a_has_1_alternative(void) static void a_has_1_alternative(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("a", &t); const int result = PARSE_EXPR_STRING("a", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
parse_tree_free(&t); regex_free(&t);
} }
static void a_pipe_b_has_2_alternatives(void) static void a_pipe_b_has_2_alternatives(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("a|b", &t); const int result = PARSE_EXPR_STRING("a|b", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(2, t.count); ASSERT_EQ(2, t.count);
parse_tree_free(&t); regex_free(&t);
} }
/* Parses "a|b|c" and asserts the parse did not return -1 and that t.count is 3. */
static void a_pipe_b_pipe_c_has_3_alternatives(void) static void a_pipe_b_pipe_c_has_3_alternatives(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("a|b|c", &t); const int result = PARSE_EXPR_STRING("a|b|c", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(3, t.count); ASSERT_EQ(3, t.count);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses "a" and asserts the result holds one alternative containing one
 * term: a literal 'a' with no quantifier.
 */
static void a_is_parsed_as_unquantified_literal(void) static void a_is_parsed_as_unquantified_literal(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("a", &t); const int result = PARSE_EXPR_STRING("a", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal); ASSERT_EQ('a', t.contents[0].contents[0].literal);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses "b" and asserts the result holds one alternative containing one
 * term: a literal 'b' with no quantifier.
 */
static void b_is_parsed_as_unquantified_literal(void) static void b_is_parsed_as_unquantified_literal(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("b", &t); const int result = PARSE_EXPR_STRING("b", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('b', t.contents[0].contents[0].literal); ASSERT_EQ('b', t.contents[0].contents[0].literal);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses "abc" and asserts the result holds one alternative containing three
 * terms, each an unquantified literal: 'a', 'b', then 'c'.
 */
static void abc_is_parsed_as_sequence_of_unquantified_literals(void) static void abc_is_parsed_as_sequence_of_unquantified_literals(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("abc", &t); const int result = PARSE_EXPR_STRING("abc", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(3, t.contents[0].count); ASSERT_EQ(3, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal); ASSERT_EQ('a', t.contents[0].contents[0].literal);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[1].type); ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type);
ASSERT_EQ('b', t.contents[0].contents[1].literal); ASSERT_EQ('b', t.contents[0].contents[1].literal);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[2].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[2].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[2].type); ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[2].type);
ASSERT_EQ('c', t.contents[0].contents[2].literal); ASSERT_EQ('c', t.contents[0].contents[2].literal);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses "." and asserts the result holds one alternative containing one
 * term: a wildcard with no quantifier.
 */
static void dot_is_parsed_as_unquantified_wildcard_term(void) static void dot_is_parsed_as_unquantified_wildcard_term(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING(".", &t); const int result = PARSE_EXPR_STRING(".", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses a backslash followed by a dot and asserts the result holds one
 * alternative containing one term: a literal '.' with no quantifier.
 */
static void backslash_dot_is_parsed_as_unquantified_literal(void) static void backslash_dot_is_parsed_as_unquantified_literal(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("\\.", &t); const int result = PARSE_EXPR_STRING("\\.", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('.', t.contents[0].contents[0].literal); ASSERT_EQ('.', t.contents[0].contents[0].literal);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses two consecutive backslashes and asserts the result holds one
 * alternative containing one term: a literal backslash with no quantifier.
 */
static void backslash_backslash_is_parsed_as_unquantified_literal(void) static void backslash_backslash_is_parsed_as_unquantified_literal(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("\\\\", &t); const int result = PARSE_EXPR_STRING("\\\\", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('\\', t.contents[0].contents[0].literal); ASSERT_EQ('\\', t.contents[0].contents[0].literal);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses "(a|b)" and asserts the outer result is one alternative with a
 * single unquantified sub-expression term, whose own contents are two
 * alternatives holding the unquantified literals 'a' and 'b' respectively.
 */
static void a_pipe_b_in_parens_is_parsed_as_subexpr_term(void) static void a_pipe_b_in_parens_is_parsed_as_subexpr_term(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("(a|b)", &t); const int result = PARSE_EXPR_STRING("(a|b)", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_SUBEXPR, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
/* The remaining checks inspect the nested tree stored in the subexpr term. */
const parse_tree_t *inner = &t.contents[0].contents[0].subexpr; const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count); ASSERT_EQ(2, inner->count);
ASSERT_EQ(1, inner->contents[0].count); ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ( ASSERT_EQ(
PARSE_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, inner->contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal); ASSERT_EQ('a', inner->contents[0].contents[0].literal);
ASSERT_EQ(1, inner->contents[1].count); ASSERT_EQ(1, inner->contents[1].count);
ASSERT_EQ( ASSERT_EQ(
PARSE_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier); REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, inner->contents[1].contents[0].type); ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('b', inner->contents[1].contents[0].literal); ASSERT_EQ('b', inner->contents[1].contents[0].literal);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses "(a)b" and asserts the result is one alternative with two
 * unquantified terms: a sub-expression followed by the literal 'b'. The
 * sub-expression's first alternative must hold the unquantified literal 'a'.
 */
static void a_in_parens_b_is_parsed_as_sequence_with_subexpr_term(void) static void a_in_parens_b_is_parsed_as_sequence_with_subexpr_term(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("(a)b", &t); const int result = PARSE_EXPR_STRING("(a)b", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(2, t.contents[0].count); ASSERT_EQ(2, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_SUBEXPR, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[1].type); ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type);
ASSERT_EQ('b', t.contents[0].contents[1].literal); ASSERT_EQ('b', t.contents[0].contents[1].literal);
/* The remaining checks inspect the nested tree stored in the subexpr term. */
const parse_tree_t *inner = &t.contents[0].contents[0].subexpr; const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->contents[0].count); ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ( ASSERT_EQ(
PARSE_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_LITERAL, inner->contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal); ASSERT_EQ('a', inner->contents[0].contents[0].literal);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses ".*" and asserts the result holds one alternative containing one
 * term: a wildcard carrying the star quantifier.
 */
static void dot_star_is_parsed_as_star_quantified_wildcard(void) static void dot_star_is_parsed_as_star_quantified_wildcard(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING(".*", &t); const int result = PARSE_EXPR_STRING(".*", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_STAR, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_STAR, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses ".+" and asserts the result holds one alternative containing one
 * term: a wildcard carrying the plus quantifier.
 */
static void dot_plus_is_parsed_as_plus_quantified_wildcard(void) static void dot_plus_is_parsed_as_plus_quantified_wildcard(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING(".+", &t); const int result = PARSE_EXPR_STRING(".+", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_PLUS, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_PLUS, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses ".?" and asserts the result holds one alternative containing one
 * term: a wildcard carrying the question-mark quantifier.
 */
static void dot_question_mark_is_parsed_as_qmrk_quantified_wildcard(void) static void dot_question_mark_is_parsed_as_qmrk_quantified_wildcard(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING(".?", &t); const int result = PARSE_EXPR_STRING(".?", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_QMARK, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_QMARK, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses "[a]" and asserts the result holds one alternative containing one
 * unquantified class term that is not negated and whose only member is 'a'.
 */
static void a_in_brackets_is_parsed_as_class_containing_only_a(void) static void a_in_brackets_is_parsed_as_class_containing_only_a(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("[a]", &t); const int result = PARSE_EXPR_STRING("[a]", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_CLASS, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_CLASS, t.contents[0].contents[0].type);
ASSERT_FALSE(t.contents[0].contents[0].class.negated); ASSERT_FALSE(t.contents[0].contents[0].class.negated);
ASSERT_EQ(1, t.contents[0].contents[0].class.count); ASSERT_EQ(1, t.contents[0].contents[0].class.count);
ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents); ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents);
ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]); ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]);
parse_tree_free(&t); regex_free(&t);
} }
/*
 * Parses "[^a]" and asserts the result holds one alternative containing one
 * unquantified class term that is negated and whose only member is 'a'.
 */
static void caret_a_in_brackets_parses_as_negated_class(void) static void caret_a_in_brackets_parses_as_negated_class(void)
{ {
parse_tree_t t; regex_t t;
const int result = PARSE_EXPR_STRING("[^a]", &t); const int result = PARSE_EXPR_STRING("[^a]", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(PARSE_TERM_CLASS, t.contents[0].contents[0].type); ASSERT_EQ(REGEX_TERM_CLASS, t.contents[0].contents[0].type);
ASSERT_TRUE(t.contents[0].contents[0].class.negated); ASSERT_TRUE(t.contents[0].contents[0].class.negated);
ASSERT_EQ(1, t.contents[0].contents[0].class.count); ASSERT_EQ(1, t.contents[0].contents[0].class.count);
ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents); ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents);
ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]); ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]);
parse_tree_free(&t); regex_free(&t);
} }
int main(void) int main(void)