Compare commits

..

21 Commits

Author SHA1 Message Date
601829bd29 Increase length of benchmark strings 2024-11-10 16:29:44 +00:00
4131af3912 Assign match result to volatile in benchmarks
This is needed to avoid the compiler eliding the call in
highly-optimised builds.
2024-11-10 16:28:38 +00:00
97529fdd2b Write some matching benchmarks 2024-11-10 15:22:28 +00:00
e4d3b08bf2 Create benchmarking library 2024-11-10 15:22:28 +00:00
15a6195bf0 Use entr's -c flag in script 2024-11-10 15:17:00 +00:00
b7737fba39 Tweak README 2024-11-03 13:20:26 +00:00
dad687216b Surround input regex with .*( ).* in demo 2024-11-03 12:31:06 +00:00
656726a8c1 Move regex_t into parse.h and rename to parse_tree_t 2024-11-03 12:23:58 +00:00
1f248ad4cd Remove desugaring step 2024-11-03 12:16:52 +00:00
e283fd2c52 Support + and ? in construct_nfa() 2024-11-03 12:16:38 +00:00
1fea81b74b Remove wildcard assert from desugar 2024-11-03 12:06:27 +00:00
77e1a77e02 Support wildcards in construct 2024-11-03 11:59:56 +00:00
892ff89a66 Add integration test using negated class 2024-11-03 11:55:05 +00:00
3c89cc4e99 Remove class desugaring 2024-11-03 11:55:05 +00:00
38b5b48289 Support classes in construct step 2024-11-03 11:55:05 +00:00
3c4146468e Reorder header includes in compile.c 2024-11-03 11:54:56 +00:00
f95de25842 Turn off extensions in set_default_target_options 2024-11-02 23:47:09 +00:00
d6d5951b95 Fix allocation issue in FSA module 2024-11-02 23:15:27 +00:00
232295fff4 Fix bug in table growing routine 2024-11-02 23:14:59 +00:00
34fee99232 Fix bug in construct_nfa
Intermediate final states were being left in by add_fsa(); we always
want to mark the added FSA's final state as non-final.
2024-11-02 23:12:23 +00:00
074b174d0f Create some integration tests 2024-11-02 17:35:04 +00:00
26 changed files with 812 additions and 852 deletions

View File

@@ -1,18 +0,0 @@
# CI build manifest: configure, build and test with Clang on Alpine edge.
# NOTE(review): the layout looks like a builds.sr.ht manifest -- confirm.
image: alpine/edge
packages:
  - clang
  - cmake
  - compiler-rt
  - ninja
sources:
  - https://git.sr.ht/~cdo/regex-engine
tasks:
  - configure: |
      cd regex-engine
      cmake -GNinja -Bbuild -DSANITIZERS=on -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=clang
  - build: |
      cd regex-engine
      scripts/build.sh
  - test: |
      cd regex-engine
      scripts/test.sh

View File

@@ -8,6 +8,7 @@ enable_testing()
function(set_default_target_options target) function(set_default_target_options target)
set_property(TARGET ${target} PROPERTY C_STANDARD 11) set_property(TARGET ${target} PROPERTY C_STANDARD 11)
set_property(TARGET ${target} PROPERTY C_EXTENSIONS OFF)
target_compile_options(${target} PRIVATE -Wall -Wextra -pedantic) target_compile_options(${target} PRIVATE -Wall -Wextra -pedantic)
if(${SANITIZERS}) if(${SANITIZERS})
target_compile_options(${target} PRIVATE -fsanitize=address,undefined) target_compile_options(${target} PRIVATE -fsanitize=address,undefined)
@@ -18,3 +19,4 @@ endfunction()
add_subdirectory(lib) add_subdirectory(lib)
add_subdirectory(tests) add_subdirectory(tests)
add_subdirectory(demo) add_subdirectory(demo)
add_subdirectory(benchmarks)

6
README
View File

@@ -7,8 +7,8 @@ so here we are.
Grammar Grammar
This engine is not going to be strictly supporting any standard The engine does not support any specific standard's syntax, unless by
syntax; the expression syntax I intend to support follows. coincidence. The grammar I've implemented for expressions is:
regex ::= sequence ( '|' sequence )* regex ::= sequence ( '|' sequence )*
sequence ::= term+ sequence ::= term+
@@ -23,7 +23,7 @@ syntax; the expression syntax I intend to support follows.
The build uses CMake. There are two scripts, build.sh and test.sh, The build uses CMake. There are two scripts, build.sh and test.sh,
which will (much to everybody's shock) build the project and run the which will (much to everybody's shock) build the project and run the
tests. I use Clang but the code is ISO C11, it should compile just tests. I use Clang but the code is ISO C11 so it should compile just
fine with GCC. You might need to faff with CMakeLists.txt to get it fine with GCC. You might need to faff with CMakeLists.txt to get it
to work with another compiler due to command-line flag nonsense. to work with another compiler due to command-line flag nonsense.

20
benchmarks/CMakeLists.txt Normal file
View File

@@ -0,0 +1,20 @@
# Support library shared by every benchmark executable: timing macros
# (include/benchmarking.h) plus summary statistics and table printing.
add_library(benchmarking benchmarking.c)
set_default_target_options(benchmarking)
target_include_directories(benchmarking PUBLIC include)
# benchmarking.c calls pow() and sqrt(), so the maths library is a
# dependency of this target itself rather than of each consumer.
target_link_libraries(benchmarking PRIVATE m)
# add_benchmark_suite(<source>)
#
# Define one benchmark executable built from a single C source file.  The
# target is named after the source with its ".c" extension removed, gets
# the project's default target options, and links against the regex
# library, the benchmarking support library and libm.
#
# Example: add_benchmark_suite(matching_benchmarks.c)
function(add_benchmark_suite source)
  # The dot is escaped so that only a literal ".c" suffix is stripped; an
  # unescaped "." would match any character before the trailing "c".
  string(REGEX REPLACE "\\.c$" "" name "${source}")
  add_executable(${name} "${source}")
  set_default_target_options(${name})
  target_link_libraries(${name} PRIVATE lib benchmarking m)
endfunction()
# add_benchmark_suites(<source>...)
#
# Convenience wrapper: registers one benchmark executable per source file
# given, by forwarding each argument to add_benchmark_suite().
function(add_benchmark_suites)
  foreach(suite_source ${ARGN})
    add_benchmark_suite("${suite_source}")
  endforeach()
endfunction()
# Benchmark suites to build; each source listed here becomes its own
# executable named after the file (without the extension).
add_benchmark_suites(
matching_benchmarks.c
)

85
benchmarks/benchmarking.c Normal file
View File

@@ -0,0 +1,85 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "benchmarking.h"
#include <assert.h>
#include <math.h>
#include <stdio.h>
/*
 * Exchange the values of two double lvalues.
 *
 * Both arguments are parenthesised so that any lvalue expression expands
 * safely, and the temporary has a reserved-looking name so that an
 * argument called `tmp` cannot be captured by the macro's own local.
 */
#define SWAP(x, y)                        \
    do {                                  \
        const double swap_tmp_ = (x);     \
        (x) = (y);                        \
        (y) = swap_tmp_;                  \
    } while (0)

/*
 * Clock readings recorded by the START_CLOCK() and STOP_CLOCK() macros
 * declared in benchmarking.h; RUN_BENCHMARK() reads them after each run.
 */
clock_t benchmark_start, benchmark_end;
/*
 * Sort xs[0..n) into ascending order, in place.
 *
 * Quicksort with three-way partitioning around the value of the middle
 * element: elements equal to the pivot are gathered in the centre and
 * excluded from both recursive calls.
 */
static void sort(double *xs, int n)
{
    if (n < 1)
        return;

    const double pivot = xs[(n - 1) / 2];

    /*
     * Invariant: xs[0..below) < pivot, xs[below..cur) == pivot and
     * xs[above+1..n) > pivot; xs[cur..above] is still unclassified.
     */
    int below = 0;
    int above = n - 1;
    for (int cur = 0; cur <= above;) {
        if (xs[cur] > pivot) {
            /* The element swapped in is unclassified, so cur stays. */
            SWAP(xs[cur], xs[above]);
            --above;
            continue;
        }
        if (xs[cur] < pivot) {
            SWAP(xs[cur], xs[below]);
            ++below;
        }
        ++cur;
    }

    sort(xs, below);
    sort(xs + above + 1, n - above - 1);
}
/*
 * Compute summary statistics for a set of benchmark samples.
 *
 * res   Array of `reps` samples; it is sorted in place (ascending) as a
 *       side effect.
 * reps  Number of samples; must be greater than zero (asserted).
 * out   Receives the sample count, sum, median, mean, minimum, maximum
 *       and sample standard deviation (n - 1 divisor).
 *
 * A single sample has no spread, so its standard deviation is reported
 * as 0 rather than dividing by zero.  For an even number of samples the
 * median is the mean of the two middle values.
 */
void benchmark_summarise(double *res, int reps, benchmark_summary_t *out)
{
    assert(reps > 0);

    sort(res, reps);

    const int mid = reps / 2;
    const double median
        = (reps % 2 == 0) ? (res[mid - 1] + res[mid]) / 2 : res[mid];

    double sum = 0;
    for (int i = 0; i < reps; ++i)
        sum += res[i];
    const double mean = sum / reps;

    double diff_sum = 0;
    for (int i = 0; i < reps; ++i) {
        const double diff = res[i] - mean;
        diff_sum += diff * diff;
    }
    /* Guard the n - 1 divisor: with one sample it would be 0 / 0. */
    const double variance = (reps > 1) ? diff_sum / (reps - 1) : 0;

    out->reps = reps;
    out->total = sum;
    out->median = median;
    out->mean = mean;
    /* res is sorted, so the extremes sit at either end. */
    out->min = res[0];
    out->max = res[reps - 1];
    out->stddev = sqrt(variance);
}
/*
 * Write the column titles of the results table to stdout.
 *
 * NOTE(review): the µs columns are 13 wide here but 12 wide in
 * benchmark_print(); presumably this compensates for "µ" occupying two
 * bytes in UTF-8 while printf pads by bytes -- confirm.
 */
void benchmark_print_header(void)
{
    const char *const titles[] = {
        "benchmark", "median (µs)", "mean (µs)",
        "min (µs)",  "max (µs)",    "stddev",
    };
    printf(
        "%-12s %13s %13s %13s %13s %12s\n", titles[0], titles[1],
        titles[2], titles[3], titles[4], titles[5]);
}
/*
 * Write one row of the results table to stdout: the benchmark's name
 * followed by the median, mean, minimum, maximum and standard deviation
 * from `s`, each to two decimal places.
 */
void benchmark_print(const char *name, const benchmark_summary_t *s)
{
    const double median = s->median;
    const double mean = s->mean;
    const double lowest = s->min;
    const double highest = s->max;
    const double spread = s->stddev;

    printf(
        "%-12s %12.2f %12.2f %12.2f %12.2f %12.2f\n", name, median, mean,
        lowest, highest, spread);
}

View File

@@ -0,0 +1,50 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef BENCHMARKING_H
#define BENCHMARKING_H
#include <time.h>
/*
 * Summary statistics for one benchmark, as filled in by
 * benchmark_summarise().  The statistics are in whatever unit the
 * samples were in (microseconds when produced by RUN_BENCHMARK()).
 */
typedef struct {
    int reps;      /* number of samples summarised */
    double total;  /* sum of all samples */
    double median; /* middle sample of the sorted set */
    double mean;   /* arithmetic mean */
    double min;    /* smallest sample */
    double max;    /* largest sample */
    double stddev; /* sample standard deviation */
} benchmark_summary_t;
/*
 * Convert a clock_t tick count to microseconds, as a double.  The
 * argument is parenthesised so that the cast applies to the whole
 * expression, e.g. CLOCK_MICROS(end - start).
 */
#define CLOCK_MICROS(c) (1000000 * (double)(c) / CLOCKS_PER_SEC)
/* Call once before the first benchmark: prints the results table header. */
#define BENCHMARKING_BEGIN() benchmark_print_header()

/* Evaluates to the program's exit status; intended for `return` in main(). */
#define BENCHMARKING_END() 0

/*
 * Mark the start / end of the timed region inside a benchmark function.
 * The readings are stored in the benchmark_start and benchmark_end
 * globals, which RUN_BENCHMARK() reads after the function returns.
 */
#define START_CLOCK() ((void)(benchmark_start = clock()))
#define STOP_CLOCK() ((void)(benchmark_end = clock()))
/*
 * Run a benchmark function `reps` times, then summarise and print the
 * timings as one table row.
 *
 * reps  Number of repetitions; must be greater than zero.  One double per
 *       repetition is kept in an automatic array.
 * name  Label printed in the first column.
 * fn    Function to call; it must bracket the code being measured with
 *       START_CLOCK() and STOP_CLOCK().
 * ...   Arguments forwarded to fn on every repetition.
 *
 * Arguments are parenthesised and the macro's locals carry a bm_ prefix
 * so that caller expressions cannot be mis-parsed or captured.
 */
#define RUN_BENCHMARK(reps, name, fn, ...)                            \
    do {                                                              \
        double bm_res_[(reps)];                                       \
        for (int bm_i_ = 0; bm_i_ < (reps); ++bm_i_) {                \
            fn(__VA_ARGS__);                                          \
            bm_res_[bm_i_] = CLOCK_MICROS(benchmark_end)              \
                - CLOCK_MICROS(benchmark_start);                      \
        }                                                             \
        benchmark_summary_t bm_summary_;                              \
        benchmark_summarise(bm_res_, (reps), &bm_summary_);           \
        benchmark_print((name), &bm_summary_);                        \
    } while (0)
/* Clock readings stored by START_CLOCK() and STOP_CLOCK(); defined in
 * benchmarking.c. */
extern clock_t benchmark_start, benchmark_end;
/* Sort the `reps` samples in `res` in place and write their summary
 * statistics to `out`.  `reps` must be greater than zero. */
void benchmark_summarise(double *res, int reps, benchmark_summary_t *out);
/* Print the column titles of the results table to stdout. */
void benchmark_print_header(void);
/* Print one results row (name, median, mean, min, max, stddev) to stdout. */
void benchmark_print(const char *name, const benchmark_summary_t *summary);
#endif

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "benchmarking.h"
#include "compile.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
/* Length, in characters, of the random string matched in each run. */
#define LEN 1000

/* Inclusive range of characters the random input is drawn from. */
#define RANGE_FIRST 'a'
#define RANGE_LAST 'z'

/*
 * Map a non-negative integer onto [RANGE_FIRST, RANGE_LAST].  The
 * argument is parenthesised so that expressions such as a + b are
 * reduced as a whole rather than having % bind to their last operand.
 */
#define CLAMP_CHAR(x) (RANGE_FIRST + (x) % (RANGE_LAST - RANGE_FIRST + 1))
/*
 * Compile `regex` to an automaton, benchmark matching against it `reps`
 * times under the label `name`, then release the automaton.
 *
 * compile() returns false without producing an automaton when the
 * expression fails to parse; running or freeing `fsa` in that case would
 * use an uninitialised object, so the program is aborted instead.  The
 * pattern is bound to a local so that `regex` is evaluated only once.
 */
#define RUN_MATCHING_BENCHMARK(reps, name, regex)                     \
    do {                                                              \
        const char *const bm_pattern_ = (regex);                      \
        fsa_t fsa;                                                    \
        if (!compile(bm_pattern_, strlen(bm_pattern_), &fsa))         \
            abort();                                                  \
        RUN_BENCHMARK(reps, name, matching_benchmark, &fsa);          \
        fsa_free(&fsa);                                               \
    } while (0)
/*
 * One benchmark run: build a fresh random string of LEN characters in
 * the range RANGE_FIRST..RANGE_LAST, then time a single fsa_accepts()
 * call over it.  Only the match itself lies between START_CLOCK() and
 * STOP_CLOCK(); generating the input is not timed.
 */
static void matching_benchmark(const fsa_t *fsa)
{
    char input[LEN];
    int pos = 0;
    while (pos < LEN) {
        input[pos] = CLAMP_CHAR(rand());
        ++pos;
    }

    /* Storing the result in a volatile stops the compiler from eliding
     * the call in optimised builds. */
    volatile bool matched;
    START_CLOCK();
    matched = fsa_accepts(fsa, input, LEN);
    STOP_CLOCK();
    (void)matched;
}
/*
 * Entry point: seed the random number generator, print the table header
 * and run each matching benchmark in turn.
 */
int main(void)
{
    /* Seed from the microsecond field of the current time so that each
     * invocation of the program gets different input strings. */
    struct timeval now;
    gettimeofday(&now, NULL);
    srand(now.tv_usec);

    BENCHMARKING_BEGIN();

    RUN_MATCHING_BENCHMARK(10000, "foo or bar", ".*(foo|bar).*");
    RUN_MATCHING_BENCHMARK(10000, "regex #1", ".*(abc!?)*|dd+.*");
    RUN_MATCHING_BENCHMARK(10000, "regex #2", ".*(l|wh)?[aeiou]+.*");

    return BENCHMARKING_END();
}

View File

@@ -12,6 +12,11 @@
#define BUFFER_START_CAPACITY 128 #define BUFFER_START_CAPACITY 128
#define PREFIX ".*("
#define PREFIX_LEN 3
#define SUFFIX ").*"
#define SUFFIX_LEN 3
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
if (argc != 2) { if (argc != 2) {
@@ -19,8 +24,15 @@ int main(int argc, char *argv[])
return EXIT_FAILURE; return EXIT_FAILURE;
} }
const int input_len = strlen(argv[1]);
const int regex_len = input_len + 6;
char *regex = malloc(regex_len);
memcpy(regex, PREFIX, PREFIX_LEN);
memcpy(regex + PREFIX_LEN, argv[1], input_len);
memcpy(regex + PREFIX_LEN + input_len, SUFFIX, SUFFIX_LEN);
fsa_t dfa; fsa_t dfa;
if (!compile(argv[1], strlen(argv[1]), &dfa)) { if (!compile(regex, regex_len, &dfa)) {
fprintf(stderr, "Failed to parse regex\n"); fprintf(stderr, "Failed to parse regex\n");
return EXIT_FAILURE; return EXIT_FAILURE;
} }

View File

@@ -2,11 +2,9 @@ add_library(lib
compile.c compile.c
construct.c construct.c
convert.c convert.c
desugar.c
fsa.c fsa.c
min_heap.c min_heap.c
parse.c parse.c
regex.c
) )
set_default_target_options(lib) set_default_target_options(lib)
target_include_directories(lib PUBLIC include) target_include_directories(lib PUBLIC include)

View File

@@ -5,21 +5,19 @@
#include "compile.h" #include "compile.h"
#include "parse.h"
#include "desugar.h"
#include "construct.h" #include "construct.h"
#include "convert.h" #include "convert.h"
#include "parse.h"
bool compile(const char *regex, int len, fsa_t *dfa_out) bool compile(const char *regex, int len, fsa_t *dfa_out)
{ {
regex_t pt; parse_tree_t pt;
if (-1 == parse_expr(regex, len, &pt)) if (-1 == parse_expr(regex, len, &pt))
return false; return false;
desugar_regex(&pt);
fsa_t nfa; fsa_t nfa;
construct_nfa(&pt, &nfa); construct_nfa(&pt, &nfa);
regex_free(&pt); parse_tree_free(&pt);
convert_to_dfa(&nfa, dfa_out); convert_to_dfa(&nfa, dfa_out);
fsa_free(&nfa); fsa_free(&nfa);

View File

@@ -25,6 +25,9 @@ static void add_fsa(fsa_t *f, const fsa_t *o, int *init_out, int *final_out)
} }
memcpy(f->states + f->count, o->states, o->count * sizeof(fsa_state_t)); memcpy(f->states + f->count, o->states, o->count * sizeof(fsa_state_t));
// Mark o's final state as non-final.
f->states[f->count].final = false;
// Retarget the rules of the copied states to refer to the new // Retarget the rules of the copied states to refer to the new
// state indices. // state indices.
for (int i = f->count; i < count; ++i) { for (int i = f->count; i < count; ++i) {
@@ -112,62 +115,119 @@ static void prepend_fsa(fsa_t *f, const fsa_t *o)
f->count = count; f->count = count;
} }
static void construct_base(fsa_t *out, int symbol) static void construct_base(fsa_t *out)
{ {
fsa_init(out); fsa_init(out);
const int id = fsa_add_state(out);
fsa_add_rule(out, id, out->initial, symbol);
out->initial = id;
out->states[0].final = true; out->states[0].final = true;
out->initial = fsa_add_state(out);
}
/*
 * Build the automaton for a single input symbol: the two-state base
 * automaton plus one rule from its initial state to state 0 on `symbol`.
 */
static void construct_symbol(fsa_t *out, int symbol)
{
    construct_base(out);

    const int from = out->initial;
    fsa_add_rule(out, from, 0, symbol);
}
/*
 * Report whether character `c` appears among the listed contents of
 * `class`.  The class's `negated` flag is not consulted here.
 */
static bool in_class(const parse_class_t *class, char c)
{
    int remaining = class->count;
    while (remaining-- > 0) {
        if (c == class->contents[remaining])
            return true;
    }
    return false;
}
/*
 * Build the automaton for a character class: one rule from the initial
 * state to state 0 per accepted symbol.  For a plain class the accepted
 * symbols are the class contents; for a negated class they are every
 * symbol in 0..CHAR_COUNT-1 that is not listed.
 */
static void construct_class(fsa_t *out, const parse_class_t *class)
{
    construct_base(out);

    if (!class->negated) {
        for (int i = 0; i < class->count; ++i)
            fsa_add_rule(out, out->initial, 0, class->contents[i]);
        return;
    }

    for (int symbol = 0; symbol < CHAR_COUNT; ++symbol) {
        if (in_class(class, symbol))
            continue;
        fsa_add_rule(out, out->initial, 0, symbol);
    }
}
/*
 * Build the automaton for the wildcard: a rule from the initial state to
 * state 0 for every symbol in 0..CHAR_COUNT-1.
 */
static void construct_wildcard(fsa_t *out)
{
    construct_base(out);

    int symbol = 0;
    while (symbol < CHAR_COUNT) {
        fsa_add_rule(out, out->initial, 0, symbol);
        ++symbol;
    }
}
static void base_quantify(fsa_t *out, int *init_out, int *final_out)
{
fsa_t f;
memcpy(&f, out, sizeof(fsa_t));
construct_base(out);
add_fsa(out, &f, init_out, final_out);
fsa_add_rule(out, out->initial, *init_out, EPSILON);
fsa_add_rule(out, *final_out, 0, EPSILON);
} }
static void construct_star(fsa_t *out) static void construct_star(fsa_t *out)
{ {
fsa_t f; int sub_init, sub_final;
memcpy(&f, out, sizeof(fsa_t)); base_quantify(out, &sub_init, &sub_final);
fsa_add_rule(out, sub_final, sub_init, EPSILON);
construct_base(out, EPSILON); fsa_add_rule(out, out->initial, 0, EPSILON);
int f_initial, f_final;
add_fsa(out, &f, &f_initial, &f_final);
fsa_add_rule(out, out->initial, f_initial, EPSILON);
fsa_add_rule(out, f_final, f_initial, EPSILON);
fsa_add_rule(out, f_final, 0, EPSILON);
} }
static void construct_term(const regex_term_t *term, fsa_t *out) static void construct_plus(fsa_t *out)
{
int sub_init, sub_final;
base_quantify(out, &sub_init, &sub_final);
fsa_add_rule(out, sub_final, sub_init, EPSILON);
}
/*
 * Apply the `?` quantifier to the automaton in `out`: wrap it with
 * base_quantify(), then add an epsilon rule from the new initial state
 * straight to state 0 so the wrapped automaton can be skipped.
 */
static void construct_qmark(fsa_t *out)
{
    /* Required by base_quantify() but not needed for `?` itself. */
    int inner_init, inner_final;
    base_quantify(out, &inner_init, &inner_final);

    fsa_add_rule(out, out->initial, 0, EPSILON);
}
static void construct_term(const parse_term_t *term, fsa_t *out)
{ {
switch (term->type) { switch (term->type) {
case REGEX_TERM_EMPTY: case PARSE_TERM_EMPTY:
construct_base(out, EPSILON); construct_symbol(out, EPSILON);
break; break;
case REGEX_TERM_LITERAL: case PARSE_TERM_LITERAL:
construct_base(out, term->literal); construct_symbol(out, term->literal);
break; break;
case REGEX_TERM_SUBEXPR: case PARSE_TERM_SUBEXPR:
construct_nfa(&term->subexpr, out); construct_nfa(&term->subexpr, out);
break; break;
case REGEX_TERM_WILDCARD: case PARSE_TERM_CLASS:
case REGEX_TERM_CLASS: construct_class(out, &term->class);
assert(false); break;
case PARSE_TERM_WILDCARD:
construct_wildcard(out);
break; break;
} }
switch (term->quantifier) { switch (term->quantifier) {
case REGEX_QUANTIFIER_NONE: case PARSE_QUANTIFIER_NONE:
break; break;
case REGEX_QUANTIFIER_STAR: case PARSE_QUANTIFIER_STAR:
construct_star(out); construct_star(out);
break; break;
case REGEX_QUANTIFIER_PLUS: case PARSE_QUANTIFIER_PLUS:
case REGEX_QUANTIFIER_QMARK: construct_plus(out);
assert(false); break;
case PARSE_QUANTIFIER_QMARK:
construct_qmark(out);
break; break;
} }
assert(out->states[0].final); assert(out->states[0].final);
} }
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out) static void construct_sequence(const parse_sequence_t *seq, fsa_t *out)
{ {
assert(seq->count > 0); assert(seq->count > 0);
@@ -201,7 +261,7 @@ static void construct_union(fsa_t *f, const fsa_t *o)
fsa_add_rule(f, final, 0, EPSILON); fsa_add_rule(f, final, 0, EPSILON);
} }
void construct_nfa(const regex_t *regex, fsa_t *out) void construct_nfa(const parse_tree_t *regex, fsa_t *out)
{ {
assert(regex->count > 0); assert(regex->count > 0);

View File

@@ -160,13 +160,17 @@ static void insert(table_t *table, int *nfa_states, int count, int dfa_state)
table->entries = calloc(table->capacity, sizeof(table_entry_t)); table->entries = calloc(table->capacity, sizeof(table_entry_t));
assert(NULL != table->entries); assert(NULL != table->entries);
for (int i = 0; i < old_capacity; ++i) { for (int i = 0; i < old_capacity; ++i) {
if (0 != entries[i].nfa_state_count) if (0 != entries[i].nfa_state_count) {
continue; insert(
insert( table, entries[i].nfa_states, entries[i].nfa_state_count,
table, entries[i].nfa_states, entries[i].nfa_state_count, entries[i].dfa_state);
entries[i].dfa_state); }
} }
free(entries); free(entries);
// Recurse to insert the entry now that the table has been
// expanded.
insert(table, nfa_states, count, dfa_state);
} }
static bool lookup_or_create( static bool lookup_or_create(

View File

@@ -1,150 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "desugar.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/*
 * Rewrite a (non-negated) character class term in place as a
 * sub-expression with one single-literal alternative per class member,
 * i.e. [abc] becomes (a|b|c).  The class's storage is released.
 *
 * Negated classes are not supported (asserted).  The term's quantifier
 * is left untouched.
 */
static void desugar_class(regex_term_t *term)
{
    assert(!term->class.negated);

    const int count = term->class.count;
    regex_sequence_t *alternatives
        = malloc(count * sizeof(regex_sequence_t));
    assert(NULL != alternatives);

    for (int i = 0; i < count; ++i) {
        regex_term_t *terms = malloc(sizeof(regex_term_t));
        /* Checked like every other allocation in this file. */
        assert(NULL != terms);

        terms[0].quantifier = REGEX_QUANTIFIER_NONE;
        terms[0].type = REGEX_TERM_LITERAL;
        terms[0].literal = term->class.contents[i];

        alternatives[i].count = alternatives[i].capacity = 1;
        alternatives[i].contents = terms;
    }

    /* The class shares a union with subexpr, so free it before the
     * sub-expression fields are written. */
    regex_class_free(&term->class);
    term->type = REGEX_TERM_SUBEXPR;
    term->subexpr.count = term->subexpr.capacity = count;
    term->subexpr.contents = alternatives;
}
/* Declared early: the two deep-copy routines are mutually recursive. */
static void deep_copy_term(regex_term_t *dst, regex_term_t *src);

/*
 * Make `dst` an independent copy of the sequence `src`: a freshly
 * allocated term array sized exactly to src's count, with each term
 * deep-copied.
 */
static void deep_copy_sequence(regex_sequence_t *dst, regex_sequence_t *src)
{
    const int n = src->count;

    regex_term_t *copies = malloc(n * sizeof(regex_term_t));
    assert(NULL != copies);

    dst->count = n;
    dst->capacity = n;
    dst->contents = copies;

    for (int i = 0; i < n; ++i)
        deep_copy_term(&copies[i], &src->contents[i]);
}
/*
 * Make `dst` an independent copy of the term `src`.  Literal and empty
 * terms are copied by value; a sub-expression additionally gets its own
 * array of deep-copied alternatives.  Wildcard and class terms are not
 * supported (asserted).
 */
static void deep_copy_term(regex_term_t *dst, regex_term_t *src)
{
    assert(REGEX_TERM_WILDCARD != src->type);
    assert(REGEX_TERM_CLASS != src->type);

    memcpy(dst, src, sizeof *dst);
    if (REGEX_TERM_SUBEXPR != src->type)
        return;

    /* The flat copy left dst sharing src's alternatives; replace them
     * with a private array sized exactly to the count. */
    const int n = src->subexpr.count;
    regex_sequence_t *seqs = malloc(n * sizeof(regex_sequence_t));
    assert(NULL != seqs);

    dst->subexpr.capacity = n;
    dst->subexpr.contents = seqs;
    for (int i = 0; i < n; ++i)
        deep_copy_sequence(&seqs[i], &src->subexpr.contents[i]);
}
/*
 * Rewrite `x+` in place as the sub-expression `(x x*)`: a single
 * alternative holding the original term (now unquantified) followed by
 * a deep copy of it carrying the star quantifier.
 */
static void desugar_plus(regex_term_t *term)
{
    regex_sequence_t *seq = malloc(sizeof(regex_sequence_t));
    assert(NULL != seq);

    regex_term_t *pair = malloc(2 * sizeof(regex_term_t));
    assert(NULL != pair);

    /* The first slot takes over the original term's contents; the
     * second gets its own copy so the two can be freed independently. */
    memcpy(&pair[0], term, sizeof(regex_term_t));
    deep_copy_term(&pair[1], term);
    pair[0].quantifier = REGEX_QUANTIFIER_NONE;
    pair[1].quantifier = REGEX_QUANTIFIER_STAR;

    seq->count = 2;
    seq->capacity = 2;
    seq->contents = pair;

    term->quantifier = REGEX_QUANTIFIER_NONE;
    term->type = REGEX_TERM_SUBEXPR;
    term->subexpr.count = 1;
    term->subexpr.capacity = 1;
    term->subexpr.contents = seq;
}
/*
 * Rewrite `x?` in place as the sub-expression `(<empty>|x)`: two
 * single-term alternatives, the first an empty term and the second the
 * original term with its quantifier removed.
 */
static void desugar_qmark(regex_term_t *term)
{
    regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t));
    assert(NULL != alternatives);

    /* Alternative 0: match nothing. */
    alternatives[0].count = alternatives[0].capacity = 1;
    alternatives[0].contents = malloc(sizeof(regex_term_t));
    assert(NULL != alternatives[0].contents);
    alternatives[0].contents[0].quantifier = REGEX_QUANTIFIER_NONE;
    alternatives[0].contents[0].type = REGEX_TERM_EMPTY;

    /* Alternative 1: the original term.  Its own capacity must be set
     * here; it was previously left uninitialised because the assignment
     * targeted alternatives[0].capacity a second time. */
    alternatives[1].count = alternatives[1].capacity = 1;
    alternatives[1].contents = malloc(sizeof(regex_term_t));
    assert(NULL != alternatives[1].contents);
    memcpy(&alternatives[1].contents[0], term, sizeof(regex_term_t));
    alternatives[1].contents[0].quantifier = REGEX_QUANTIFIER_NONE;

    term->quantifier = REGEX_QUANTIFIER_NONE;
    term->type = REGEX_TERM_SUBEXPR;
    term->subexpr.count = term->subexpr.capacity = 2;
    term->subexpr.contents = alternatives;
}
/*
 * Desugar one term in place.  First the term itself: classes become
 * alternations and sub-expressions are desugared recursively (wildcards
 * are not supported and trip an assertion).  Then its quantifier: `+`
 * and `?` are rewritten, while `*` and "none" are left as they are.
 */
static void desugar_term(regex_term_t *term)
{
    const regex_term_type_t type = term->type;
    if (REGEX_TERM_WILDCARD == type)
        assert(false);
    else if (REGEX_TERM_CLASS == type)
        desugar_class(term);
    else if (REGEX_TERM_SUBEXPR == type)
        desugar_regex(&term->subexpr);

    const regex_quantifier_t quantifier = term->quantifier;
    if (REGEX_QUANTIFIER_PLUS == quantifier)
        desugar_plus(term);
    else if (REGEX_QUANTIFIER_QMARK == quantifier)
        desugar_qmark(term);
}
/*
 * Desugar every term of every alternative of `regex`, in place.
 */
void desugar_regex(regex_t *regex)
{
    for (int alt = 0; alt < regex->count; ++alt) {
        regex_sequence_t *seq = &regex->contents[alt];
        for (int t = 0; t < seq->count; ++t)
            desugar_term(&seq->contents[t]);
    }
}

View File

@@ -33,7 +33,8 @@ int fsa_add_state(fsa_t *fsa)
{ {
if (fsa->count >= fsa->capacity) { if (fsa->count >= fsa->capacity) {
fsa->capacity *= 2; fsa->capacity *= 2;
fsa->states = realloc(fsa->states, fsa->capacity); fsa->states
= realloc(fsa->states, fsa->capacity * sizeof(fsa_state_t));
assert(NULL != fsa->states); assert(NULL != fsa->states);
} }
@@ -56,7 +57,8 @@ void fsa_add_rule(fsa_t *fsa, int from, int to, int input)
fsa_state_t *state = &fsa->states[from]; fsa_state_t *state = &fsa->states[from];
if (state->count >= state->capacity) { if (state->count >= state->capacity) {
state->capacity *= 2; state->capacity *= 2;
state->rules = realloc(state->rules, state->capacity); state->rules
= realloc(state->rules, state->capacity * sizeof(fsa_rule_t));
assert(NULL != state->rules); assert(NULL != state->rules);
} }

View File

@@ -7,8 +7,8 @@
#define CONSTRUCT_H #define CONSTRUCT_H
#include "fsa.h" #include "fsa.h"
#include "regex.h" #include "parse.h"
void construct_nfa(const regex_t *regex, fsa_t *out); void construct_nfa(const parse_tree_t *regex, fsa_t *out);
#endif #endif

View File

@@ -1,13 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef DESUGAR_H
#define DESUGAR_H
#include "regex.h"
/* Rewrite `regex` in place so that it no longer contains character
 * classes or the `+` and `?` quantifiers: classes become alternations of
 * literals, `x+` becomes `(x x*)` and `x?` becomes `(<empty>|x)`.
 * Wildcards and negated classes are not supported and trip assertions. */
void desugar_regex(regex_t *regex);
#endif

View File

@@ -6,10 +6,53 @@
#ifndef PARSE_H #ifndef PARSE_H
#define PARSE_H #define PARSE_H
#include "regex.h" #include <stdbool.h>
#define PARSE_FAIL (-1) #define PARSE_FAIL (-1)
int parse_expr(const char *input, int rem, regex_t *out); typedef struct {
bool negated;
int count, capacity;
char *contents;
} parse_class_t;
typedef enum {
PARSE_QUANTIFIER_NONE,
PARSE_QUANTIFIER_STAR,
PARSE_QUANTIFIER_PLUS,
PARSE_QUANTIFIER_QMARK,
} parse_quantifier_t;
typedef enum {
PARSE_TERM_WILDCARD,
PARSE_TERM_CLASS,
PARSE_TERM_LITERAL,
PARSE_TERM_SUBEXPR,
PARSE_TERM_EMPTY,
} parse_term_type_t;
struct _parse_term;
typedef struct {
int count, capacity;
struct _parse_term *contents;
} parse_sequence_t;
typedef struct {
int count, capacity;
parse_sequence_t *contents;
} parse_tree_t;
typedef struct _parse_term {
parse_quantifier_t quantifier;
parse_term_type_t type;
union {
parse_class_t class;
char literal;
parse_tree_t subexpr;
};
} parse_term_t;
int parse_expr(const char *input, int rem, parse_tree_t *out);
void parse_tree_free(const parse_tree_t *t);
#endif #endif

View File

@@ -1,56 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef REGEX_H
#define REGEX_H
#include <stdbool.h>
/* A character class: a list of member characters, optionally negated. */
typedef struct {
    bool negated;
    int count;      /* members in use */
    int capacity;   /* members allocated */
    char *contents;
} regex_class_t;

/* Quantifier attached to a term. */
typedef enum {
    REGEX_QUANTIFIER_NONE,
    REGEX_QUANTIFIER_STAR,
    REGEX_QUANTIFIER_PLUS,
    REGEX_QUANTIFIER_QMARK,
} regex_quantifier_t;

/* Discriminator for the union inside regex_term_t. */
typedef enum {
    REGEX_TERM_WILDCARD,
    REGEX_TERM_CLASS,
    REGEX_TERM_LITERAL,
    REGEX_TERM_SUBEXPR,
    REGEX_TERM_EMPTY,
} regex_term_type_t;

/* Forward declaration: sequences hold terms, and terms can hold whole
 * expressions, so the types are mutually recursive. */
struct _regex_term;

/* A sequence of terms matched one after another. */
typedef struct {
    int count;
    int capacity;
    struct _regex_term *contents;
} regex_sequence_t;

/* An expression: a list of alternative sequences. */
typedef struct {
    int count;
    int capacity;
    regex_sequence_t *contents;
} regex_t;

/* A single term with its quantifier; `type` selects the union member
 * (wildcard and empty terms use none of them). */
typedef struct _regex_term {
    regex_quantifier_t quantifier;
    regex_term_type_t type;
    union {
        regex_class_t class;
        char literal;
        regex_t subexpr;
    };
} regex_term_t;

/* Release everything owned by the expression `t` (but not `t` itself). */
void regex_free(const regex_t *t);

/* Release the member list owned by the class `c` (but not `c` itself). */
void regex_class_free(const regex_class_t *c);
#endif

View File

@@ -45,7 +45,7 @@ static int parse_literal(const char *input, int rem, char *out)
} }
} }
static int parse_class(const char *input, int rem, regex_class_t *out) static int parse_class(const char *input, int rem, parse_class_t *out)
{ {
int result, used = 0; int result, used = 0;
@@ -87,7 +87,7 @@ static int parse_class(const char *input, int rem, regex_class_t *out)
return out->count > 0 ? used : -1; return out->count > 0 ? used : -1;
} }
static int parse_term(const char *input, int rem, regex_term_t *out) static int parse_term(const char *input, int rem, parse_term_t *out)
{ {
int result, used = 0; int result, used = 0;
@@ -95,7 +95,7 @@ static int parse_term(const char *input, int rem, regex_term_t *out)
return PARSE_FAIL; return PARSE_FAIL;
if ('.' == input[0]) { if ('.' == input[0]) {
out->type = REGEX_TERM_WILDCARD; out->type = PARSE_TERM_WILDCARD;
++used; ++used;
} else if ('(' == input[0]) { } else if ('(' == input[0]) {
++used; ++used;
@@ -103,7 +103,7 @@ static int parse_term(const char *input, int rem, regex_term_t *out)
result = parse_expr(input + used, rem - used, &out->subexpr); result = parse_expr(input + used, rem - used, &out->subexpr);
if (PARSE_FAIL == result) if (PARSE_FAIL == result)
return PARSE_FAIL; return PARSE_FAIL;
out->type = REGEX_TERM_SUBEXPR; out->type = PARSE_TERM_SUBEXPR;
used += result; used += result;
if (')' != input[used]) if (')' != input[used])
@@ -113,54 +113,54 @@ static int parse_term(const char *input, int rem, regex_term_t *out)
result = parse_class(input + used, rem - used, &out->class); result = parse_class(input + used, rem - used, &out->class);
if (PARSE_FAIL == result) if (PARSE_FAIL == result)
return PARSE_FAIL; return PARSE_FAIL;
out->type = REGEX_TERM_CLASS; out->type = PARSE_TERM_CLASS;
used += result; used += result;
} else { } else {
result = parse_literal(input + used, rem - used, &out->literal); result = parse_literal(input + used, rem - used, &out->literal);
if (PARSE_FAIL == result) if (PARSE_FAIL == result)
return PARSE_FAIL; return PARSE_FAIL;
out->type = REGEX_TERM_LITERAL; out->type = PARSE_TERM_LITERAL;
used += result; used += result;
} }
if (used < rem) { if (used < rem) {
switch (input[used]) { switch (input[used]) {
case '*': case '*':
out->quantifier = REGEX_QUANTIFIER_STAR; out->quantifier = PARSE_QUANTIFIER_STAR;
++used; ++used;
break; break;
case '+': case '+':
out->quantifier = REGEX_QUANTIFIER_PLUS; out->quantifier = PARSE_QUANTIFIER_PLUS;
++used; ++used;
break; break;
case '?': case '?':
out->quantifier = REGEX_QUANTIFIER_QMARK; out->quantifier = PARSE_QUANTIFIER_QMARK;
++used; ++used;
break; break;
default: default:
out->quantifier = REGEX_QUANTIFIER_NONE; out->quantifier = PARSE_QUANTIFIER_NONE;
} }
} else { } else {
out->quantifier = REGEX_QUANTIFIER_NONE; out->quantifier = PARSE_QUANTIFIER_NONE;
} }
return used; return used;
} }
static int parse_sequence(const char *input, int rem, regex_sequence_t *out) static int parse_sequence(const char *input, int rem, parse_sequence_t *out)
{ {
int result, used = 0; int result, used = 0;
out->count = 0; out->count = 0;
out->capacity = SEQUENCE_START_CAPACITY; out->capacity = SEQUENCE_START_CAPACITY;
out->contents = malloc(out->capacity * sizeof(regex_term_t)); out->contents = malloc(out->capacity * sizeof(parse_term_t));
assert(NULL != out->contents); assert(NULL != out->contents);
while (used < rem) { while (used < rem) {
if (out->count >= out->capacity) { if (out->count >= out->capacity) {
out->capacity *= 2; out->capacity *= 2;
out->contents = realloc( out->contents = realloc(
out->contents, out->capacity * sizeof(regex_term_t)); out->contents, out->capacity * sizeof(parse_term_t));
assert(NULL != out->contents); assert(NULL != out->contents);
} }
@@ -175,13 +175,13 @@ static int parse_sequence(const char *input, int rem, regex_sequence_t *out)
return out->count > 0 ? used : -1; return out->count > 0 ? used : -1;
} }
int parse_expr(const char *input, int rem, regex_t *out) int parse_expr(const char *input, int rem, parse_tree_t *out)
{ {
int result, used = 0; int result, used = 0;
out->count = 0; out->count = 0;
out->capacity = TREE_START_CAPACITY; out->capacity = TREE_START_CAPACITY;
out->contents = malloc(out->capacity * sizeof(regex_sequence_t)); out->contents = malloc(out->capacity * sizeof(parse_sequence_t));
assert(NULL != out->contents); assert(NULL != out->contents);
result = parse_sequence(input + used, rem - used, &out->contents[0]); result = parse_sequence(input + used, rem - used, &out->contents[0]);
@@ -198,7 +198,7 @@ int parse_expr(const char *input, int rem, regex_t *out)
if (out->count >= out->capacity) { if (out->count >= out->capacity) {
out->capacity *= 2; out->capacity *= 2;
out->contents = realloc( out->contents = realloc(
out->contents, out->capacity * sizeof(regex_sequence_t)); out->contents, out->capacity * sizeof(parse_sequence_t));
assert(NULL != out->contents); assert(NULL != out->contents);
} }
@@ -212,3 +212,37 @@ int parse_expr(const char *input, int rem, regex_t *out)
return used; return used;
} }
/*
 * Release the member list owned by the class `c`.  A NULL list is
 * accepted, since free(NULL) does nothing.
 */
static void class_free(const parse_class_t *c)
{
    free(c->contents);
}
static void sequence_free(const parse_sequence_t *s)
{
if (NULL != s->contents) {
for (int i = 0; i < s->count; ++i) {
switch (s->contents[i].type) {
case PARSE_TERM_CLASS:
class_free(&s->contents[i].class);
break;
case PARSE_TERM_SUBEXPR:
parse_tree_free(&s->contents[i].subexpr);
break;
default:
break;
}
}
free(s->contents);
}
}
void parse_tree_free(const parse_tree_t *t)
{
if (NULL != t->contents) {
for (int i = 0; i < t->count; ++i)
sequence_free(&t->contents[i]);
free(t->contents);
}
}

View File

@@ -1,42 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "regex.h"
#include <stdlib.h>
static void sequence_free(const regex_sequence_t *s)
{
if (NULL != s->contents) {
for (int i = 0; i < s->count; ++i) {
switch (s->contents[i].type) {
case REGEX_TERM_CLASS:
regex_class_free(&s->contents[i].class);
break;
case REGEX_TERM_SUBEXPR:
regex_free(&s->contents[i].subexpr);
break;
default:
break;
}
}
free(s->contents);
}
}
void regex_free(const regex_t *t)
{
if (NULL != t->contents) {
for (int i = 0; i < t->count; ++i)
sequence_free(&t->contents[i]);
free(t->contents);
}
}
/*
 * Release the member list owned by the class `c`.  A NULL list is
 * accepted, since free(NULL) does nothing.
 */
void regex_class_free(const regex_class_t *c)
{
    free(c->contents);
}

View File

@@ -1,4 +1,4 @@
#!/bin/sh #!/bin/sh
cd "$(git rev-parse --show-toplevel)" cd "$(git rev-parse --show-toplevel)"
find . -not \( -path './.git' -prune \) -not \( -path './build' -prune \) \ find . -not \( -path './.git' -prune \) -not \( -path './build' -prune \) \
| entr -s 'clear && scripts/build.sh && scripts/test.sh' | entr -cs 'scripts/build.sh && scripts/test.sh'

View File

@@ -19,7 +19,6 @@ endfunction()
add_test_suites( add_test_suites(
construct_tests.c construct_tests.c
convert_tests.c convert_tests.c
desugar_tests.c
fsa_tests.c fsa_tests.c
integration_tests.c integration_tests.c
min_heap_tests.c min_heap_tests.c

View File

@@ -34,13 +34,13 @@ static bool accepts(const fsa_t *nfa, const char *input)
static void test_empty_expression(void) static void test_empty_expression(void)
{ {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_EMPTY; terms[0].type = PARSE_TERM_EMPTY;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
const regex_t regex const parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives }; = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
@@ -49,20 +49,45 @@ static void test_empty_expression(void)
ASSERT_TRUE(accepts(&fsa, "")); ASSERT_TRUE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "a")); ASSERT_FALSE(accepts(&fsa, "a"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa);
}
static void test_wildcard(void)
{
parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_WILDCARD;
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
const parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_TRUE(accepts(&fsa, "b"));
ASSERT_TRUE(accepts(&fsa, "c"));
ASSERT_TRUE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "aa"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_literal_expression(void) static void test_literal_expression(void)
{ {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
const regex_t regex const parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives }; = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
@@ -71,26 +96,27 @@ static void test_literal_expression(void)
ASSERT_TRUE(accepts(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_sequence(void) static void test_sequence(void)
{ {
regex_term_t *terms = malloc(3 * sizeof(regex_term_t)); parse_term_t *terms = malloc(3 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
terms[1].quantifier = REGEX_QUANTIFIER_NONE; terms[1].quantifier = PARSE_QUANTIFIER_NONE;
terms[1].type = REGEX_TERM_LITERAL; terms[1].type = PARSE_TERM_LITERAL;
terms[1].literal = 'b'; terms[1].literal = 'b';
terms[2].quantifier = REGEX_QUANTIFIER_NONE; terms[2].quantifier = PARSE_QUANTIFIER_NONE;
terms[2].type = REGEX_TERM_LITERAL; terms[2].type = PARSE_TERM_LITERAL;
terms[2].literal = 'c'; terms[2].literal = 'c';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3; alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms; alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -101,24 +127,25 @@ static void test_sequence(void)
ASSERT_FALSE(accepts(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "abcd")); ASSERT_FALSE(accepts(&fsa, "abcd"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_union(void) static void test_union(void)
{ {
const char *literals = "abc"; const char *literals = "abc";
regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(3 * sizeof(parse_sequence_t));
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = literals[i]; terms[0].literal = literals[i];
alternatives[i].count = alternatives[i].capacity = 1; alternatives[i].count = alternatives[i].capacity = 1;
alternatives[i].contents = terms; alternatives[i].contents = terms;
} }
regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives }; parse_tree_t regex
= { .count = 3, .capacity = 3, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -129,20 +156,21 @@ static void test_union(void)
ASSERT_FALSE(accepts(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "aa")); ASSERT_FALSE(accepts(&fsa, "aa"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_star(void) static void test_star(void)
{ {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_STAR; terms[0].quantifier = PARSE_QUANTIFIER_STAR;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -152,29 +180,78 @@ static void test_star(void)
ASSERT_TRUE(accepts(&fsa, "aaaaaa")); ASSERT_TRUE(accepts(&fsa, "aaaaaa"));
ASSERT_FALSE(accepts(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa);
}
// a+ : the NFA built by construct_nfa() must accept one or more 'a's and
// reject both the empty string and any other character.
static void test_plus(void)
{
    parse_term_t *plus_term = malloc(sizeof(parse_term_t));
    plus_term->type = PARSE_TERM_LITERAL;
    plus_term->literal = 'a';
    plus_term->quantifier = PARSE_QUANTIFIER_PLUS;

    parse_sequence_t *sequence = malloc(sizeof(parse_sequence_t));
    sequence->contents = plus_term;
    sequence->count = 1;
    sequence->capacity = 1;

    parse_tree_t regex = { .contents = sequence, .count = 1, .capacity = 1 };

    fsa_t fsa;
    construct_nfa(&regex, &fsa);

    // One or many repetitions match...
    ASSERT_TRUE(accepts(&fsa, "a"));
    ASSERT_TRUE(accepts(&fsa, "aaaaaa"));
    // ...but zero repetitions and foreign characters do not.
    ASSERT_FALSE(accepts(&fsa, ""));
    ASSERT_FALSE(accepts(&fsa, "b"));

    parse_tree_free(&regex);
    fsa_free(&fsa);
}
static void test_qmark(void)
{
parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_QMARK;
terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a';
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, ""));
ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "aa"));
ASSERT_FALSE(accepts(&fsa, "b"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_subexpression(void) static void test_subexpression(void)
{ {
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *inner_terms = malloc(1 * sizeof(parse_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE; inner_terms[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms[0].type = REGEX_TERM_LITERAL; inner_terms[0].type = PARSE_TERM_LITERAL;
inner_terms[0].literal = 'a'; inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives parse_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t)); = malloc(1 * sizeof(parse_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1; inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms; inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR; terms[0].type = PARSE_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1; terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives; terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -182,42 +259,108 @@ static void test_subexpression(void)
ASSERT_TRUE(accepts(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa);
}
// [abc] : a non-negated class must accept exactly one character drawn
// from the class and nothing else.
static void test_class(void)
{
    // The class owns a heap buffer holding its three member characters.
    const char *members = "abc";
    char *class_contents = malloc(3);
    for (int i = 0; i < 3; ++i)
        class_contents[i] = members[i];

    parse_term_t *class_term = malloc(sizeof(parse_term_t));
    class_term->quantifier = PARSE_QUANTIFIER_NONE;
    class_term->type = PARSE_TERM_CLASS;
    class_term->class.negated = false;
    class_term->class.contents = class_contents;
    class_term->class.count = 3;
    class_term->class.capacity = 3;

    parse_sequence_t *sequence = malloc(sizeof(parse_sequence_t));
    sequence->contents = class_term;
    sequence->count = 1;
    sequence->capacity = 1;

    const parse_tree_t regex
        = { .contents = sequence, .count = 1, .capacity = 1 };

    fsa_t fsa;
    construct_nfa(&regex, &fsa);

    // Each member on its own is accepted.
    ASSERT_TRUE(accepts(&fsa, "a"));
    ASSERT_TRUE(accepts(&fsa, "b"));
    ASSERT_TRUE(accepts(&fsa, "c"));
    // Empty input, repeated members and non-members are rejected.
    ASSERT_FALSE(accepts(&fsa, ""));
    ASSERT_FALSE(accepts(&fsa, "aa"));
    ASSERT_FALSE(accepts(&fsa, "d"));

    parse_tree_free(&regex);
    fsa_free(&fsa);
}
static void test_negated_class(void)
{
char *class_contents = malloc(3);
class_contents[0] = 'a';
class_contents[1] = 'b';
class_contents[2] = 'c';
parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_CLASS;
terms[0].class.negated = true;
terms[0].class.count = terms[0].class.capacity = 3;
terms[0].class.contents = class_contents;
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
const parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "d"));
ASSERT_TRUE(accepts(&fsa, "e"));
ASSERT_FALSE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "b"));
ASSERT_FALSE(accepts(&fsa, "c"));
ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "aa"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_sequence_containing_starred_union(void) static void test_sequence_containing_starred_union(void)
{ {
// ab(c|d)* // ab(c|d)*
regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t)); parse_term_t *inner_terms0 = malloc(1 * sizeof(parse_term_t));
inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE; inner_terms0[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms0[0].type = REGEX_TERM_LITERAL; inner_terms0[0].type = PARSE_TERM_LITERAL;
inner_terms0[0].literal = 'c'; inner_terms0[0].literal = 'c';
regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t)); parse_term_t *inner_terms1 = malloc(1 * sizeof(parse_term_t));
inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE; inner_terms1[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms1[0].type = REGEX_TERM_LITERAL; inner_terms1[0].type = PARSE_TERM_LITERAL;
inner_terms1[0].literal = 'd'; inner_terms1[0].literal = 'd';
regex_sequence_t *inner_alternatives parse_sequence_t *inner_alternatives
= malloc(2 * sizeof(regex_sequence_t)); = malloc(2 * sizeof(parse_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1; inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms0; inner_alternatives[0].contents = inner_terms0;
inner_alternatives[1].count = inner_alternatives[1].capacity = 1; inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
inner_alternatives[1].contents = inner_terms1; inner_alternatives[1].contents = inner_terms1;
regex_term_t *terms = malloc(3 * sizeof(regex_term_t)); parse_term_t *terms = malloc(3 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
terms[1].quantifier = REGEX_QUANTIFIER_NONE; terms[1].quantifier = PARSE_QUANTIFIER_NONE;
terms[1].type = REGEX_TERM_LITERAL; terms[1].type = PARSE_TERM_LITERAL;
terms[1].literal = 'b'; terms[1].literal = 'b';
terms[2].quantifier = REGEX_QUANTIFIER_STAR; terms[2].quantifier = PARSE_QUANTIFIER_STAR;
terms[2].type = REGEX_TERM_SUBEXPR; terms[2].type = PARSE_TERM_SUBEXPR;
terms[2].subexpr.count = terms[2].subexpr.capacity = 2; terms[2].subexpr.count = terms[2].subexpr.capacity = 2;
terms[2].subexpr.contents = inner_alternatives; terms[2].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3; alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms; alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -233,7 +376,7 @@ static void test_sequence_containing_starred_union(void)
ASSERT_FALSE(accepts(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "foo")); ASSERT_FALSE(accepts(&fsa, "foo"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
@@ -241,23 +384,24 @@ static void
test_union_of_single_term_and_sequence_containing_starred_term(void) test_union_of_single_term_and_sequence_containing_starred_term(void)
{ {
// a|b*c // a|b*c
regex_term_t *terms0 = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms0 = malloc(1 * sizeof(parse_term_t));
terms0[0].quantifier = REGEX_QUANTIFIER_NONE; terms0[0].quantifier = PARSE_QUANTIFIER_NONE;
terms0[0].type = REGEX_TERM_LITERAL; terms0[0].type = PARSE_TERM_LITERAL;
terms0[0].literal = 'a'; terms0[0].literal = 'a';
regex_term_t *terms1 = malloc(2 * sizeof(regex_term_t)); parse_term_t *terms1 = malloc(2 * sizeof(parse_term_t));
terms1[0].quantifier = REGEX_QUANTIFIER_STAR; terms1[0].quantifier = PARSE_QUANTIFIER_STAR;
terms1[0].type = REGEX_TERM_LITERAL; terms1[0].type = PARSE_TERM_LITERAL;
terms1[0].literal = 'b'; terms1[0].literal = 'b';
terms1[1].quantifier = REGEX_QUANTIFIER_NONE; terms1[1].quantifier = PARSE_QUANTIFIER_NONE;
terms1[1].type = REGEX_TERM_LITERAL; terms1[1].type = PARSE_TERM_LITERAL;
terms1[1].literal = 'c'; terms1[1].literal = 'c';
regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(2 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms0; alternatives[0].contents = terms0;
alternatives[1].count = alternatives[1].capacity = 2; alternatives[1].count = alternatives[1].capacity = 2;
alternatives[1].contents = terms1; alternatives[1].contents = terms1;
regex_t regex = { .count = 2, .capacity = 2, .contents = alternatives }; parse_tree_t regex
= { .count = 2, .capacity = 2, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@@ -269,7 +413,49 @@ test_union_of_single_term_and_sequence_containing_starred_term(void)
ASSERT_FALSE(accepts(&fsa, "foo")); ASSERT_FALSE(accepts(&fsa, "foo"));
ASSERT_FALSE(accepts(&fsa, "ba")); ASSERT_FALSE(accepts(&fsa, "ba"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa);
}
static void test_sequence_of_subexpr_a_or_empty_and_b(void)
{
// (a|ε)b
parse_term_t *inner_terms0 = malloc(1 * sizeof(parse_term_t));
inner_terms0[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms0[0].type = PARSE_TERM_LITERAL;
inner_terms0[0].literal = 'a';
parse_term_t *inner_terms1 = malloc(1 * sizeof(parse_term_t));
inner_terms1[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms1[0].type = PARSE_TERM_EMPTY;
parse_sequence_t *inner_alternatives
= malloc(2 * sizeof(parse_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms0;
inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
inner_alternatives[1].contents = inner_terms1;
parse_term_t *terms = malloc(2 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 2;
terms[0].subexpr.contents = inner_alternatives;
terms[1].quantifier = PARSE_QUANTIFIER_NONE;
terms[1].type = PARSE_TERM_LITERAL;
terms[1].literal = 'b';
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 2;
alternatives[0].contents = terms;
parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "ab"));
ASSERT_TRUE(accepts(&fsa, "b"));
ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "a"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
@@ -280,14 +466,20 @@ int main(void)
// Base cases // Base cases
test_empty_expression(); test_empty_expression();
test_literal_expression(); test_literal_expression();
test_wildcard();
test_sequence(); test_sequence();
test_union(); test_union();
test_star(); test_star();
test_plus();
test_qmark();
test_subexpression(); test_subexpression();
test_class();
test_negated_class();
// Compound expressions // Compound expressions
test_sequence_containing_starred_union(); test_sequence_containing_starred_union();
test_union_of_single_term_and_sequence_containing_starred_term(); test_union_of_single_term_and_sequence_containing_starred_term();
test_sequence_of_subexpr_a_or_empty_and_b();
return TESTING_END(); return TESTING_END();
} }

View File

@@ -1,357 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "desugar.h"
#include "testing.h"
#include <stddef.h>
static void a_is_unchanged(void)
{
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal);
regex_free(&t);
}
static void abc_is_unchanged(void)
{
regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b';
terms[2].type = REGEX_TERM_LITERAL;
terms[2].literal = 'c';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(3, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type);
ASSERT_EQ('b', t.contents[0].contents[1].literal);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[2].type);
ASSERT_EQ('c', t.contents[0].contents[2].literal);
regex_free(&t);
}
static void a_star_is_unchanged(void)
{
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_STAR;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_STAR, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal);
regex_free(&t);
}
static void a_or_b_or_c_is_unchanged(void)
{
const char *literals = "abc";
regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t));
for (int i = 0; i < 3; ++i) {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = literals[i];
alternatives[i].count = alternatives[i].capacity = 1;
alternatives[i].contents = terms;
}
regex_t t = { .count = 3, .capacity = 3, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(3, t.count);
ASSERT_NOT_NULL(t.contents);
for (int i = 0; i < 3; ++i) {
ASSERT_EQ(1, t.contents[i].count);
ASSERT_NOT_NULL(t.contents[i].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, t.contents[i].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[i].contents[0].type);
ASSERT_EQ(literals[i], t.contents[i].contents[0].literal);
}
regex_free(&t);
}
static void subexpr_a_is_unchanged(void)
{
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal);
regex_free(&t);
}
static void a_plus_becomes_subexpr_aa_star(void)
{
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_PLUS;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(2, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal);
ASSERT_EQ(
REGEX_QUANTIFIER_STAR, inner->contents[0].contents[1].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[1].type);
ASSERT_EQ('a', inner->contents[0].contents[1].literal);
regex_free(&t);
}
static void a_qmark_becomes_subexpr_empty_or_a(void)
{
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_QMARK;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('a', inner->contents[1].contents[0].literal);
regex_free(&t);
}
static void class_abc_becomes_subexpr_a_or_b_or_c(void)
{
char *options = malloc(3 * sizeof(char));
options[0] = 'a';
options[1] = 'b';
options[2] = 'c';
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_CLASS;
terms[0].class.negated = false;
terms[0].class.count = terms[0].class.capacity = 3;
terms[0].class.contents = options;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(3, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('b', inner->contents[1].contents[0].literal);
ASSERT_EQ(1, inner->contents[2].count);
ASSERT_NOT_NULL(inner->contents[2].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[2].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[2].contents[0].type);
ASSERT_EQ('c', inner->contents[2].contents[0].literal);
regex_free(&t);
}
static void subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a(void)
{
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_QMARK;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner;
inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, inner->contents[0].contents[0].type);
inner = &inner->contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('a', inner->contents[1].contents[0].literal);
regex_free(&t);
}
int main(void)
{
TESTING_BEGIN();
a_is_unchanged();
abc_is_unchanged();
a_star_is_unchanged();
a_or_b_or_c_is_unchanged();
subexpr_a_is_unchanged();
a_plus_becomes_subexpr_aa_star();
a_qmark_becomes_subexpr_empty_or_a();
class_abc_becomes_subexpr_a_or_b_or_c();
subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a();
return TESTING_END();
}

View File

@@ -47,11 +47,54 @@ static void test_arbitrary_regex_1(void)
fsa_free(&dfa); fsa_free(&dfa);
} }
// End-to-end check of an optional group followed by a one-or-more class:
// an optional "l" or "wh" prefix, then at least one vowel.
static void test_arbitrary_regex_2(void)
{
    static const char pattern[] = "(l|wh)?[aeiou]+";

    fsa_t dfa;
    // sizeof counts the terminating NUL, so subtract it to get the length.
    const bool compiled = compile(pattern, sizeof(pattern) - 1, &dfa);
    ASSERT_TRUE(compiled);

    // With and without either prefix, one or more vowels match.
    ASSERT_ACCEPTS(&dfa, "laaaa");
    ASSERT_ACCEPTS(&dfa, "eeeee");
    ASSERT_ACCEPTS(&dfa, "iii");
    ASSERT_ACCEPTS(&dfa, "whooo");
    ASSERT_ACCEPTS(&dfa, "u");
    // A prefix with no vowels, or followed by non-vowels, does not.
    ASSERT_REJECTS(&dfa, "wh");
    ASSERT_REJECTS(&dfa, "lxxx");

    fsa_free(&dfa);
}
// End-to-end check of a pattern for angle-bracket #include lines naming a
// lowercase ".h" header; the dot is escaped so it is matched literally.
static void test_system_header_include_regex(void)
{
    static const char pattern[]
        = "#include <[abcdefghijklmnopqrstuvwxyz]+\\.h>";

    fsa_t dfa;
    // sizeof counts the terminating NUL, so subtract it to get the length.
    const bool compiled = compile(pattern, sizeof(pattern) - 1, &dfa);
    ASSERT_TRUE(compiled);

    ASSERT_ACCEPTS(&dfa, "#include <stdio.h>");
    // Quoted includes must not match.
    ASSERT_REJECTS(&dfa, "#include \"foo.h\"");

    fsa_free(&dfa);
}
// End-to-end check of a single-quoted string pattern: between the quotes,
// any run of backslash-escaped quotes or non-quote characters.
static void test_quoted_string_regex(void)
{
    static const char pattern[] = "'(\\\\'|[^'])*'";

    fsa_t dfa;
    // sizeof counts the terminating NUL, so subtract it to get the length.
    const bool compiled = compile(pattern, sizeof(pattern) - 1, &dfa);
    ASSERT_TRUE(compiled);

    ASSERT_ACCEPTS(&dfa, "''");
    ASSERT_ACCEPTS(&dfa, "'foo bar baz'");
    ASSERT_ACCEPTS(&dfa, "'foo \\'bar\\' baz'");
    // Unescaped inner quotes end the string early, so this is rejected.
    ASSERT_REJECTS(&dfa, "'foo 'bar' baz'");

    fsa_free(&dfa);
}
int main(void) int main(void)
{ {
TESTING_BEGIN(); TESTING_BEGIN();
test_foo_or_bar_regex(); test_foo_or_bar_regex();
test_even_number_of_Is_regex(); test_even_number_of_Is_regex();
test_arbitrary_regex_1(); test_arbitrary_regex_1();
test_arbitrary_regex_2();
test_system_header_include_regex();
test_quoted_string_regex();
return TESTING_END(); return TESTING_END();
} }

View File

@@ -10,268 +10,268 @@
static void a_has_1_alternative(void) static void a_has_1_alternative(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("a", &t); const int result = PARSE_EXPR_STRING("a", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
regex_free(&t); parse_tree_free(&t);
} }
static void a_pipe_b_has_2_alternatives(void) static void a_pipe_b_has_2_alternatives(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("a|b", &t); const int result = PARSE_EXPR_STRING("a|b", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(2, t.count); ASSERT_EQ(2, t.count);
regex_free(&t); parse_tree_free(&t);
} }
static void a_pipe_b_pipe_c_has_3_alternatives(void) static void a_pipe_b_pipe_c_has_3_alternatives(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("a|b|c", &t); const int result = PARSE_EXPR_STRING("a|b|c", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(3, t.count); ASSERT_EQ(3, t.count);
regex_free(&t); parse_tree_free(&t);
} }
static void a_is_parsed_as_unquantified_literal(void) static void a_is_parsed_as_unquantified_literal(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("a", &t); const int result = PARSE_EXPR_STRING("a", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal); ASSERT_EQ('a', t.contents[0].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void b_is_parsed_as_unquantified_literal(void) static void b_is_parsed_as_unquantified_literal(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("b", &t); const int result = PARSE_EXPR_STRING("b", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('b', t.contents[0].contents[0].literal); ASSERT_EQ('b', t.contents[0].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void abc_is_parsed_as_sequence_of_unquantified_literals(void) static void abc_is_parsed_as_sequence_of_unquantified_literals(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("abc", &t); const int result = PARSE_EXPR_STRING("abc", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(3, t.contents[0].count); ASSERT_EQ(3, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal); ASSERT_EQ('a', t.contents[0].contents[0].literal);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[1].type);
ASSERT_EQ('b', t.contents[0].contents[1].literal); ASSERT_EQ('b', t.contents[0].contents[1].literal);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[2].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[2].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[2].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[2].type);
ASSERT_EQ('c', t.contents[0].contents[2].literal); ASSERT_EQ('c', t.contents[0].contents[2].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void dot_is_parsed_as_unquantified_wildcard_term(void) static void dot_is_parsed_as_unquantified_wildcard_term(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING(".", &t); const int result = PARSE_EXPR_STRING(".", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type);
regex_free(&t); parse_tree_free(&t);
} }
static void backslash_dot_is_parsed_as_unquantified_literal(void) static void backslash_dot_is_parsed_as_unquantified_literal(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("\\.", &t); const int result = PARSE_EXPR_STRING("\\.", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('.', t.contents[0].contents[0].literal); ASSERT_EQ('.', t.contents[0].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void backslash_backslash_is_parsed_as_unquantified_literal(void) static void backslash_backslash_is_parsed_as_unquantified_literal(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("\\\\", &t); const int result = PARSE_EXPR_STRING("\\\\", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('\\', t.contents[0].contents[0].literal); ASSERT_EQ('\\', t.contents[0].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void a_pipe_b_in_parens_is_parsed_as_subexpr_term(void) static void a_pipe_b_in_parens_is_parsed_as_subexpr_term(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("(a|b)", &t); const int result = PARSE_EXPR_STRING("(a|b)", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr; const parse_tree_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count); ASSERT_EQ(2, inner->count);
ASSERT_EQ(1, inner->contents[0].count); ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ( ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); PARSE_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal); ASSERT_EQ('a', inner->contents[0].contents[0].literal);
ASSERT_EQ(1, inner->contents[1].count); ASSERT_EQ(1, inner->contents[1].count);
ASSERT_EQ( ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier); PARSE_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('b', inner->contents[1].contents[0].literal); ASSERT_EQ('b', inner->contents[1].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void a_in_parens_b_is_parsed_as_sequence_with_subexpr_term(void) static void a_in_parens_b_is_parsed_as_sequence_with_subexpr_term(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("(a)b", &t); const int result = PARSE_EXPR_STRING("(a)b", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(2, t.contents[0].count); ASSERT_EQ(2, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_SUBEXPR, t.contents[0].contents[0].type);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[1].type);
ASSERT_EQ('b', t.contents[0].contents[1].literal); ASSERT_EQ('b', t.contents[0].contents[1].literal);
const regex_t *inner = &t.contents[0].contents[0].subexpr; const parse_tree_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->contents[0].count); ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ( ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); PARSE_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal); ASSERT_EQ('a', inner->contents[0].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void dot_star_is_parsed_as_star_quantified_wildcard(void) static void dot_star_is_parsed_as_star_quantified_wildcard(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING(".*", &t); const int result = PARSE_EXPR_STRING(".*", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_STAR, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_STAR, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type);
regex_free(&t); parse_tree_free(&t);
} }
static void dot_plus_is_parsed_as_plus_quantified_wildcard(void) static void dot_plus_is_parsed_as_plus_quantified_wildcard(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING(".+", &t); const int result = PARSE_EXPR_STRING(".+", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_PLUS, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_PLUS, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type);
regex_free(&t); parse_tree_free(&t);
} }
static void dot_question_mark_is_parsed_as_qmrk_quantified_wildcard(void) static void dot_question_mark_is_parsed_as_qmrk_quantified_wildcard(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING(".?", &t); const int result = PARSE_EXPR_STRING(".?", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_QMARK, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_QMARK, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type);
regex_free(&t); parse_tree_free(&t);
} }
static void a_in_brackets_is_parsed_as_class_containing_only_a(void) static void a_in_brackets_is_parsed_as_class_containing_only_a(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("[a]", &t); const int result = PARSE_EXPR_STRING("[a]", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_CLASS, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_CLASS, t.contents[0].contents[0].type);
ASSERT_FALSE(t.contents[0].contents[0].class.negated); ASSERT_FALSE(t.contents[0].contents[0].class.negated);
ASSERT_EQ(1, t.contents[0].contents[0].class.count); ASSERT_EQ(1, t.contents[0].contents[0].class.count);
ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents); ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents);
ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]); ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]);
regex_free(&t); parse_tree_free(&t);
} }
static void caret_a_in_brackets_parses_as_negated_class(void) static void caret_a_in_brackets_parses_as_negated_class(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("[^a]", &t); const int result = PARSE_EXPR_STRING("[^a]", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_CLASS, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_CLASS, t.contents[0].contents[0].type);
ASSERT_TRUE(t.contents[0].contents[0].class.negated); ASSERT_TRUE(t.contents[0].contents[0].class.negated);
ASSERT_EQ(1, t.contents[0].contents[0].class.count); ASSERT_EQ(1, t.contents[0].contents[0].class.count);
ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents); ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents);
ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]); ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]);
regex_free(&t); parse_tree_free(&t);
} }
int main(void) int main(void)