Compare commits

..

10 Commits

21 changed files with 918 additions and 2 deletions

18
.build.yml Normal file
View File

@ -0,0 +1,18 @@
image: alpine/edge
packages:
- clang
- cmake
- compiler-rt
- ninja
sources:
- https://git.sr.ht/~cdo/regex-engine
tasks:
- configure: |
cd regex-engine
cmake -GNinja -Bbuild -DSANITIZERS=on -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=clang
- build: |
cd regex-engine
scripts/build.sh
- test: |
cd regex-engine
scripts/test.sh

View File

@ -17,3 +17,4 @@ endfunction()
add_subdirectory(lib)
add_subdirectory(tests)
add_subdirectory(demo)

2
README
View File

@ -27,7 +27,7 @@ tests. I use Clang but the code is ISO C11, it should compile just
fine with GCC. You might need to faff with CMakeLists.txt to get it
to work with another compiler due to command-line flag nonsense.
scripts/build.sh # Compile library and tests
scripts/build.sh # Compile library, demo and tests
scripts/test.sh # Run tests
There is also an entr.sh script which will watch all the project's

3
demo/CMakeLists.txt Normal file
View File

@ -0,0 +1,3 @@
add_executable(shitgrep shitgrep.c)
set_default_target_options(shitgrep)
target_link_libraries(shitgrep PRIVATE lib)

53
demo/shitgrep.c Normal file
View File

@ -0,0 +1,53 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "compile.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_START_CAPACITY 128
/*
 * Write LINE (LEN bytes, not terminated) followed by a newline to
 * stdout, but only if DFA accepts the whole line.
 */
static void print_if_accepted(const fsa_t *dfa, const char *line, int len)
{
	if (!fsa_accepts(dfa, line, len))
		return;
	fwrite(line, 1, len, stdout);
	putchar('\n');
}

/*
 * Minimal grep-like demo: compile the regex given as the only
 * command-line argument, then echo every line of stdin that the
 * resulting DFA accepts in its entirety.
 *
 * Returns EXIT_FAILURE on a usage error or if the regex cannot be
 * compiled, EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
	if (argc != 2) {
		fprintf(stderr, "Usage: %s REGEX\n", argv[0]);
		return EXIT_FAILURE;
	}

	fsa_t dfa;
	if (!compile(argv[1], strlen(argv[1]), &dfa)) {
		fprintf(stderr, "Failed to parse regex\n");
		return EXIT_FAILURE;
	}

	// The line buffer holds the current line without its newline and
	// doubles in size whenever it fills up.
	int len = 0, capacity = BUFFER_START_CAPACITY;
	char *buffer = malloc(capacity);
	assert(NULL != buffer);

	int c;
	while ((c = getchar()) != EOF) {
		if ('\n' == c) {
			print_if_accepted(&dfa, buffer, len);
			len = 0;
			continue;
		}
		if (capacity < len + 1) {
			capacity *= 2;
			buffer = realloc(buffer, capacity);
			assert(NULL != buffer);
		}
		buffer[len++] = c;
	}

	// Input that does not end in a newline still has a final line
	// sitting in the buffer; match it rather than silently dropping it.
	if (0 < len)
		print_if_accepted(&dfa, buffer, len);

	fsa_free(&dfa);
	free(buffer);
	return EXIT_SUCCESS;
}

View File

@ -1,7 +1,10 @@
add_library(lib
compile.c
construct.c
convert.c
desugar.c
fsa.c
min_heap.c
parse.c
regex.c
)

28
lib/compile.c Normal file
View File

@ -0,0 +1,28 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "compile.h"
#include "parse.h"
#include "desugar.h"
#include "construct.h"
#include "convert.h"
/*
 * Compile the LEN-byte regular expression REGEX into a DFA written to
 * DFA_OUT.
 *
 * The pipeline is: parse -> desugar -> construct NFA -> convert to
 * DFA. The parse tree and the NFA are temporaries released here.
 * Returns false, without touching DFA_OUT, when parse_expr() reports
 * -1; returns true otherwise.
 */
bool compile(const char *regex, int len, fsa_t *dfa_out)
{
	regex_t parse_tree;
	const int parse_result = parse_expr(regex, len, &parse_tree);
	if (parse_result == -1)
		return false;

	desugar_regex(&parse_tree);

	// The parse tree is only needed until the NFA has been built.
	fsa_t intermediate_nfa;
	construct_nfa(&parse_tree, &intermediate_nfa);
	regex_free(&parse_tree);

	// Likewise the NFA is only needed until the DFA has been built.
	convert_to_dfa(&intermediate_nfa, dfa_out);
	fsa_free(&intermediate_nfa);

	return true;
}

275
lib/convert.c Normal file
View File

@ -0,0 +1,275 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "convert.h"
#include "min_heap.h"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_START_CAPACITY 8
#define TABLE_START_CAPACITY 32
#define TABLE_START_SHIFT 27 // 32 - log_2(TABLE_START_CAPACITY)
#define TABLE_WRAP_COEFF 2654435769 // Closest odd number to 2^32 / φ
#define TABLE_DOUBLING_THRESHOLD 6
// Growable array of NFA state indices. It is filled by add_state() and
// emptied by move_buffer_sorted().
typedef struct {
// count: entries in use; capacity: allocated length of states (in
// ints); states: the heap-allocated storage itself.
int count, capacity, *states;
} buffer_t;
// One slot of the hash table that maps a set of NFA states to the DFA
// state created for it.
typedef struct {
// probe_count: probe index at which insert() stored this entry.
// nfa_state_count: length of nfa_states; zero marks an empty slot.
// dfa_state: index of the DFA state that represents the set.
// nfa_states: heap-allocated key, released by convert_to_dfa().
int probe_count, nfa_state_count, dfa_state, *nfa_states;
} table_entry_t;
// Hash table of table_entry_t slots.
typedef struct {
// capacity: number of slots in entries.
// shift: right-shift wrap() applies to reduce a 32-bit hash to a slot
// index.
// max_probe_count: largest probe_count stored so far; lookup() probes
// up to and including this index.
int capacity, shift, max_probe_count;
table_entry_t *entries;
} table_t;
// State shared by every step of a single NFA-to-DFA conversion.
typedef struct {
// Automaton being converted (read only).
const fsa_t *nfa;
// Automaton being built.
fsa_t *dfa;
// Scratch set of NFA states for the DFA state currently being formed.
buffer_t buffer;
// Sets of NFA states that already have a DFA state.
table_t table;
} conversion_context_t;
/*
 * Append NFA_STATE to BUFFER unless it is already present.
 *
 * Returns true if the state was appended and false if it was already
 * in the buffer. The storage doubles when it is full.
 */
static bool add_state(buffer_t *buffer, int nfa_state)
{
	// Linear scan for a duplicate.
	const int *const end = buffer->states + buffer->count;
	for (const int *p = buffer->states; p != end; ++p) {
		if (*p == nfa_state)
			return false;
	}

	if (buffer->count >= buffer->capacity) {
		const int doubled = 2 * buffer->capacity;
		int *grown = realloc(buffer->states, doubled * sizeof(int));
		buffer->capacity = doubled;
		buffer->states = grown;
		assert(NULL != buffer->states);
	}

	buffer->states[buffer->count] = nfa_state;
	buffer->count += 1;
	return true;
}
/*
 * Add NFA_STATE, and every NFA state reachable from it through
 * epsilon rules alone, to the context's buffer.
 *
 * States already in the buffer are not revisited, which also stops
 * the recursion on epsilon cycles.
 */
static void get_epsilon_closure(conversion_context_t *ctx, int nfa_state)
{
	if (add_state(&ctx->buffer, nfa_state)) {
		const int rule_count = ctx->nfa->states[nfa_state].count;
		const fsa_rule_t *rules = ctx->nfa->states[nfa_state].rules;
		for (int r = 0; r < rule_count; ++r) {
			if (rules[r].input != EPSILON)
				continue;
			get_epsilon_closure(ctx, rules[r].next);
		}
	}
}
/*
 * Drain BUFFER into a newly allocated array sorted in ascending
 * order, leaving the buffer empty.
 *
 * The caller owns the returned array. The buffer must hold at least
 * one state: the first pop happens before the count is checked.
 */
static int *move_buffer_sorted(buffer_t *buffer)
{
	int *const sorted = malloc(buffer->count * sizeof(int));
	assert(NULL != sorted);

	// Heap sort: popping the minimum repeatedly yields ascending order.
	min_heap_heapify(buffer->states, buffer->count);
	int written = 0;
	do {
		const int smallest = min_heap_pop(buffer->states, &buffer->count);
		sorted[written] = smallest;
		++written;
	} while (buffer->count > 0);

	return sorted;
}
/*
 * Hash a non-empty array of COUNT state indices by XOR-ing all of
 * them together.
 */
static uint32_t hash(const int *states, int count)
{
	assert(count > 0);
	uint32_t folded = 0;
	for (const int *p = states, *end = states + count; p != end; ++p)
		folded ^= (uint32_t)*p;
	return folded;
}
/*
 * Reduce HASH to a table slot index for the PROBE_COUNT-th probe.
 *
 * The probe number is added to the hash, the sum is multiplied by
 * TABLE_WRAP_COEFF modulo 2^32, and the top (32 - SHIFT) bits of the
 * product are returned.
 */
static uint32_t wrap(uint32_t hash, int probe_count, int shift)
{
	hash += (uint32_t)probe_count;
	// TABLE_WRAP_COEFF is an unsuffixed decimal constant too large for
	// int, so it has a wider *signed* type. Multiplying by it directly
	// would be done in signed arithmetic, which can overflow. The cast
	// keeps the multiplication in well-defined modulo-2^32 arithmetic.
	hash *= (uint32_t)TABLE_WRAP_COEFF;
	return hash >> shift;
}
/*
 * Search TABLE for the set of COUNT NFA states in NFA_STATES.
 *
 * On a hit the associated DFA state is stored in *DFA_STATE_OUT and
 * true is returned. On a miss false is returned and *DFA_STATE_OUT is
 * left alone. Every probe index up to and including the table's
 * max_probe_count is examined.
 */
static bool lookup(
	const table_t *table, const int *nfa_states, int count,
	int *dfa_state_out)
{
	const uint32_t key_hash = hash(nfa_states, count);
	int probe = 0;
	while (probe <= table->max_probe_count) {
		const uint32_t slot_index = wrap(key_hash, probe, table->shift);
		const table_entry_t *slot = &table->entries[slot_index];
		++probe;

		// Only compare contents when the lengths agree.
		if (slot->nfa_state_count == count) {
			int key_bytes = count * sizeof(int);
			if (0 == memcmp(slot->nfa_states, nfa_states, key_bytes)) {
				*dfa_state_out = slot->dfa_state;
				return true;
			}
		}
	}
	return false;
}
/*
 * Insert the mapping NFA_STATES (COUNT entries) -> DFA_STATE into
 * TABLE.
 *
 * The table stores the NFA_STATES pointer itself; it does not copy
 * the array. Insertion displaces entries: when a slot is held by an
 * entry with a smaller probe count than the one being placed, the two
 * are swapped and insertion continues with the displaced entry. If no
 * slot is found within TABLE_DOUBLING_THRESHOLD probes, the table is
 * doubled, every stored entry is re-inserted, and the entry still in
 * hand is inserted into the enlarged table.
 */
static void insert(table_t *table, int *nfa_states, int count, int dfa_state)
{
	uint32_t h = hash(nfa_states, count);
	for (int i = 0; i < TABLE_DOUBLING_THRESHOLD; ++i) {
		const uint32_t loc = wrap(h, i, table->shift);
		table_entry_t *entry = &table->entries[loc];

		if (0 == entry->nfa_state_count) {
			// Slot is empty: insert the entry here.
			entry->nfa_states = nfa_states;
			entry->nfa_state_count = count;
			entry->dfa_state = dfa_state;
			entry->probe_count = i;
			if (entry->probe_count > table->max_probe_count)
				table->max_probe_count = entry->probe_count;
			return;
		}

		if (entry->probe_count < i) {
			// Slot contains an entry with a lesser probe count: steal
			// the slot for the current entry.
			const table_entry_t displaced = *entry;
			entry->nfa_states = nfa_states;
			entry->nfa_state_count = count;
			entry->dfa_state = dfa_state;
			entry->probe_count = i;
			if (entry->probe_count > table->max_probe_count)
				table->max_probe_count = entry->probe_count;

			// Continue with the slot's previous entry; the loop
			// increment moves it on to its next probe position.
			nfa_states = displaced.nfa_states;
			count = displaced.nfa_state_count;
			dfa_state = displaced.dfa_state;
			i = displaced.probe_count;
			h = hash(nfa_states, count);
		}
	}

	// No slot within the probe limit: double the capacity of the table.
	// The shift must stay above 2 so that the doubled capacity, which
	// is 2^(32 - shift), still fits in an int.
	assert(2 < table->shift);
	table_entry_t *old_entries = table->entries;
	const int old_capacity = table->capacity;
	--table->shift;
	table->capacity *= 2;
	table->max_probe_count = 0; // recomputed as entries are re-inserted
	table->entries = calloc(table->capacity, sizeof(table_entry_t));
	assert(NULL != table->entries);

	// Re-insert every occupied slot of the old table. Empty slots have
	// a zero nfa_state_count and must be skipped, as hash() rejects
	// empty keys.
	for (int i = 0; i < old_capacity; ++i) {
		if (0 == old_entries[i].nfa_state_count)
			continue;
		insert(
			table, old_entries[i].nfa_states,
			old_entries[i].nfa_state_count, old_entries[i].dfa_state);
	}
	free(old_entries);

	// The entry in hand (the original one, or one it displaced) has not
	// been stored yet: place it in the enlarged table.
	insert(table, nfa_states, count, dfa_state);
}
/*
 * Find the DFA state that stands for the given set of NFA states,
 * creating it when it does not exist yet.
 *
 * The DFA state index is written to *DFA_STATE_OUT in both cases.
 * Returns true if a new DFA state was created, in which case the
 * table keeps the NFA_STATES pointer. Returns false if the state
 * already existed, in which case NFA_STATES is not retained.
 */
static bool lookup_or_create(
	conversion_context_t *ctx, int *nfa_states, int count,
	int *dfa_state_out)
{
	const bool already_known
		= lookup(&ctx->table, nfa_states, count, dfa_state_out);
	if (already_known)
		return false;

	// A DFA state is final as soon as one of its NFA states is final.
	const int created = fsa_add_state(ctx->dfa);
	int i = 0;
	while (i < count && !ctx->nfa->states[nfa_states[i]].final)
		++i;
	if (i < count)
		ctx->dfa->states[created].final = true;

	// Remember the new state under its set of NFA states.
	insert(&ctx->table, nfa_states, count, created);
	*dfa_state_out = created;
	return true;
}
/*
 * Turn the set of NFA states currently held in the context's buffer
 * into a DFA state and return that DFA state's index.
 *
 * The buffer must be non-empty on entry and is emptied by the call to
 * move_buffer_sorted(). If the set already has a DFA state, that state
 * is returned directly. Otherwise a new DFA state is created and, for
 * each distinct non-epsilon input found on the set's rules, the buffer
 * is refilled with the states reachable on that input, and the function
 * recurses to obtain the target of the new DFA rule.
 */
int convert_step(conversion_context_t *ctx)
{
assert(0 != ctx->buffer.count);
// Save the count first: move_buffer_sorted() drains the buffer.
int count = ctx->buffer.count;
int *nfa_states = move_buffer_sorted(&ctx->buffer);
int dfa_state;
if (!lookup_or_create(ctx, nfa_states, count, &dfa_state)) {
// Base case: state already exists.
free(nfa_states);
return dfa_state;
}
// From here on nfa_states is referenced by the table, which is why it
// is not freed on this path.
// Inputs for which this DFA state already has an outgoing rule.
bool handled[CHAR_COUNT] = { 0 };
for (int i = 0; i < count; ++i) {
const fsa_state_t *nfa_state = &ctx->nfa->states[nfa_states[i]];
for (int j = 0; j < nfa_state->count; ++j) {
const int input = nfa_state->rules[j].input;
if (EPSILON == input || handled[input])
continue;
// Get epsilon closure of the target of this rule.
get_epsilon_closure(ctx, nfa_state->rules[j].next);
// Get epsilon closure for targets of any other rules the
// current state has with this input.
for (int k = j + 1; k < nfa_state->count; ++k) {
if (input == nfa_state->rules[k].input)
get_epsilon_closure(ctx, nfa_state->rules[k].next);
}
// Do the same for all states after this one (we have
// already done them if they came before).
for (int k = i + 1; k < count; ++k) {
// This declaration shadows the outer nfa_state inside the loop.
const fsa_state_t *nfa_state
= &ctx->nfa->states[nfa_states[k]];
for (int l = 0; l < nfa_state->count; ++l) {
if (input == nfa_state->rules[l].input)
get_epsilon_closure(ctx, nfa_state->rules[l].next);
}
}
// The buffer now contains all the states reachable via the
// given input followed by epsilon moves -- recurse.
int new_dfa_state = convert_step(ctx);
fsa_add_rule(ctx->dfa, dfa_state, new_dfa_state, input);
handled[input] = true;
}
}
return dfa_state;
}
/*
 * Build in DFA_OUT a deterministic automaton from NFA.
 *
 * DFA_OUT is initialised here with fsa_init(). The scratch buffer and
 * the lookup table used during the conversion, including the key
 * arrays the table holds, are released before returning.
 */
void convert_to_dfa(const fsa_t *nfa, fsa_t *dfa_out)
{
	fsa_init(dfa_out);

	int *scratch = malloc(BUFFER_START_CAPACITY * sizeof(int));
	assert(NULL != scratch);
	table_entry_t *slots
		= calloc(TABLE_START_CAPACITY, sizeof(table_entry_t));
	assert(NULL != slots);

	conversion_context_t ctx = {
		.nfa = nfa,
		.dfa = dfa_out,
		.buffer = {
			.count = 0,
			.capacity = BUFFER_START_CAPACITY,
			.states = scratch,
		},
		.table = {
			.capacity = TABLE_START_CAPACITY,
			.shift = TABLE_START_SHIFT,
			.max_probe_count = 0,
			.entries = slots,
		},
	};

	// Seed the buffer with the closure of the NFA's initial state; the
	// DFA state built from it becomes the DFA's initial state.
	get_epsilon_closure(&ctx, nfa->initial);
	dfa_out->initial = convert_step(&ctx);

	free(ctx.buffer.states);
	// Empty slots hold a null key pointer (the table is calloc'ed), so
	// every slot can be passed to free() unconditionally.
	int slot = 0;
	while (slot < ctx.table.capacity) {
		free(ctx.table.entries[slot].nfa_states);
		++slot;
	}
	free(ctx.table.entries);
}

View File

@ -117,8 +117,11 @@ static void desugar_term(regex_term_t *term)
desugar_class(term);
break;
case REGEX_TERM_LITERAL:
case REGEX_TERM_SUBEXPR:
desugar_regex(&term->subexpr);
break;
case REGEX_TERM_LITERAL:
case REGEX_TERM_EMPTY:
break;
}

View File

@ -65,3 +65,24 @@ void fsa_add_rule(fsa_t *fsa, int from, int to, int input)
rule->next = to;
++state->count;
}
/*
 * Run DFA over the LEN bytes at INPUT.
 *
 * Returns true if every byte has a matching rule and the state
 * reached after the last byte is final. Returns false as soon as a
 * byte has no matching rule in the current state. In each state the
 * first rule whose input equals the byte is taken.
 */
bool fsa_accepts(const fsa_t *dfa, const char *input, int len)
{
	int state = dfa->initial;
	for (int pos = 0; pos < len; ++pos) {
		const fsa_rule_t *rules = dfa->states[state].rules;
		const int rule_count = dfa->states[state].count;

		// Find the first rule for this byte.
		int match = 0;
		while (match < rule_count && rules[match].input != input[pos])
			++match;
		if (match == rule_count)
			return false;

		state = rules[match].next;
	}
	return dfa->states[state].final;
}

13
lib/include/compile.h Normal file
View File

@ -0,0 +1,13 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef COMPILE_H
#define COMPILE_H
#include "fsa.h"
/*
 * Compile the LEN-byte regular expression REGEX into a DFA stored in
 * DFA_OUT. Returns true on success, or false if the expression could
 * not be parsed. After a successful call the DFA can be matched with
 * fsa_accepts() and is released with fsa_free().
 */
bool compile(const char *regex, int len, fsa_t *dfa_out);
#endif

13
lib/include/convert.h Normal file
View File

@ -0,0 +1,13 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef CONVERT_H
#define CONVERT_H
#include "fsa.h"
/*
 * Build in DFA_OUT a deterministic automaton from NFA. DFA_OUT is
 * initialised by this call; NFA is not modified. Each automaton is
 * released separately with fsa_free().
 */
void convert_to_dfa(const fsa_t *nfa, fsa_t *dfa_out);
#endif

View File

@ -35,4 +35,6 @@ void fsa_free(const fsa_t *fsa);
int fsa_add_state(fsa_t *fsa);
void fsa_add_rule(fsa_t *fsa, int from, int to, int input);
bool fsa_accepts(const fsa_t *dfa, const char *input, int len);
#endif

12
lib/include/min_heap.h Normal file
View File

@ -0,0 +1,12 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef MIN_HEAP_H
#define MIN_HEAP_H
/*
 * Rearrange the COUNT elements of XS in place so that they form a
 * binary min-heap (the smallest element ends up at XS[0]).
 */
void min_heap_heapify(int *xs, int count);
/*
 * Remove and return the smallest element of the min-heap XS, which
 * holds *COUNT elements. *COUNT is decremented. The heap must not be
 * empty: XS[0] is read unconditionally.
 */
int min_heap_pop(int *xs, int *count);
#endif

53
lib/min_heap.c Normal file
View File

@ -0,0 +1,53 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "min_heap.h"
/* Index of the left child of heap node I (the right child is one more). */
static inline int left(int i)
{
	const int twice = i + i;
	return twice + 1;
}
/*
 * Index of the parent of heap node I. Integer division truncates
 * towards zero, so parent(0) is 0 and parent(-1) is -1.
 */
static inline int parent(int i)
{
	const int below = i - 1;
	return below / 2;
}
/* Exchange the elements of XS at indices A and B. */
static inline void swap(int *xs, int a, int b)
{
	int *const first = &xs[a];
	int *const second = &xs[b];
	const int held = *first;
	*first = *second;
	*second = held;
}
/*
 * Restore the min-heap property for the subtree rooted at ROOT in the
 * COUNT-element array XS. The element at ROOT is swapped with its
 * smaller child repeatedly until it is no greater than either child
 * or has no children.
 */
static void sift_down(int *xs, int root, int count)
{
	for (;;) {
		int smallest = left(root);
		if (smallest >= count)
			break; // no children left

		// Prefer the right child only when it is strictly smaller.
		const int right = smallest + 1;
		if (right < count && xs[right] < xs[smallest])
			smallest = right;

		if (xs[root] <= xs[smallest])
			break; // heap property holds

		swap(xs, root, smallest);
		root = smallest;
	}
}
/*
 * Turn the COUNT elements of XS into a min-heap in place by sifting
 * down every node, starting from the parent of the last element and
 * working back to the root.
 */
void min_heap_heapify(int *xs, int count)
{
	int node = parent(count - 1);
	while (node >= 0) {
		sift_down(xs, node, count);
		--node;
	}
}
/*
 * Remove and return the minimum of the heap XS, decrementing *COUNT.
 * The last element is moved to the root and sifted down to restore
 * the heap property.
 */
int min_heap_pop(int *xs, int *count)
{
	const int smallest = xs[0];
	const int remaining = *count - 1;
	*count = remaining;
	xs[0] = xs[remaining];
	sift_down(xs, 0, remaining);
	return smallest;
}

View File

@ -18,7 +18,10 @@ endfunction()
add_test_suites(
construct_tests.c
convert_tests.c
desugar_tests.c
fsa_tests.c
integration_tests.c
min_heap_tests.c
parse_tests.c
)

251
tests/convert_tests.c Normal file
View File

@ -0,0 +1,251 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "convert.h"
#include "testing.h"
/*
 * Check that FSA is deterministic: no state may have an epsilon rule
 * or more than one rule for the same input.
 */
static bool is_deterministic(const fsa_t *fsa)
{
	for (int s = 0; s < fsa->count; ++s) {
		const fsa_state_t *state = &fsa->states[s];
		bool used[CHAR_COUNT] = { 0 };
		for (int r = 0; r < state->count; ++r) {
			const int symbol = state->rules[r].input;
			if (EPSILON == symbol || used[symbol])
				return false;
			used[symbol] = true;
		}
	}
	return true;
}
// Converts an NFA that is already deterministic: the initial state has
// a single 'a' rule leading to the only final state.
static void test_trivial_case(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
nfa.states[b].final = true;
fsa_add_rule(&nfa, a, b, 'a');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "a");
ASSERT_REJECTS(&dfa, "aa");
ASSERT_REJECTS(&dfa, "b");
fsa_free(&nfa);
fsa_free(&dfa);
}
// Converts an NFA with one epsilon rule. The initial state a reaches
// the final state c on 'a', and also has an epsilon move to b, which
// reaches c on 'b'. The DFA must therefore accept exactly "a" and "b".
static void test_epsilon_move(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
const int c = fsa_add_state(&nfa);
nfa.states[c].final = true;
fsa_add_rule(&nfa, a, b, EPSILON);
fsa_add_rule(&nfa, a, c, 'a');
fsa_add_rule(&nfa, b, c, 'b');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "a");
ASSERT_ACCEPTS(&dfa, "b");
ASSERT_REJECTS(&dfa, "aa");
ASSERT_REJECTS(&dfa, "bb");
ASSERT_REJECTS(&dfa, "ab");
ASSERT_REJECTS(&dfa, "ba");
ASSERT_REJECTS(&dfa, "c");
fsa_free(&nfa);
fsa_free(&dfa);
}
// Converts an NFA whose initial state has two rules for the same input
// ('a' to b and 'a' to c). b reaches the final state d on 'b' and c
// reaches it on 'a', so the DFA must accept exactly "aa" and "ab".
static void test_branch(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
const int c = fsa_add_state(&nfa);
const int d = fsa_add_state(&nfa);
nfa.states[d].final = true;
fsa_add_rule(&nfa, a, b, 'a');
fsa_add_rule(&nfa, a, c, 'a');
fsa_add_rule(&nfa, b, d, 'b');
fsa_add_rule(&nfa, c, d, 'a');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "aa");
ASSERT_ACCEPTS(&dfa, "ab");
ASSERT_REJECTS(&dfa, "a");
ASSERT_REJECTS(&dfa, "aaa");
ASSERT_REJECTS(&dfa, "abb");
ASSERT_REJECTS(&dfa, "c");
ASSERT_REJECTS(&dfa, "ac");
fsa_free(&nfa);
fsa_free(&dfa);
}
// Compound case: a four-state NFA with two final states (c and d). It
// combines epsilon rules, a self-loop on b and two 'b' rules leaving b.
// The initial state reaches the final state c by an epsilon move, so
// the empty string is accepted.
static void test_nfa_a(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
const int c = fsa_add_state(&nfa);
const int d = fsa_add_state(&nfa);
nfa.states[c].final = true;
nfa.states[d].final = true;
fsa_add_rule(&nfa, a, b, 'a');
fsa_add_rule(&nfa, a, c, EPSILON);
fsa_add_rule(&nfa, b, b, 'b');
fsa_add_rule(&nfa, b, d, 'b');
fsa_add_rule(&nfa, c, b, EPSILON);
fsa_add_rule(&nfa, c, d, 'a');
fsa_add_rule(&nfa, d, c, 'a');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "");
ASSERT_ACCEPTS(&dfa, "a");
ASSERT_ACCEPTS(&dfa, "b");
ASSERT_ACCEPTS(&dfa, "ab");
ASSERT_ACCEPTS(&dfa, "ba");
ASSERT_ACCEPTS(&dfa, "aaaab");
ASSERT_REJECTS(&dfa, "aaab");
ASSERT_REJECTS(&dfa, "aaaba");
ASSERT_REJECTS(&dfa, "aaabb");
ASSERT_REJECTS(&dfa, "aaaaab");
ASSERT_REJECTS(&dfa, "aaaaaba");
ASSERT_REJECTS(&dfa, "aaaaabb");
fsa_free(&nfa);
fsa_free(&dfa);
}
// Compound case: a four-state NFA with a single final state c. Both
// the initial state a and state b reach c by an epsilon move.
static void test_nfa_b(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
const int c = fsa_add_state(&nfa);
const int d = fsa_add_state(&nfa);
nfa.states[c].final = true;
fsa_add_rule(&nfa, a, b, 'a');
fsa_add_rule(&nfa, a, c, EPSILON);
fsa_add_rule(&nfa, b, c, EPSILON);
fsa_add_rule(&nfa, c, b, 'b');
fsa_add_rule(&nfa, c, d, 'a');
fsa_add_rule(&nfa, d, b, 'a');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "");
ASSERT_ACCEPTS(&dfa, "a");
ASSERT_ACCEPTS(&dfa, "aaaaaa");
ASSERT_ACCEPTS(&dfa, "b");
ASSERT_ACCEPTS(&dfa, "bbbbb");
// NOTE(review): same input as the "aaaaaa" assertion three lines up.
ASSERT_ACCEPTS(&dfa, "aaaaaa");
ASSERT_ACCEPTS(&dfa, "aaaaabaa");
ASSERT_ACCEPTS(&dfa, "aaaaabaab");
ASSERT_REJECTS(&dfa, "ba");
ASSERT_REJECTS(&dfa, "aba");
ASSERT_REJECTS(&dfa, "abab");
ASSERT_REJECTS(&dfa, "aaaaaba");
ASSERT_REJECTS(&dfa, "aaaaabaaa");
ASSERT_REJECTS(&dfa, "aaaaabbaabbaaa");
fsa_free(&nfa);
fsa_free(&dfa);
}
// Compound case: a five-state NFA with a single final state e. The
// initial state has two 'a' rules, b has an epsilon move to e, b and d
// have self-loops on 'a', and e leads back to the initial state on 'b'.
// State c has no outgoing rules.
static void test_nfa_c(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
const int c = fsa_add_state(&nfa);
const int d = fsa_add_state(&nfa);
const int e = fsa_add_state(&nfa);
nfa.states[e].final = true;
fsa_add_rule(&nfa, a, b, 'a');
fsa_add_rule(&nfa, a, c, 'a');
fsa_add_rule(&nfa, a, d, 'b');
fsa_add_rule(&nfa, b, b, 'a');
fsa_add_rule(&nfa, b, d, 'b');
fsa_add_rule(&nfa, b, e, EPSILON);
fsa_add_rule(&nfa, d, b, 'a');
fsa_add_rule(&nfa, d, c, 'b');
fsa_add_rule(&nfa, d, d, 'a');
fsa_add_rule(&nfa, e, a, 'b');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "a");
ASSERT_ACCEPTS(&dfa, "aba");
ASSERT_ACCEPTS(&dfa, "aaba");
ASSERT_ACCEPTS(&dfa, "abaaba");
ASSERT_ACCEPTS(&dfa, "ba");
ASSERT_ACCEPTS(&dfa, "babba");
ASSERT_ACCEPTS(&dfa, "baaa");
ASSERT_ACCEPTS(&dfa, "baba");
ASSERT_ACCEPTS(&dfa, "babaa");
ASSERT_REJECTS(&dfa, "");
ASSERT_REJECTS(&dfa, "ab");
ASSERT_REJECTS(&dfa, "aab");
ASSERT_REJECTS(&dfa, "abbab");
ASSERT_REJECTS(&dfa, "b");
ASSERT_REJECTS(&dfa, "bb");
ASSERT_REJECTS(&dfa, "baaabab");
ASSERT_REJECTS(&dfa, "aabababab");
fsa_free(&nfa);
fsa_free(&dfa);
}
// Runs every NFA-to-DFA conversion test and returns the value of
// TESTING_END() as the process exit status.
int main(void)
{
TESTING_BEGIN();
// Base cases
test_trivial_case();
test_epsilon_move();
test_branch();
// Compound cases
test_nfa_a();
test_nfa_b();
test_nfa_c();
return TESTING_END();
}

View File

@ -287,6 +287,60 @@ static void class_abc_becomes_subexpr_a_or_b_or_c(void)
regex_free(&t);
}
// Checks that desugaring descends into sub-expressions. The input tree
// is a single SUBEXPR term whose sub-expression holds the literal 'a'
// with a QMARK quantifier. After desugar_regex() the quantified literal
// must have become an unquantified SUBEXPR with two alternatives: an
// EMPTY term and the literal 'a'.
static void subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a(void)
{
// Innermost sequence: the literal 'a', quantified with '?'.
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_QMARK;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
// Outer sequence: one unquantified SUBEXPR term wrapping the above.
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
// The top level is unchanged: one alternative holding one SUBEXPR.
ASSERT_EQ(1, t.count);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner;
// First level down: the quantified literal is now a plain SUBEXPR.
inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, inner->contents[0].contents[0].type);
// Second level down: two alternatives, EMPTY and the literal 'a'.
inner = &inner->contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('a', inner->contents[1].contents[0].literal);
regex_free(&t);
}
int main(void)
{
TESTING_BEGIN();
@ -298,5 +352,6 @@ int main(void)
a_plus_becomes_subexpr_aa_star();
a_qmark_becomes_subexpr_empty_or_a();
class_abc_becomes_subexpr_a_or_b_or_c();
subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a();
return TESTING_END();
}

View File

@ -34,6 +34,9 @@
#define ASSERT_NOT_NULL(p) ASSERT_FALSE(NULL == (p))
#define ASSERT_MEM_EQ(p, q, n) ASSERT_FALSE(memcmp(p, q, n) != 0)
// Assert that the automaton accepts / rejects the NUL-terminated
// string s. Note that s is expanded twice in each macro.
#define ASSERT_ACCEPTS(dfa, s) ASSERT_TRUE(fsa_accepts(dfa, s, strlen(s)))
#define ASSERT_REJECTS(dfa, s) ASSERT_FALSE(fsa_accepts(dfa, s, strlen(s)))
extern int fail_count;
#endif

57
tests/integration_tests.c Normal file
View File

@ -0,0 +1,57 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "compile.h"
#include "testing.h"
// End-to-end check of a plain alternation: "foo|bar" compiles, and the
// resulting DFA accepts both alternatives and rejects "baz".
static void test_foo_or_bar_regex(void)
{
fsa_t dfa;
const char *regex = "foo|bar";
const bool success = compile(regex, strlen(regex), &dfa);
ASSERT_TRUE(success);
ASSERT_ACCEPTS(&dfa, "foo");
ASSERT_ACCEPTS(&dfa, "bar");
ASSERT_REJECTS(&dfa, "baz");
fsa_free(&dfa);
}
// End-to-end check of a starred sub-expression: "(II)*" must accept
// strings with an even number of 'I's (including none) and reject
// strings with an odd number.
static void test_even_number_of_Is_regex(void)
{
fsa_t dfa;
const char *regex = "(II)*";
const bool success = compile(regex, strlen(regex), &dfa);
ASSERT_TRUE(success);
ASSERT_ACCEPTS(&dfa, "");
ASSERT_ACCEPTS(&dfa, "II");
ASSERT_ACCEPTS(&dfa, "IIII");
ASSERT_ACCEPTS(&dfa, "IIIIIIIIII");
ASSERT_REJECTS(&dfa, "III");
ASSERT_REJECTS(&dfa, "IIIII");
ASSERT_REJECTS(&dfa, "IIIIIIIII");
fsa_free(&dfa);
}
// End-to-end check of a regex that mixes alternation, a starred
// sub-expression and the '?' and '+' quantifiers.
static void test_arbitrary_regex_1(void)
{
fsa_t dfa;
const char *regex = "(abc!?)*|dd+";
const bool success = compile(regex, strlen(regex), &dfa);
ASSERT_TRUE(success);
ASSERT_ACCEPTS(&dfa, "abc!abcabc");
ASSERT_ACCEPTS(&dfa, "dddddddd");
// "dd+" needs at least two 'd's.
ASSERT_REJECTS(&dfa, "d");
ASSERT_REJECTS(&dfa, "abcd");
fsa_free(&dfa);
}
// Runs every integration test and returns the value of TESTING_END()
// as the process exit status.
int main(void)
{
TESTING_BEGIN();
test_foo_or_bar_regex();
test_even_number_of_Is_regex();
test_arbitrary_regex_1();
return TESTING_END();
}

49
tests/min_heap_tests.c Normal file
View File

@ -0,0 +1,49 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "min_heap.h"
#include "testing.h"
#include <stdbool.h>
/*
 * Check the min-heap property on the COUNT elements of XS: no element
 * may be smaller than its parent. Arrays of zero or one element
 * trivially satisfy it.
 */
static bool is_min_heap(int *xs, int count)
{
	// Every index other than the root is the left or right child of
	// exactly one parent, so checking each child against its parent
	// covers the same pairs as checking each parent's two children.
	for (int child = 1; child < count; ++child) {
		const int parent = (child - 1) / 2;
		if (xs[child] < xs[parent])
			return false;
	}
	return true;
}
// Heapifying an unsorted array (with repeated values) must leave it
// satisfying the min-heap property.
static void array_is_min_heap_after_heapify(void)
{
int xs[] = { 54, 12, 35, 43, 21, 12, 34, 52, 34, 23 };
const int len = sizeof(xs) / sizeof(int);
min_heap_heapify(xs, len);
ASSERT_TRUE(is_min_heap(xs, len));
}
// Successive pops must return the elements in ascending order, and
// each pop must reduce the length by one (8 elements, 3 pops, 5 left).
static void extract_root_yields_min(void)
{
int xs[] = { 71, 31, 12, 21, 65, 53, 54, 10 };
int len = 8;
min_heap_heapify(xs, len);
ASSERT_EQ(10, min_heap_pop(xs, &len));
ASSERT_EQ(12, min_heap_pop(xs, &len));
ASSERT_EQ(21, min_heap_pop(xs, &len));
ASSERT_EQ(5, len);
}
// Runs every min-heap test and returns the value of TESTING_END() as
// the process exit status.
int main(void)
{
TESTING_BEGIN();
array_is_min_heap_after_heapify();
extract_root_yields_min();
return TESTING_END();
}