Compare commits

...

10 Commits

Author SHA1 Message Date
cdo 55c78fe706 Free DFA at end of integration tests 2024-11-02 17:24:40 +00:00
cdo e4a9ac6ad8 Add build manifest for CI 2024-11-02 17:24:40 +00:00
cdo 7f5aa1766c Create some integration tests 2024-11-02 17:24:40 +00:00
cdo 5dbcaaaf40 Add ASSERT_ACCEPTS and ASSERT_REJECTS testing macros 2024-11-02 17:24:40 +00:00
cdo c6f0cf6381 Recurse on subexpression when desugaring 2024-11-02 17:24:40 +00:00
cdo c935279def Make demo program 2024-11-02 17:24:39 +00:00
cdo 18271a2988 Create compile module combining passes together 2024-11-02 16:23:44 +00:00
cdo 018aec5339 Move procedure for running NFA into FSA module 2024-11-02 16:23:44 +00:00
cdo 557ab451a8 Implement conversion from NFA to DFA 2024-11-02 16:23:44 +00:00
cdo 6b52d4d9cd Implement min heap 2024-11-02 14:15:22 +00:00
21 changed files with 918 additions and 2 deletions
+18
View File
@@ -0,0 +1,18 @@
# CI build manifest: installs the Clang/CMake/Ninja toolchain, fetches the
# repository listed under "sources", then configures a Debug build with
# -DSANITIZERS=on and runs the project's build and test scripts.
image: alpine/edge
packages:
- clang
- cmake
- compiler-rt
- ninja
sources:
- https://git.sr.ht/~cdo/regex-engine
tasks:
- configure: |
cd regex-engine
cmake -GNinja -Bbuild -DSANITIZERS=on -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=clang
- build: |
cd regex-engine
scripts/build.sh
- test: |
cd regex-engine
scripts/test.sh
+1
View File
@@ -17,3 +17,4 @@ endfunction()
add_subdirectory(lib) add_subdirectory(lib)
add_subdirectory(tests) add_subdirectory(tests)
add_subdirectory(demo)
+1 -1
View File
@@ -27,7 +27,7 @@ tests. I use Clang but the code is ISO C11, it should compile just
fine with GCC. You might need to faff with CMakeLists.txt to get it fine with GCC. You might need to faff with CMakeLists.txt to get it
to work with another compiler due to command-line flag nonsense. to work with another compiler due to command-line flag nonsense.
scripts/build.sh # Compile library and tests scripts/build.sh # Compile library, demo and tests
scripts/test.sh # Run tests scripts/test.sh # Run tests
There is also an entr.sh script which will watch all the project's There is also an entr.sh script which will watch all the project's
+3
View File
@@ -0,0 +1,3 @@
# Demo executable: built from shitgrep.c and linked against the "lib" target.
# set_default_target_options is not defined in this file -- presumably the
# project-wide helper from the top-level CMakeLists.txt; confirm there.
add_executable(shitgrep shitgrep.c)
set_default_target_options(shitgrep)
target_link_libraries(shitgrep PRIVATE lib)
+53
View File
@@ -0,0 +1,53 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "compile.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_START_CAPACITY 128
/*
 * Demo line filter. Compiles the regex given as the only command-line
 * argument into a DFA, then reads stdin and writes to stdout every
 * line that the DFA accepts in its entirety (newline excluded).
 *
 * Returns EXIT_FAILURE on bad usage, when the regex fails to compile,
 * or when memory runs out; EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
    if (argc != 2) {
        fprintf(stderr, "Usage: %s REGEX\n", argv[0]);
        return EXIT_FAILURE;
    }

    fsa_t dfa;
    if (!compile(argv[1], strlen(argv[1]), &dfa)) {
        fprintf(stderr, "Failed to parse regex\n");
        return EXIT_FAILURE;
    }

    int len = 0, capacity = BUFFER_START_CAPACITY;
    char *buffer = malloc(capacity);
    if (NULL == buffer) {
        fprintf(stderr, "Out of memory\n");
        fsa_free(&dfa);
        return EXIT_FAILURE;
    }

    int status = EXIT_SUCCESS;
    int c;
    while ((c = getchar()) != EOF) {
        // Guarantee room for one more byte (the character itself, or
        // the newline appended when a matching line is echoed).
        if (capacity < len + 1) {
            // Grow through a temporary so the old buffer can still be
            // freed if realloc fails.
            char *grown = realloc(buffer, 2 * (size_t)capacity);
            if (NULL == grown) {
                fprintf(stderr, "Out of memory\n");
                status = EXIT_FAILURE;
                break;
            }
            buffer = grown;
            capacity *= 2;
        }

        if ('\n' == c) {
            if (fsa_accepts(&dfa, buffer, len)) {
                buffer[len++] = '\n';
                fwrite(buffer, 1, len, stdout);
            }
            len = 0;
        } else {
            buffer[len++] = c;
        }
    }

    // Input that ends without a trailing newline still has a final
    // line pending in the buffer: match it too. The buffer may be
    // exactly full here, so the newline is written separately rather
    // than appended in place.
    if (EXIT_SUCCESS == status && len > 0
        && fsa_accepts(&dfa, buffer, len)) {
        fwrite(buffer, 1, len, stdout);
        putchar('\n');
    }

    fsa_free(&dfa);
    free(buffer);
    return status;
}
+3
View File
@@ -1,7 +1,10 @@
add_library(lib add_library(lib
compile.c
construct.c construct.c
convert.c
desugar.c desugar.c
fsa.c fsa.c
min_heap.c
parse.c parse.c
regex.c regex.c
) )
+28
View File
@@ -0,0 +1,28 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "compile.h"
#include "parse.h"
#include "desugar.h"
#include "construct.h"
#include "convert.h"
/*
 * Run the full pipeline on a regex: parse, desugar, build an NFA and
 * convert that NFA into a DFA written to dfa_out.
 *
 * Returns false, without touching dfa_out, when parse_expr reports
 * failure (-1); returns true otherwise. The parse tree and the
 * intermediate NFA are released before returning.
 */
bool compile(const char *regex, int len, fsa_t *dfa_out)
{
    regex_t tree;
    if (parse_expr(regex, len, &tree) == -1)
        return false;
    desugar_regex(&tree);

    // The tree is only needed until the NFA has been built from it.
    fsa_t intermediate;
    construct_nfa(&tree, &intermediate);
    regex_free(&tree);

    // Likewise the NFA is only needed until the DFA exists.
    convert_to_dfa(&intermediate, dfa_out);
    fsa_free(&intermediate);

    return true;
}
+275
View File
@@ -0,0 +1,275 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "convert.h"
#include "min_heap.h"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_START_CAPACITY 8
#define TABLE_START_CAPACITY 32
#define TABLE_START_SHIFT 27 // 32 - log_2(TABLE_START_CAPACITY)
#define TABLE_WRAP_COEFF 2654435769 // Closest odd number to 2^32 / φ
#define TABLE_DOUBLING_THRESHOLD 6
// Growable set of NFA state indices: `count` entries are in use out of
// `capacity` allocated slots in `states`. add_state keeps it duplicate-free.
typedef struct {
int count, capacity, *states;
} buffer_t;
// One hash-table slot mapping a sorted array of NFA states (the key) to
// a DFA state index. nfa_state_count == 0 marks the slot as empty;
// probe_count is the probe index at which the entry was stored.
typedef struct {
int probe_count, nfa_state_count, dfa_state, *nfa_states;
} table_entry_t;
// Open-addressing hash table of table_entry_t. `shift` is the right
// shift that wrap() applies to turn a 32-bit hash into a slot index;
// `max_probe_count` is the largest probe index insert() has used so
// far and bounds the search in lookup().
typedef struct {
int capacity, shift, max_probe_count;
table_entry_t *entries;
} table_t;
// State shared by one NFA-to-DFA conversion: the input NFA, the DFA
// under construction, the scratch buffer collecting the NFA state set
// for the next DFA state, and the table of state sets already seen.
typedef struct {
const fsa_t *nfa;
fsa_t *dfa;
buffer_t buffer;
table_t table;
} conversion_context_t;
/*
 * Append nfa_state to the buffer unless it is already present.
 * Returns true when the state was added, false when it was a
 * duplicate. The backing array doubles whenever it is full.
 */
static bool add_state(buffer_t *buffer, int nfa_state)
{
    // Linear scan for an existing occurrence.
    const int *seen = buffer->states;
    const int *const seen_end = seen + buffer->count;
    for (; seen != seen_end; ++seen) {
        if (*seen == nfa_state)
            return false;
    }

    if (buffer->count + 1 > buffer->capacity) {
        buffer->capacity *= 2;
        buffer->states
            = realloc(buffer->states, buffer->capacity * sizeof(int));
        assert(NULL != buffer->states);
    }

    buffer->states[buffer->count] = nfa_state;
    ++buffer->count;
    return true;
}
/*
 * Add nfa_state, and every state reachable from it through epsilon
 * rules only, to the context's buffer. A state that is already in the
 * buffer is not expanded again, which also terminates epsilon cycles.
 */
static void get_epsilon_closure(conversion_context_t *ctx, int nfa_state)
{
    if (add_state(&ctx->buffer, nfa_state)) {
        const fsa_state_t *state = &ctx->nfa->states[nfa_state];
        for (int r = 0; r < state->count; ++r) {
            if (state->rules[r].input == EPSILON)
                get_epsilon_closure(ctx, state->rules[r].next);
        }
    }
}
/*
 * Drain the buffer into a freshly allocated array holding its states
 * in ascending order, leaving the buffer with a count of zero. The
 * caller owns the returned array. The buffer must not be empty: one
 * element is popped before the count is checked.
 */
static int *move_buffer_sorted(buffer_t *buffer)
{
    int *const sorted = malloc(buffer->count * sizeof(int));
    assert(NULL != sorted);

    // Heap sort: heapify in place, then pop minima one at a time.
    min_heap_heapify(buffer->states, buffer->count);
    int out = 0;
    do {
        sorted[out] = min_heap_pop(buffer->states, &buffer->count);
        ++out;
    } while (buffer->count > 0);

    return sorted;
}
/*
 * Hash an array of NFA state indices by XOR-ing all of its elements
 * together. `count` must be positive.
 */
static uint32_t hash(const int *states, int count)
{
    assert(count > 0);

    uint32_t acc = 0;
    const int *const end = states + count;
    for (const int *p = states; p != end; ++p)
        acc ^= (uint32_t)*p;
    return acc;
}
/*
 * Map a hash and probe index to a table slot: add the probe index,
 * multiply by TABLE_WRAP_COEFF (truncated to 32 bits), and keep the
 * top bits by shifting right by `shift`.
 */
static uint32_t wrap(uint32_t hash, int probe_count, int shift)
{
    const uint32_t probed = hash + probe_count;
    const uint32_t scrambled = probed * TABLE_WRAP_COEFF;
    return scrambled >> shift;
}
/*
 * Search the table for the key nfa_states[0..count). On a hit the
 * mapped DFA state is written to *dfa_state_out and true is returned;
 * otherwise false is returned and *dfa_state_out is left untouched.
 * Probing stops after table->max_probe_count.
 */
static bool lookup(
    const table_t *table, const int *nfa_states, int count,
    int *dfa_state_out)
{
    const uint32_t key_hash = hash(nfa_states, count);
    const size_t key_size = count * sizeof(int);

    int probe = 0;
    while (probe <= table->max_probe_count) {
        const table_entry_t *slot
            = &table->entries[wrap(key_hash, probe, table->shift)];
        ++probe;

        // Compare lengths first so memcmp only runs on equal-sized keys.
        if (slot->nfa_state_count == count
            && memcmp(slot->nfa_states, nfa_states, key_size) == 0) {
            *dfa_state_out = slot->dfa_state;
            return true;
        }
    }
    return false;
}
/*
 * Insert the mapping nfa_states[0..count) -> dfa_state into the table.
 * The table stores the nfa_states pointer itself (no copy is made).
 *
 * An entry that has probed further than the occupant of a slot takes
 * that slot, and the displaced occupant continues probing from where
 * it left off. If an entry reaches TABLE_DOUBLING_THRESHOLD probes
 * without finding a home, the table is doubled, every existing entry
 * is re-inserted, and the pending entry is then inserted as well.
 */
static void insert(table_t *table, int *nfa_states, int count, int dfa_state)
{
    uint32_t h = hash(nfa_states, count);
    for (int i = 0; i < TABLE_DOUBLING_THRESHOLD; ++i) {
        const uint32_t loc = wrap(h, i, table->shift);
        table_entry_t *entry = &table->entries[loc];
        if (0 == entry->nfa_state_count) {
            // Slot is empty: insert the entry here.
            entry->nfa_states = nfa_states;
            entry->nfa_state_count = count;
            entry->dfa_state = dfa_state;
            entry->probe_count = i;
            if (entry->probe_count > table->max_probe_count)
                table->max_probe_count = entry->probe_count;
            return;
        } else if (entry->probe_count < i) {
            // Slot contains entry with lesser probe count: steal the
            // slot for the current entry.
            const table_entry_t displaced = *entry;
            entry->nfa_states = nfa_states;
            entry->nfa_state_count = count;
            entry->dfa_state = dfa_state;
            entry->probe_count = i;
            if (entry->probe_count > table->max_probe_count)
                table->max_probe_count = entry->probe_count;

            // Continue with the slot's previous entry; the loop's ++i
            // moves it on to its next probe position.
            nfa_states = displaced.nfa_states;
            count = displaced.nfa_state_count;
            dfa_state = displaced.dfa_state;
            i = displaced.probe_count;
            h = hash(nfa_states, count);
        }
    }

    // The pending entry could not be placed within the probe limit:
    // double the capacity of the table and rehash.
    table_entry_t *old_entries = table->entries;
    const int old_capacity = table->capacity;
    --table->shift;
    table->capacity *= 2;
    // Every entry is about to be re-placed, and each placement
    // updates max_probe_count, so the bound can start from zero.
    table->max_probe_count = 0;
    table->entries = calloc(table->capacity, sizeof(table_entry_t));
    assert(NULL != table->entries);

    for (int i = 0; i < old_capacity; ++i) {
        // Only occupied slots carry an entry to move across.
        if (0 == old_entries[i].nfa_state_count)
            continue;
        insert(
            table, old_entries[i].nfa_states,
            old_entries[i].nfa_state_count, old_entries[i].dfa_state);
    }
    free(old_entries);

    // The entry that triggered the resize is still unplaced (it may be
    // the original argument or one displaced along the way).
    insert(table, nfa_states, count, dfa_state);
}
/*
 * Find the DFA state that stands for the NFA state set
 * nfa_states[0..count), creating it when it does not exist yet. The
 * DFA state index is written to *dfa_state_out either way.
 *
 * Returns true when a new DFA state was created (the table then keeps
 * the nfa_states pointer), false when the state already existed.
 */
static bool lookup_or_create(
    conversion_context_t *ctx, int *nfa_states, int count,
    int *dfa_state_out)
{
    const bool exists
        = lookup(&ctx->table, nfa_states, count, dfa_state_out);
    if (exists)
        return false;

    const int created = fsa_add_state(ctx->dfa);

    // A DFA state is final when at least one of its NFA states is.
    bool any_final = false;
    for (int n = 0; n < count && !any_final; ++n)
        any_final = ctx->nfa->states[nfa_states[n]].final;
    if (any_final)
        ctx->dfa->states[created].final = true;

    // Record the new state under its NFA state set.
    insert(&ctx->table, nfa_states, count, created);

    *dfa_state_out = created;
    return true;
}
/*
 * Turn the NFA state set currently held in ctx->buffer into a DFA
 * state and return that state's index. The buffer must be non-empty
 * on entry and is emptied by move_buffer_sorted.
 *
 * If the set has been seen before, the existing DFA state is returned.
 * Otherwise a new DFA state is created and, for each distinct
 * non-epsilon input found on the set's rules, the target set is
 * collected in the buffer and converted recursively before the
 * corresponding DFA rule is added.
 */
int convert_step(conversion_context_t *ctx)
{
assert(0 != ctx->buffer.count);
int count = ctx->buffer.count;
// Sorting gives every state set a canonical form to use as table key.
int *nfa_states = move_buffer_sorted(&ctx->buffer);
int dfa_state;
if (!lookup_or_create(ctx, nfa_states, count, &dfa_state)) {
// Base case: state already exists.
// The table did not take the array in this case, so release it here.
free(nfa_states);
return dfa_state;
}
// From here on nfa_states is referenced by the table; it is only read
// below and is released by convert_to_dfa.
// Inputs for which a DFA rule has already been emitted from this state.
bool handled[CHAR_COUNT] = { 0 };
for (int i = 0; i < count; ++i) {
const fsa_state_t *nfa_state = &ctx->nfa->states[nfa_states[i]];
for (int j = 0; j < nfa_state->count; ++j) {
const int input = nfa_state->rules[j].input;
if (EPSILON == input || handled[input])
continue;
// Get epsilon closure of the target of this rule.
get_epsilon_closure(ctx, nfa_state->rules[j].next);
// Get epsilon closure for targets of any other rules the
// current state has with this input.
for (int k = j + 1; k < nfa_state->count; ++k) {
if (input == nfa_state->rules[k].input)
get_epsilon_closure(ctx, nfa_state->rules[k].next);
}
// Do the same for all states after this one (we have
// already done them if they came before).
for (int k = i + 1; k < count; ++k) {
// Note: this declaration shadows the outer nfa_state within the loop.
const fsa_state_t *nfa_state
= &ctx->nfa->states[nfa_states[k]];
for (int l = 0; l < nfa_state->count; ++l) {
if (input == nfa_state->rules[l].input)
get_epsilon_closure(ctx, nfa_state->rules[l].next);
}
}
// The buffer now contains the all states reachable via
// epsilon move or the given input -- recurse.
int new_dfa_state = convert_step(ctx);
fsa_add_rule(ctx->dfa, dfa_state, new_dfa_state, input);
handled[input] = true;
}
}
return dfa_state;
}
/*
 * Build in dfa_out a DFA derived from nfa. dfa_out is initialised
 * here with fsa_init; the NFA is only read. All scratch memory used
 * during the conversion is released before returning.
 */
void convert_to_dfa(const fsa_t *nfa, fsa_t *dfa_out)
{
    fsa_init(dfa_out);

    conversion_context_t ctx = { .nfa = nfa, .dfa = dfa_out };

    // Scratch buffer for the state set being assembled.
    buffer_t *const buffer = &ctx.buffer;
    buffer->count = 0;
    buffer->capacity = BUFFER_START_CAPACITY;
    buffer->states = malloc(buffer->capacity * sizeof(int));
    assert(NULL != buffer->states);

    // Table of state sets that already have a DFA state.
    table_t *const table = &ctx.table;
    table->capacity = TABLE_START_CAPACITY;
    table->shift = TABLE_START_SHIFT;
    table->max_probe_count = 0;
    table->entries = calloc(table->capacity, sizeof(table_entry_t));
    assert(NULL != table->entries);

    // Seed with the closure of the NFA's initial state and convert.
    get_epsilon_closure(&ctx, nfa->initial);
    dfa_out->initial = convert_step(&ctx);

    free(buffer->states);
    // Empty slots hold a null key pointer (calloc), so freeing every
    // slot's key is safe.
    for (int slot = table->capacity - 1; slot >= 0; --slot)
        free(table->entries[slot].nfa_states);
    free(table->entries);
}
+4 -1
View File
@@ -117,8 +117,11 @@ static void desugar_term(regex_term_t *term)
desugar_class(term); desugar_class(term);
break; break;
case REGEX_TERM_LITERAL:
case REGEX_TERM_SUBEXPR: case REGEX_TERM_SUBEXPR:
desugar_regex(&term->subexpr);
break;
case REGEX_TERM_LITERAL:
case REGEX_TERM_EMPTY: case REGEX_TERM_EMPTY:
break; break;
} }
+21
View File
@@ -65,3 +65,24 @@ void fsa_add_rule(fsa_t *fsa, int from, int to, int input)
rule->next = to; rule->next = to;
++state->count; ++state->count;
} }
bool fsa_accepts(const fsa_t *dfa, const char *input, int len)
{
const char *end = input + len;
int current = dfa->initial;
while (input < end) {
bool found = false;
const fsa_rule_t *rules = dfa->states[current].rules;
for (int i = 0; i < dfa->states[current].count; ++i) {
if (rules[i].input == *input) {
current = rules[i].next;
found = true;
break;
}
}
if (!found)
return false;
++input;
}
return dfa->states[current].final;
}
+13
View File
@@ -0,0 +1,13 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef COMPILE_H
#define COMPILE_H
#include "fsa.h"
/*
 * Compile the first len bytes of regex into a DFA stored in *dfa_out.
 * Returns true on success and false when the regex fails to parse, in
 * which case *dfa_out is not written. Callers in this project release
 * a successfully built DFA with fsa_free.
 */
bool compile(const char *regex, int len, fsa_t *dfa_out);
#endif
+13
View File
@@ -0,0 +1,13 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef CONVERT_H
#define CONVERT_H
#include "fsa.h"
/*
 * Build in *dfa_out a deterministic automaton from the given NFA.
 * *dfa_out is initialised by this call; the NFA is left unmodified and
 * remains owned by the caller.
 */
void convert_to_dfa(const fsa_t *nfa, fsa_t *dfa_out);
#endif
+2
View File
@@ -35,4 +35,6 @@ void fsa_free(const fsa_t *fsa);
int fsa_add_state(fsa_t *fsa); int fsa_add_state(fsa_t *fsa);
void fsa_add_rule(fsa_t *fsa, int from, int to, int input); void fsa_add_rule(fsa_t *fsa, int from, int to, int input);
bool fsa_accepts(const fsa_t *dfa, const char *input, int len);
#endif #endif
+12
View File
@@ -0,0 +1,12 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef MIN_HEAP_H
#define MIN_HEAP_H
/* Rearrange xs[0..count) in place so that it forms a binary min-heap. */
void min_heap_heapify(int *xs, int count);
/*
 * Remove and return the smallest element of the min-heap xs[0..*count),
 * decrementing *count. *count must be positive; this is not checked.
 */
int min_heap_pop(int *xs, int *count);
#endif
+53
View File
@@ -0,0 +1,53 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "min_heap.h"
/* Index of the left child of node i in the implicit binary tree. */
static inline int left_child(int i)
{
    return i * 2 + 1;
}

/* Index of the parent of node i in the implicit binary tree. */
static inline int parent_of(int i)
{
    return (i - 1) / 2;
}

/* Exchange the elements at indices a and b. */
static inline void swap_at(int *xs, int a, int b)
{
    const int held = xs[b];
    xs[b] = xs[a];
    xs[a] = held;
}

/*
 * Restore the heap property below `root` by repeatedly swapping it
 * with its smaller child until neither child is smaller.
 */
static void sift_down(int *xs, int root, int count)
{
    for (;;) {
        int smallest = left_child(root);
        if (smallest >= count)
            return;

        // Prefer the right child only when it is strictly smaller.
        const int right = smallest + 1;
        if (right < count && xs[right] < xs[smallest])
            smallest = right;

        if (xs[root] <= xs[smallest])
            return;

        swap_at(xs, root, smallest);
        root = smallest;
    }
}

/* Rearrange xs[0..count) in place into a binary min-heap. */
void min_heap_heapify(int *xs, int count)
{
    // Sift down every node from the last parent back to the root.
    int node = parent_of(count - 1);
    while (node >= 0) {
        sift_down(xs, node, count);
        --node;
    }
}

/*
 * Remove and return the minimum of the heap xs[0..*count), shrinking
 * *count by one. The last element replaces the root and is sifted
 * down to restore the heap.
 */
int min_heap_pop(int *xs, int *count)
{
    const int smallest = xs[0];
    const int last = *count - 1;
    xs[0] = xs[last];
    *count = last;
    sift_down(xs, 0, last);
    return smallest;
}
+3
View File
@@ -18,7 +18,10 @@ endfunction()
add_test_suites( add_test_suites(
construct_tests.c construct_tests.c
convert_tests.c
desugar_tests.c desugar_tests.c
fsa_tests.c fsa_tests.c
integration_tests.c
min_heap_tests.c
parse_tests.c parse_tests.c
) )
+251
View File
@@ -0,0 +1,251 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "convert.h"
#include "testing.h"
/*
 * Check that an automaton is deterministic: no state may have an
 * epsilon rule, and no state may have two rules for the same input.
 */
static bool is_deterministic(const fsa_t *fsa)
{
    for (int s = 0; s < fsa->count; ++s) {
        const fsa_state_t *state = &fsa->states[s];
        // Inputs already used by an earlier rule of this state.
        bool used[CHAR_COUNT] = { 0 };
        for (int r = 0; r < state->count; ++r) {
            const int symbol = state->rules[r].input;
            if (symbol == EPSILON || used[symbol])
                return false;
            used[symbol] = true;
        }
    }
    return true;
}
// Two-state NFA with a single 'a' rule from the initial state to a
// final state: the converted DFA must accept exactly "a".
static void test_trivial_case(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
nfa.states[b].final = true;
fsa_add_rule(&nfa, a, b, 'a');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "a");
ASSERT_REJECTS(&dfa, "aa");
ASSERT_REJECTS(&dfa, "b");
fsa_free(&nfa);
fsa_free(&dfa);
}
// NFA where the initial state reaches the final state either directly
// on 'a' or via an epsilon move followed by 'b': the DFA must accept
// exactly "a" and "b" and contain no epsilon rules.
static void test_epsilon_move(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
const int c = fsa_add_state(&nfa);
nfa.states[c].final = true;
fsa_add_rule(&nfa, a, b, EPSILON);
fsa_add_rule(&nfa, a, c, 'a');
fsa_add_rule(&nfa, b, c, 'b');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "a");
ASSERT_ACCEPTS(&dfa, "b");
ASSERT_REJECTS(&dfa, "aa");
ASSERT_REJECTS(&dfa, "bb");
ASSERT_REJECTS(&dfa, "ab");
ASSERT_REJECTS(&dfa, "ba");
ASSERT_REJECTS(&dfa, "c");
fsa_free(&nfa);
fsa_free(&dfa);
}
// NFA with two 'a' rules leaving the initial state (a genuine
// non-deterministic branch); one branch continues on 'b', the other
// on 'a'. The DFA must accept exactly "aa" and "ab".
static void test_branch(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
const int c = fsa_add_state(&nfa);
const int d = fsa_add_state(&nfa);
nfa.states[d].final = true;
fsa_add_rule(&nfa, a, b, 'a');
fsa_add_rule(&nfa, a, c, 'a');
fsa_add_rule(&nfa, b, d, 'b');
fsa_add_rule(&nfa, c, d, 'a');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "aa");
ASSERT_ACCEPTS(&dfa, "ab");
ASSERT_REJECTS(&dfa, "a");
ASSERT_REJECTS(&dfa, "aaa");
ASSERT_REJECTS(&dfa, "abb");
ASSERT_REJECTS(&dfa, "c");
ASSERT_REJECTS(&dfa, "ac");
fsa_free(&nfa);
fsa_free(&dfa);
}
// Compound case: four-state NFA with two final states (c and d),
// epsilon moves a->c and c->b, a self-loop on b, and a cycle between
// c and d on 'a'. Checks a sample of accepted and rejected strings.
static void test_nfa_a(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
const int c = fsa_add_state(&nfa);
const int d = fsa_add_state(&nfa);
nfa.states[c].final = true;
nfa.states[d].final = true;
fsa_add_rule(&nfa, a, b, 'a');
fsa_add_rule(&nfa, a, c, EPSILON);
fsa_add_rule(&nfa, b, b, 'b');
fsa_add_rule(&nfa, b, d, 'b');
fsa_add_rule(&nfa, c, b, EPSILON);
fsa_add_rule(&nfa, c, d, 'a');
fsa_add_rule(&nfa, d, c, 'a');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "");
ASSERT_ACCEPTS(&dfa, "a");
ASSERT_ACCEPTS(&dfa, "b");
ASSERT_ACCEPTS(&dfa, "ab");
ASSERT_ACCEPTS(&dfa, "ba");
ASSERT_ACCEPTS(&dfa, "aaaab");
ASSERT_REJECTS(&dfa, "aaab");
ASSERT_REJECTS(&dfa, "aaaba");
ASSERT_REJECTS(&dfa, "aaabb");
ASSERT_REJECTS(&dfa, "aaaaab");
ASSERT_REJECTS(&dfa, "aaaaaba");
ASSERT_REJECTS(&dfa, "aaaaabb");
fsa_free(&nfa);
fsa_free(&dfa);
}
// Compound case: four-state NFA whose only final state (c) is reached
// by epsilon moves from both a and b, with 'a'/'b' rules cycling
// through b and d. Note that "aaaaaa" is asserted twice below.
static void test_nfa_b(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
const int c = fsa_add_state(&nfa);
const int d = fsa_add_state(&nfa);
nfa.states[c].final = true;
fsa_add_rule(&nfa, a, b, 'a');
fsa_add_rule(&nfa, a, c, EPSILON);
fsa_add_rule(&nfa, b, c, EPSILON);
fsa_add_rule(&nfa, c, b, 'b');
fsa_add_rule(&nfa, c, d, 'a');
fsa_add_rule(&nfa, d, b, 'a');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "");
ASSERT_ACCEPTS(&dfa, "a");
ASSERT_ACCEPTS(&dfa, "aaaaaa");
ASSERT_ACCEPTS(&dfa, "b");
ASSERT_ACCEPTS(&dfa, "bbbbb");
ASSERT_ACCEPTS(&dfa, "aaaaaa");
ASSERT_ACCEPTS(&dfa, "aaaaabaa");
ASSERT_ACCEPTS(&dfa, "aaaaabaab");
ASSERT_REJECTS(&dfa, "ba");
ASSERT_REJECTS(&dfa, "aba");
ASSERT_REJECTS(&dfa, "abab");
ASSERT_REJECTS(&dfa, "aaaaaba");
ASSERT_REJECTS(&dfa, "aaaaabaaa");
ASSERT_REJECTS(&dfa, "aaaaabbaabbaaa");
fsa_free(&nfa);
fsa_free(&dfa);
}
// Compound case: five-state NFA with a single final state (e) reached
// only by an epsilon move from b, several branching 'a'/'b' rules and
// a rule from e back to the initial state on 'b'.
static void test_nfa_c(void)
{
fsa_t nfa;
fsa_init(&nfa);
const int a = nfa.initial;
const int b = fsa_add_state(&nfa);
const int c = fsa_add_state(&nfa);
const int d = fsa_add_state(&nfa);
const int e = fsa_add_state(&nfa);
nfa.states[e].final = true;
fsa_add_rule(&nfa, a, b, 'a');
fsa_add_rule(&nfa, a, c, 'a');
fsa_add_rule(&nfa, a, d, 'b');
fsa_add_rule(&nfa, b, b, 'a');
fsa_add_rule(&nfa, b, d, 'b');
fsa_add_rule(&nfa, b, e, EPSILON);
fsa_add_rule(&nfa, d, b, 'a');
fsa_add_rule(&nfa, d, c, 'b');
fsa_add_rule(&nfa, d, d, 'a');
fsa_add_rule(&nfa, e, a, 'b');
fsa_t dfa;
convert_to_dfa(&nfa, &dfa);
ASSERT_TRUE(is_deterministic(&dfa));
ASSERT_ACCEPTS(&dfa, "a");
ASSERT_ACCEPTS(&dfa, "aba");
ASSERT_ACCEPTS(&dfa, "aaba");
ASSERT_ACCEPTS(&dfa, "abaaba");
ASSERT_ACCEPTS(&dfa, "ba");
ASSERT_ACCEPTS(&dfa, "babba");
ASSERT_ACCEPTS(&dfa, "baaa");
ASSERT_ACCEPTS(&dfa, "baba");
ASSERT_ACCEPTS(&dfa, "babaa");
ASSERT_REJECTS(&dfa, "");
ASSERT_REJECTS(&dfa, "ab");
ASSERT_REJECTS(&dfa, "aab");
ASSERT_REJECTS(&dfa, "abbab");
ASSERT_REJECTS(&dfa, "b");
ASSERT_REJECTS(&dfa, "bb");
ASSERT_REJECTS(&dfa, "baaabab");
ASSERT_REJECTS(&dfa, "aabababab");
fsa_free(&nfa);
fsa_free(&dfa);
}
// Test-suite entry point: runs every conversion test and returns the
// value produced by TESTING_END().
int main(void)
{
TESTING_BEGIN();
// Base cases
test_trivial_case();
test_epsilon_move();
test_branch();
// Compound cases
test_nfa_a();
test_nfa_b();
test_nfa_c();
return TESTING_END();
}
+55
View File
@@ -287,6 +287,60 @@ static void class_abc_becomes_subexpr_a_or_b_or_c(void)
regex_free(&t); regex_free(&t);
} }
// Builds by hand the parse tree of a subexpression containing the
// single term `a?` and checks that desugar_regex recurses into it:
// the outer subexpression must remain, and the inner `a?` must have
// been rewritten into a subexpression with two alternatives, an
// empty term and the literal 'a'.
static void subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a(void)
{
// Inner sequence: the literal 'a' with a '?' quantifier.
// NOTE(review): the malloc results in this fixture are not checked.
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_QMARK;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
// Outer sequence: one unquantified subexpression wrapping the above.
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
// Top level is unchanged: a single unquantified subexpression.
ASSERT_EQ(1, t.count);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
// One level down: `a?` has become an unquantified subexpression.
const regex_t *inner;
inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, inner->contents[0].contents[0].type);
// Two levels down: the alternatives "empty" and 'a'.
inner = &inner->contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('a', inner->contents[1].contents[0].literal);
regex_free(&t);
}
int main(void) int main(void)
{ {
TESTING_BEGIN(); TESTING_BEGIN();
@@ -298,5 +352,6 @@ int main(void)
a_plus_becomes_subexpr_aa_star(); a_plus_becomes_subexpr_aa_star();
a_qmark_becomes_subexpr_empty_or_a(); a_qmark_becomes_subexpr_empty_or_a();
class_abc_becomes_subexpr_a_or_b_or_c(); class_abc_becomes_subexpr_a_or_b_or_c();
subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a();
return TESTING_END(); return TESTING_END();
} }
+3
View File
@@ -34,6 +34,9 @@
#define ASSERT_NOT_NULL(p) ASSERT_FALSE(NULL == (p)) #define ASSERT_NOT_NULL(p) ASSERT_FALSE(NULL == (p))
#define ASSERT_MEM_EQ(p, q, n) ASSERT_FALSE(memcmp(p, q, n) != 0) #define ASSERT_MEM_EQ(p, q, n) ASSERT_FALSE(memcmp(p, q, n) != 0)
// Assert that the automaton `dfa` accepts / rejects the NUL-terminated
// string `s`. `s` is expanded twice (once for strlen), so it should be
// free of side effects.
#define ASSERT_ACCEPTS(dfa, s) ASSERT_TRUE(fsa_accepts(dfa, s, strlen(s)))
#define ASSERT_REJECTS(dfa, s) ASSERT_FALSE(fsa_accepts(dfa, s, strlen(s)))
extern int fail_count; extern int fail_count;
#endif #endif
+57
View File
@@ -0,0 +1,57 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "compile.h"
#include "testing.h"
// End-to-end check of alternation: "foo|bar" must compile and match
// "foo" and "bar" but not "baz".
static void test_foo_or_bar_regex(void)
{
fsa_t dfa;
const char *regex = "foo|bar";
const bool success = compile(regex, strlen(regex), &dfa);
ASSERT_TRUE(success);
ASSERT_ACCEPTS(&dfa, "foo");
ASSERT_ACCEPTS(&dfa, "bar");
ASSERT_REJECTS(&dfa, "baz");
fsa_free(&dfa);
}
// End-to-end check of a starred group: "(II)*" must match strings
// made of an even number of 'I's (including none) and reject odd
// counts.
static void test_even_number_of_Is_regex(void)
{
fsa_t dfa;
const char *regex = "(II)*";
const bool success = compile(regex, strlen(regex), &dfa);
ASSERT_TRUE(success);
ASSERT_ACCEPTS(&dfa, "");
ASSERT_ACCEPTS(&dfa, "II");
ASSERT_ACCEPTS(&dfa, "IIII");
ASSERT_ACCEPTS(&dfa, "IIIIIIIIII");
ASSERT_REJECTS(&dfa, "III");
ASSERT_REJECTS(&dfa, "IIIII");
ASSERT_REJECTS(&dfa, "IIIIIIIII");
fsa_free(&dfa);
}
// End-to-end check combining grouping, '?', '*', '+' and alternation
// in one expression.
static void test_arbitrary_regex_1(void)
{
fsa_t dfa;
const char *regex = "(abc!?)*|dd+";
const bool success = compile(regex, strlen(regex), &dfa);
ASSERT_TRUE(success);
ASSERT_ACCEPTS(&dfa, "abc!abcabc");
ASSERT_ACCEPTS(&dfa, "dddddddd");
ASSERT_REJECTS(&dfa, "d");
ASSERT_REJECTS(&dfa, "abcd");
fsa_free(&dfa);
}
// Test-suite entry point: runs every integration test and returns the
// value produced by TESTING_END().
int main(void)
{
TESTING_BEGIN();
test_foo_or_bar_regex();
test_even_number_of_Is_regex();
test_arbitrary_regex_1();
return TESTING_END();
}
+49
View File
@@ -0,0 +1,49 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "min_heap.h"
#include "testing.h"
#include <stdbool.h>
/*
 * Check the min-heap property on xs[0..count): no element may be
 * smaller than its parent in the implicit binary tree.
 */
static bool is_min_heap(int *xs, int count)
{
    // Checking each non-root node against its parent is equivalent to
    // checking each node against both of its children.
    for (int child = 1; child < count; ++child) {
        const int up = (child - 1) / 2;
        if (xs[child] < xs[up])
            return false;
    }
    return true;
}
// Heapifying an unsorted array (with duplicate values) must leave it
// satisfying the min-heap property.
static void array_is_min_heap_after_heapify(void)
{
int xs[] = { 54, 12, 35, 43, 21, 12, 34, 52, 34, 23 };
const int len = sizeof(xs) / sizeof(int);
min_heap_heapify(xs, len);
ASSERT_TRUE(is_min_heap(xs, len));
}
// Successive pops from a heapified array must return its values in
// ascending order and shrink the count by one each time.
static void extract_root_yields_min(void)
{
int xs[] = { 71, 31, 12, 21, 65, 53, 54, 10 };
int len = 8;
min_heap_heapify(xs, len);
ASSERT_EQ(10, min_heap_pop(xs, &len));
ASSERT_EQ(12, min_heap_pop(xs, &len));
ASSERT_EQ(21, min_heap_pop(xs, &len));
ASSERT_EQ(5, len);
}
// Test-suite entry point: runs every min-heap test and returns the
// value produced by TESTING_END().
int main(void)
{
TESTING_BEGIN();
array_is_min_heap_after_heapify();
extract_root_yields_min();
return TESTING_END();
}