Compare commits

...

10 Commits

16 changed files with 526 additions and 839 deletions

View File

@ -12,6 +12,11 @@
#define BUFFER_START_CAPACITY 128 #define BUFFER_START_CAPACITY 128
#define PREFIX ".*("
#define PREFIX_LEN 3
#define SUFFIX ").*"
#define SUFFIX_LEN 3
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
if (argc != 2) { if (argc != 2) {
@ -19,8 +24,15 @@ int main(int argc, char *argv[])
return EXIT_FAILURE; return EXIT_FAILURE;
} }
const int input_len = strlen(argv[1]);
const int regex_len = input_len + 6;
char *regex = malloc(regex_len);
memcpy(regex, PREFIX, PREFIX_LEN);
memcpy(regex + PREFIX_LEN, argv[1], input_len);
memcpy(regex + PREFIX_LEN + input_len, SUFFIX, SUFFIX_LEN);
fsa_t dfa; fsa_t dfa;
if (!compile(argv[1], strlen(argv[1]), &dfa)) { if (!compile(regex, regex_len, &dfa)) {
fprintf(stderr, "Failed to parse regex\n"); fprintf(stderr, "Failed to parse regex\n");
return EXIT_FAILURE; return EXIT_FAILURE;
} }

View File

@ -2,11 +2,9 @@ add_library(lib
compile.c compile.c
construct.c construct.c
convert.c convert.c
desugar.c
fsa.c fsa.c
min_heap.c min_heap.c
parse.c parse.c
regex.c
) )
set_default_target_options(lib) set_default_target_options(lib)
target_include_directories(lib PUBLIC include) target_include_directories(lib PUBLIC include)

View File

@ -5,21 +5,19 @@
#include "compile.h" #include "compile.h"
#include "parse.h"
#include "desugar.h"
#include "construct.h" #include "construct.h"
#include "convert.h" #include "convert.h"
#include "parse.h"
bool compile(const char *regex, int len, fsa_t *dfa_out) bool compile(const char *regex, int len, fsa_t *dfa_out)
{ {
regex_t pt; parse_tree_t pt;
if (-1 == parse_expr(regex, len, &pt)) if (-1 == parse_expr(regex, len, &pt))
return false; return false;
desugar_regex(&pt);
fsa_t nfa; fsa_t nfa;
construct_nfa(&pt, &nfa); construct_nfa(&pt, &nfa);
regex_free(&pt); parse_tree_free(&pt);
convert_to_dfa(&nfa, dfa_out); convert_to_dfa(&nfa, dfa_out);
fsa_free(&nfa); fsa_free(&nfa);

View File

@ -115,62 +115,119 @@ static void prepend_fsa(fsa_t *f, const fsa_t *o)
f->count = count; f->count = count;
} }
static void construct_base(fsa_t *out, int symbol) static void construct_base(fsa_t *out)
{ {
fsa_init(out); fsa_init(out);
const int id = fsa_add_state(out);
fsa_add_rule(out, id, out->initial, symbol);
out->initial = id;
out->states[0].final = true; out->states[0].final = true;
out->initial = fsa_add_state(out);
}
/*
 * Reset `out` through construct_base() and then add a single rule labelled
 * `symbol`, passing the freshly created initial state and state 0 (the state
 * construct_base() marks final) to fsa_add_rule() in that order.
 */
static void construct_symbol(fsa_t *out, int symbol)
{
    construct_base(out);

    const int start = out->initial;
    fsa_add_rule(out, start, 0, symbol);
}
/*
 * Report whether `c` is one of the first `class->count` characters stored in
 * `class->contents`.
 */
static bool in_class(const parse_class_t *class, char c)
{
    const char *member = class->contents;
    int remaining = class->count;

    while (remaining-- > 0) {
        if (c == *member++)
            return true;
    }
    return false;
}
/*
 * Build the automaton for a character class in `out`.
 *
 * After construct_base() resets `out`, one rule per accepted symbol is added
 * between out->initial and state 0:
 *  - plain class:   one rule for each entry of class->contents;
 *  - negated class: one rule for each value in [0, CHAR_COUNT) that
 *                   in_class() does not find in the class.
 */
static void construct_class(fsa_t *out, const parse_class_t *class)
{
    construct_base(out);

    if (!class->negated) {
        for (int idx = 0; idx < class->count; ++idx) {
            const char member = class->contents[idx];
            fsa_add_rule(out, out->initial, 0, member);
        }
        return;
    }

    for (int symbol = 0; symbol < CHAR_COUNT; ++symbol) {
        if (in_class(class, symbol))
            continue;
        fsa_add_rule(out, out->initial, 0, symbol);
    }
}
/*
 * Build the automaton for `.` in `out`: after construct_base(), add one rule
 * between out->initial and state 0 for every symbol value in
 * [0, CHAR_COUNT).
 */
static void construct_wildcard(fsa_t *out)
{
    construct_base(out);

    int symbol = 0;
    while (symbol < CHAR_COUNT) {
        fsa_add_rule(out, out->initial, 0, symbol);
        ++symbol;
    }
}
static void base_quantify(fsa_t *out, int *init_out, int *final_out)
{
fsa_t f;
memcpy(&f, out, sizeof(fsa_t));
construct_base(out);
add_fsa(out, &f, init_out, final_out);
fsa_add_rule(out, out->initial, *init_out, EPSILON);
fsa_add_rule(out, *final_out, 0, EPSILON);
} }
static void construct_star(fsa_t *out) static void construct_star(fsa_t *out)
{ {
fsa_t f; int sub_init, sub_final;
memcpy(&f, out, sizeof(fsa_t)); base_quantify(out, &sub_init, &sub_final);
fsa_add_rule(out, sub_final, sub_init, EPSILON);
construct_base(out, EPSILON); fsa_add_rule(out, out->initial, 0, EPSILON);
int f_initial, f_final;
add_fsa(out, &f, &f_initial, &f_final);
fsa_add_rule(out, out->initial, f_initial, EPSILON);
fsa_add_rule(out, f_final, f_initial, EPSILON);
fsa_add_rule(out, f_final, 0, EPSILON);
} }
static void construct_term(const regex_term_t *term, fsa_t *out) static void construct_plus(fsa_t *out)
{
int sub_init, sub_final;
base_quantify(out, &sub_init, &sub_final);
fsa_add_rule(out, sub_final, sub_init, EPSILON);
}
/*
 * Apply the `?` quantifier to the automaton already held in `out`.
 *
 * base_quantify() wraps the existing automaton; the extra EPSILON rule added
 * here links the new initial state with state 0, which construct_base() marks
 * final. The wrapped automaton's own entry and exit states are returned by
 * base_quantify() but are not needed for `?`.
 */
static void construct_qmark(fsa_t *out)
{
    int inner_start;
    int inner_end;

    base_quantify(out, &inner_start, &inner_end);
    fsa_add_rule(out, out->initial, 0, EPSILON);
}
static void construct_term(const parse_term_t *term, fsa_t *out)
{ {
switch (term->type) { switch (term->type) {
case REGEX_TERM_EMPTY: case PARSE_TERM_EMPTY:
construct_base(out, EPSILON); construct_symbol(out, EPSILON);
break; break;
case REGEX_TERM_LITERAL: case PARSE_TERM_LITERAL:
construct_base(out, term->literal); construct_symbol(out, term->literal);
break; break;
case REGEX_TERM_SUBEXPR: case PARSE_TERM_SUBEXPR:
construct_nfa(&term->subexpr, out); construct_nfa(&term->subexpr, out);
break; break;
case REGEX_TERM_WILDCARD: case PARSE_TERM_CLASS:
case REGEX_TERM_CLASS: construct_class(out, &term->class);
assert(false); break;
case PARSE_TERM_WILDCARD:
construct_wildcard(out);
break; break;
} }
switch (term->quantifier) { switch (term->quantifier) {
case REGEX_QUANTIFIER_NONE: case PARSE_QUANTIFIER_NONE:
break; break;
case REGEX_QUANTIFIER_STAR: case PARSE_QUANTIFIER_STAR:
construct_star(out); construct_star(out);
break; break;
case REGEX_QUANTIFIER_PLUS: case PARSE_QUANTIFIER_PLUS:
case REGEX_QUANTIFIER_QMARK: construct_plus(out);
assert(false); break;
case PARSE_QUANTIFIER_QMARK:
construct_qmark(out);
break; break;
} }
assert(out->states[0].final); assert(out->states[0].final);
} }
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out) static void construct_sequence(const parse_sequence_t *seq, fsa_t *out)
{ {
assert(seq->count > 0); assert(seq->count > 0);
@ -204,7 +261,7 @@ static void construct_union(fsa_t *f, const fsa_t *o)
fsa_add_rule(f, final, 0, EPSILON); fsa_add_rule(f, final, 0, EPSILON);
} }
void construct_nfa(const regex_t *regex, fsa_t *out) void construct_nfa(const parse_tree_t *regex, fsa_t *out)
{ {
assert(regex->count > 0); assert(regex->count > 0);

View File

@ -1,150 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "desugar.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/*
 * Rewrite a character-class term in place as a sub-expression with one
 * alternative per class member, each alternative being a single unquantified
 * literal (so a class holding a, b and c becomes the alternation a|b|c).
 *
 * Negated classes are not supported and trip an assertion. The class's own
 * storage is released; the term's quantifier is left untouched.
 */
static void desugar_class(regex_term_t *term)
{
    assert(!term->class.negated);

    const int count = term->class.count;
    regex_sequence_t *alternatives
        = malloc(count * sizeof(regex_sequence_t));
    assert(NULL != alternatives);

    for (int i = 0; i < count; ++i) {
        regex_term_t *terms = malloc(sizeof(regex_term_t));
        /* Checked like every other allocation in this file before use. */
        assert(NULL != terms);

        terms[0].quantifier = REGEX_QUANTIFIER_NONE;
        terms[0].type = REGEX_TERM_LITERAL;
        terms[0].literal = term->class.contents[i];

        alternatives[i].count = alternatives[i].capacity = 1;
        alternatives[i].contents = terms;
    }

    /* The class shares a union with subexpr: free it before overwriting. */
    regex_class_free(&term->class);

    term->type = REGEX_TERM_SUBEXPR;
    term->subexpr.count = term->subexpr.capacity = count;
    term->subexpr.contents = alternatives;
}
static void deep_copy_term(regex_term_t *dst, regex_term_t *src);
/*
 * Make `dst` an independent copy of `src`: allocate room for exactly
 * src->count terms and deep-copy each one with deep_copy_term().
 */
static void deep_copy_sequence(regex_sequence_t *dst, regex_sequence_t *src)
{
    const int n = src->count;

    dst->capacity = n;
    dst->count = n;
    dst->contents = malloc(n * sizeof(regex_term_t));
    assert(NULL != dst->contents);

    int idx = 0;
    while (idx < n) {
        deep_copy_term(&dst->contents[idx], &src->contents[idx]);
        ++idx;
    }
}
/*
 * Copy `src` into `dst` without sharing heap storage.
 *
 * Wildcard and class terms are rejected by assertion. Every other term is
 * copied bytewise; a sub-expression additionally gets a freshly allocated
 * array of alternatives, each filled in by deep_copy_sequence().
 */
static void deep_copy_term(regex_term_t *dst, regex_term_t *src)
{
    assert(REGEX_TERM_WILDCARD != src->type);
    assert(REGEX_TERM_CLASS != src->type);

    memcpy(dst, src, sizeof *dst);
    if (REGEX_TERM_SUBEXPR != src->type)
        return;

    /* dst->subexpr.count was already carried over by the memcpy above. */
    const int alternatives = src->subexpr.count;
    dst->subexpr.capacity = alternatives;
    dst->subexpr.contents = malloc(alternatives * sizeof(regex_sequence_t));
    assert(NULL != dst->subexpr.contents);

    for (int alt = 0; alt < alternatives; ++alt) {
        deep_copy_sequence(
            &dst->subexpr.contents[alt], &src->subexpr.contents[alt]);
    }
}
/*
 * Rewrite `x+` in place as the sub-expression `(x x*)`.
 *
 * The first element of the new two-term sequence is a bytewise copy of the
 * original term (so it takes over whatever heap storage the term referenced);
 * the second is a deep copy carrying the star quantifier. `term` itself
 * becomes an unquantified sub-expression with that sequence as its only
 * alternative.
 */
static void desugar_plus(regex_term_t *term)
{
    regex_sequence_t *seq = malloc(sizeof(regex_sequence_t));
    assert(NULL != seq);

    seq->count = 2;
    seq->capacity = 2;
    seq->contents = malloc(2 * sizeof(regex_term_t));
    assert(NULL != seq->contents);

    regex_term_t *once = &seq->contents[0];
    regex_term_t *repeated = &seq->contents[1];

    memcpy(once, term, sizeof(regex_term_t));
    once->quantifier = REGEX_QUANTIFIER_NONE;

    deep_copy_term(repeated, term);
    repeated->quantifier = REGEX_QUANTIFIER_STAR;

    term->quantifier = REGEX_QUANTIFIER_NONE;
    term->type = REGEX_TERM_SUBEXPR;
    term->subexpr.count = 1;
    term->subexpr.capacity = 1;
    term->subexpr.contents = seq;
}
/*
 * Rewrite `x?` in place as the sub-expression `(<empty>|x)`.
 *
 * Alternative 0 holds a single empty term; alternative 1 holds a bytewise
 * copy of the original term with its quantifier cleared (so it takes over
 * whatever heap storage the term referenced). `term` itself becomes an
 * unquantified sub-expression owning both alternatives.
 */
static void desugar_qmark(regex_term_t *term)
{
    regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t));
    assert(NULL != alternatives);

    /* Alternative 0: match nothing. */
    alternatives[0].count = alternatives[0].capacity = 1;
    alternatives[0].contents = malloc(sizeof(regex_term_t));
    assert(NULL != alternatives[0].contents);
    alternatives[0].contents[0].quantifier = REGEX_QUANTIFIER_NONE;
    alternatives[0].contents[0].type = REGEX_TERM_EMPTY;

    /*
     * Alternative 1: the original term, exactly once. The capacity set here
     * must be alternative 1's own; it was previously left uninitialised
     * because alternative 0's capacity was assigned a second time instead.
     */
    alternatives[1].count = alternatives[1].capacity = 1;
    alternatives[1].contents = malloc(sizeof(regex_term_t));
    assert(NULL != alternatives[1].contents);
    memcpy(&alternatives[1].contents[0], term, sizeof(regex_term_t));
    alternatives[1].contents[0].quantifier = REGEX_QUANTIFIER_NONE;

    term->quantifier = REGEX_QUANTIFIER_NONE;
    term->type = REGEX_TERM_SUBEXPR;
    term->subexpr.count = term->subexpr.capacity = 2;
    term->subexpr.contents = alternatives;
}
/*
 * Desugar a single term in two passes.
 *
 * Pass 1 deals with the term's kind: wildcards are rejected by assertion,
 * classes are expanded by desugar_class(), sub-expressions are desugared
 * recursively, and literals / empty terms are left alone.
 * Pass 2 deals with the quantifier: `+` and `?` are rewritten by
 * desugar_plus() / desugar_qmark(); no quantifier and `*` are left alone.
 *
 * The kind pass runs first, so the quantifier rewrite wraps the already
 * desugared term.
 */
static void desugar_term(regex_term_t *term)
{
    if (REGEX_TERM_WILDCARD == term->type) {
        assert(false);
    } else if (REGEX_TERM_CLASS == term->type) {
        desugar_class(term);
    } else if (REGEX_TERM_SUBEXPR == term->type) {
        desugar_regex(&term->subexpr);
    }

    if (REGEX_QUANTIFIER_PLUS == term->quantifier) {
        desugar_plus(term);
    } else if (REGEX_QUANTIFIER_QMARK == term->quantifier) {
        desugar_qmark(term);
    }
}
/*
 * Desugar every term of every alternative of `regex` in place, visiting
 * alternatives and their terms in index order.
 */
void desugar_regex(regex_t *regex)
{
    int alt = 0;
    while (alt < regex->count) {
        int pos = 0;
        while (pos < regex->contents[alt].count) {
            desugar_term(&regex->contents[alt].contents[pos]);
            ++pos;
        }
        ++alt;
    }
}

View File

@ -7,8 +7,8 @@
#define CONSTRUCT_H #define CONSTRUCT_H
#include "fsa.h" #include "fsa.h"
#include "regex.h" #include "parse.h"
void construct_nfa(const regex_t *regex, fsa_t *out); void construct_nfa(const parse_tree_t *regex, fsa_t *out);
#endif #endif

View File

@ -1,13 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef DESUGAR_H
#define DESUGAR_H
#include "regex.h"
void desugar_regex(regex_t *regex);
#endif

View File

@ -6,10 +6,53 @@
#ifndef PARSE_H #ifndef PARSE_H
#define PARSE_H #define PARSE_H
#include "regex.h" #include <stdbool.h>
#define PARSE_FAIL (-1) #define PARSE_FAIL (-1)
int parse_expr(const char *input, int rem, regex_t *out); typedef struct {
bool negated;
int count, capacity;
char *contents;
} parse_class_t;
/* Quantifier attached to a term; the parser maps '*', '+' and '?' to
 * STAR, PLUS and QMARK, and uses NONE otherwise. */
typedef enum {
PARSE_QUANTIFIER_NONE,
PARSE_QUANTIFIER_STAR,
PARSE_QUANTIFIER_PLUS,
PARSE_QUANTIFIER_QMARK,
} parse_quantifier_t;
/* Kind of a term; selects which member of the union in parse_term_t is
 * meaningful (WILDCARD and EMPTY use none of them). */
typedef enum {
PARSE_TERM_WILDCARD,
PARSE_TERM_CLASS,
PARSE_TERM_LITERAL,
PARSE_TERM_SUBEXPR,
PARSE_TERM_EMPTY,
} parse_term_type_t;
/* Forward declaration: sequences hold terms, and terms may hold a whole
 * tree as a sub-expression. */
struct _parse_term;
/* A run of consecutive terms, stored as a growable array
 * (count entries in use out of capacity allocated). */
typedef struct {
int count, capacity;
struct _parse_term *contents;
} parse_sequence_t;
/* A parsed expression: a growable array of alternative sequences. */
typedef struct {
int count, capacity;
parse_sequence_t *contents;
} parse_tree_t;
/* One term: a quantifier, a kind, and the payload for that kind. */
typedef struct _parse_term {
parse_quantifier_t quantifier;
parse_term_type_t type;
union {
parse_class_t class; /* valid when type is PARSE_TERM_CLASS */
char literal; /* valid when type is PARSE_TERM_LITERAL */
parse_tree_t subexpr; /* valid when type is PARSE_TERM_SUBEXPR */
};
} parse_term_t;
/* Parse up to `rem` characters of `input` into `out`; returns the number of
 * characters consumed, or PARSE_FAIL. */
int parse_expr(const char *input, int rem, parse_tree_t *out);
/* Free the arrays owned by `t`, including nested classes and
 * sub-expressions; `t` itself is not freed. */
void parse_tree_free(const parse_tree_t *t);
#endif #endif

View File

@ -1,56 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#ifndef REGEX_H
#define REGEX_H
#include <stdbool.h>
/* Character class: an array of member characters plus a negation flag. */
typedef struct {
bool negated;
int count, capacity;
char *contents;
} regex_class_t;
/* Quantifier attached to a term. */
typedef enum {
REGEX_QUANTIFIER_NONE,
REGEX_QUANTIFIER_STAR,
REGEX_QUANTIFIER_PLUS,
REGEX_QUANTIFIER_QMARK,
} regex_quantifier_t;
/* Kind of a term; selects which member of the union in regex_term_t is
 * meaningful (WILDCARD and EMPTY use none of them). */
typedef enum {
REGEX_TERM_WILDCARD,
REGEX_TERM_CLASS,
REGEX_TERM_LITERAL,
REGEX_TERM_SUBEXPR,
REGEX_TERM_EMPTY,
} regex_term_type_t;
/* Forward declaration: sequences hold terms, and terms may hold a whole
 * regex as a sub-expression. */
struct _regex_term;
/* A run of consecutive terms, stored as an array with count and capacity. */
typedef struct {
int count, capacity;
struct _regex_term *contents;
} regex_sequence_t;
/* An expression: an array of alternative sequences. */
typedef struct {
int count, capacity;
regex_sequence_t *contents;
} regex_t;
/* One term: a quantifier, a kind, and the payload for that kind. */
typedef struct _regex_term {
regex_quantifier_t quantifier;
regex_term_type_t type;
union {
regex_class_t class; /* valid when type is REGEX_TERM_CLASS */
char literal; /* valid when type is REGEX_TERM_LITERAL */
regex_t subexpr; /* valid when type is REGEX_TERM_SUBEXPR */
};
} regex_term_t;
/* Free the arrays owned by `t`, including nested classes and
 * sub-expressions; `t` itself is not freed. */
void regex_free(const regex_t *t);
/* Free the member array owned by `c`; `c` itself is not freed. */
void regex_class_free(const regex_class_t *c);
#endif

View File

@ -45,7 +45,7 @@ static int parse_literal(const char *input, int rem, char *out)
} }
} }
static int parse_class(const char *input, int rem, regex_class_t *out) static int parse_class(const char *input, int rem, parse_class_t *out)
{ {
int result, used = 0; int result, used = 0;
@ -87,7 +87,7 @@ static int parse_class(const char *input, int rem, regex_class_t *out)
return out->count > 0 ? used : -1; return out->count > 0 ? used : -1;
} }
static int parse_term(const char *input, int rem, regex_term_t *out) static int parse_term(const char *input, int rem, parse_term_t *out)
{ {
int result, used = 0; int result, used = 0;
@ -95,7 +95,7 @@ static int parse_term(const char *input, int rem, regex_term_t *out)
return PARSE_FAIL; return PARSE_FAIL;
if ('.' == input[0]) { if ('.' == input[0]) {
out->type = REGEX_TERM_WILDCARD; out->type = PARSE_TERM_WILDCARD;
++used; ++used;
} else if ('(' == input[0]) { } else if ('(' == input[0]) {
++used; ++used;
@ -103,7 +103,7 @@ static int parse_term(const char *input, int rem, regex_term_t *out)
result = parse_expr(input + used, rem - used, &out->subexpr); result = parse_expr(input + used, rem - used, &out->subexpr);
if (PARSE_FAIL == result) if (PARSE_FAIL == result)
return PARSE_FAIL; return PARSE_FAIL;
out->type = REGEX_TERM_SUBEXPR; out->type = PARSE_TERM_SUBEXPR;
used += result; used += result;
if (')' != input[used]) if (')' != input[used])
@ -113,54 +113,54 @@ static int parse_term(const char *input, int rem, regex_term_t *out)
result = parse_class(input + used, rem - used, &out->class); result = parse_class(input + used, rem - used, &out->class);
if (PARSE_FAIL == result) if (PARSE_FAIL == result)
return PARSE_FAIL; return PARSE_FAIL;
out->type = REGEX_TERM_CLASS; out->type = PARSE_TERM_CLASS;
used += result; used += result;
} else { } else {
result = parse_literal(input + used, rem - used, &out->literal); result = parse_literal(input + used, rem - used, &out->literal);
if (PARSE_FAIL == result) if (PARSE_FAIL == result)
return PARSE_FAIL; return PARSE_FAIL;
out->type = REGEX_TERM_LITERAL; out->type = PARSE_TERM_LITERAL;
used += result; used += result;
} }
if (used < rem) { if (used < rem) {
switch (input[used]) { switch (input[used]) {
case '*': case '*':
out->quantifier = REGEX_QUANTIFIER_STAR; out->quantifier = PARSE_QUANTIFIER_STAR;
++used; ++used;
break; break;
case '+': case '+':
out->quantifier = REGEX_QUANTIFIER_PLUS; out->quantifier = PARSE_QUANTIFIER_PLUS;
++used; ++used;
break; break;
case '?': case '?':
out->quantifier = REGEX_QUANTIFIER_QMARK; out->quantifier = PARSE_QUANTIFIER_QMARK;
++used; ++used;
break; break;
default: default:
out->quantifier = REGEX_QUANTIFIER_NONE; out->quantifier = PARSE_QUANTIFIER_NONE;
} }
} else { } else {
out->quantifier = REGEX_QUANTIFIER_NONE; out->quantifier = PARSE_QUANTIFIER_NONE;
} }
return used; return used;
} }
static int parse_sequence(const char *input, int rem, regex_sequence_t *out) static int parse_sequence(const char *input, int rem, parse_sequence_t *out)
{ {
int result, used = 0; int result, used = 0;
out->count = 0; out->count = 0;
out->capacity = SEQUENCE_START_CAPACITY; out->capacity = SEQUENCE_START_CAPACITY;
out->contents = malloc(out->capacity * sizeof(regex_term_t)); out->contents = malloc(out->capacity * sizeof(parse_term_t));
assert(NULL != out->contents); assert(NULL != out->contents);
while (used < rem) { while (used < rem) {
if (out->count >= out->capacity) { if (out->count >= out->capacity) {
out->capacity *= 2; out->capacity *= 2;
out->contents = realloc( out->contents = realloc(
out->contents, out->capacity * sizeof(regex_term_t)); out->contents, out->capacity * sizeof(parse_term_t));
assert(NULL != out->contents); assert(NULL != out->contents);
} }
@ -175,13 +175,13 @@ static int parse_sequence(const char *input, int rem, regex_sequence_t *out)
return out->count > 0 ? used : -1; return out->count > 0 ? used : -1;
} }
int parse_expr(const char *input, int rem, regex_t *out) int parse_expr(const char *input, int rem, parse_tree_t *out)
{ {
int result, used = 0; int result, used = 0;
out->count = 0; out->count = 0;
out->capacity = TREE_START_CAPACITY; out->capacity = TREE_START_CAPACITY;
out->contents = malloc(out->capacity * sizeof(regex_sequence_t)); out->contents = malloc(out->capacity * sizeof(parse_sequence_t));
assert(NULL != out->contents); assert(NULL != out->contents);
result = parse_sequence(input + used, rem - used, &out->contents[0]); result = parse_sequence(input + used, rem - used, &out->contents[0]);
@ -198,7 +198,7 @@ int parse_expr(const char *input, int rem, regex_t *out)
if (out->count >= out->capacity) { if (out->count >= out->capacity) {
out->capacity *= 2; out->capacity *= 2;
out->contents = realloc( out->contents = realloc(
out->contents, out->capacity * sizeof(regex_sequence_t)); out->contents, out->capacity * sizeof(parse_sequence_t));
assert(NULL != out->contents); assert(NULL != out->contents);
} }
@ -212,3 +212,37 @@ int parse_expr(const char *input, int rem, regex_t *out)
return used; return used;
} }
/*
 * Release the member array owned by `c`. The parse_class_t object itself is
 * not freed.
 */
static void class_free(const parse_class_t *c)
{
    /* free() accepts a null pointer and does nothing, so no guard is needed. */
    free(c->contents);
}
/*
 * Release everything owned by sequence `s`: the storage of each class term,
 * each nested sub-expression (recursively, via parse_tree_free()), and
 * finally the term array itself. A sequence without a term array is ignored.
 */
static void sequence_free(const parse_sequence_t *s)
{
    if (NULL == s->contents)
        return;

    for (int idx = 0; idx < s->count; ++idx) {
        if (PARSE_TERM_CLASS == s->contents[idx].type)
            class_free(&s->contents[idx].class);
        else if (PARSE_TERM_SUBEXPR == s->contents[idx].type)
            parse_tree_free(&s->contents[idx].subexpr);
        /* Other term kinds own no heap storage. */
    }

    free(s->contents);
}
/*
 * Release every alternative of `t` with sequence_free(), then the array of
 * alternatives. A tree without an alternatives array is ignored; `t` itself
 * is not freed.
 */
void parse_tree_free(const parse_tree_t *t)
{
    if (NULL == t->contents)
        return;

    int alt = 0;
    while (alt < t->count) {
        sequence_free(&t->contents[alt]);
        ++alt;
    }

    free(t->contents);
}

View File

@ -1,42 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "regex.h"
#include <stdlib.h>
/*
 * Release everything owned by sequence `s`: class storage, nested
 * sub-expressions (recursively, via regex_free()) and then the term array.
 * Nothing happens when the sequence has no term array.
 */
static void sequence_free(const regex_sequence_t *s)
{
    if (NULL == s->contents)
        return;

    int idx = 0;
    while (idx < s->count) {
        if (REGEX_TERM_CLASS == s->contents[idx].type) {
            regex_class_free(&s->contents[idx].class);
        } else if (REGEX_TERM_SUBEXPR == s->contents[idx].type) {
            regex_free(&s->contents[idx].subexpr);
        }
        ++idx;
    }

    free(s->contents);
}
/*
 * Release every alternative of `t` with sequence_free(), then the array of
 * alternatives. Nothing happens when `t` has no alternatives array; `t`
 * itself is not freed.
 */
void regex_free(const regex_t *t)
{
    if (NULL == t->contents)
        return;

    for (int alt = 0; alt != t->count && alt < t->count; ++alt)
        sequence_free(&t->contents[alt]);

    free(t->contents);
}
/*
 * Release the member array owned by `c`. The regex_class_t object itself is
 * not freed.
 */
void regex_class_free(const regex_class_t *c)
{
    /* free() accepts a null pointer and does nothing, so no guard is needed. */
    free(c->contents);
}

View File

@ -19,7 +19,6 @@ endfunction()
add_test_suites( add_test_suites(
construct_tests.c construct_tests.c
convert_tests.c convert_tests.c
desugar_tests.c
fsa_tests.c fsa_tests.c
integration_tests.c integration_tests.c
min_heap_tests.c min_heap_tests.c

View File

@ -34,13 +34,13 @@ static bool accepts(const fsa_t *nfa, const char *input)
static void test_empty_expression(void) static void test_empty_expression(void)
{ {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_EMPTY; terms[0].type = PARSE_TERM_EMPTY;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
const regex_t regex const parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives }; = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
@ -49,20 +49,45 @@ static void test_empty_expression(void)
ASSERT_TRUE(accepts(&fsa, "")); ASSERT_TRUE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "a")); ASSERT_FALSE(accepts(&fsa, "a"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa);
}
static void test_wildcard(void)
{
parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_WILDCARD;
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
const parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_TRUE(accepts(&fsa, "b"));
ASSERT_TRUE(accepts(&fsa, "c"));
ASSERT_TRUE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "aa"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_literal_expression(void) static void test_literal_expression(void)
{ {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
const regex_t regex const parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives }; = { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
@ -71,26 +96,27 @@ static void test_literal_expression(void)
ASSERT_TRUE(accepts(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_sequence(void) static void test_sequence(void)
{ {
regex_term_t *terms = malloc(3 * sizeof(regex_term_t)); parse_term_t *terms = malloc(3 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
terms[1].quantifier = REGEX_QUANTIFIER_NONE; terms[1].quantifier = PARSE_QUANTIFIER_NONE;
terms[1].type = REGEX_TERM_LITERAL; terms[1].type = PARSE_TERM_LITERAL;
terms[1].literal = 'b'; terms[1].literal = 'b';
terms[2].quantifier = REGEX_QUANTIFIER_NONE; terms[2].quantifier = PARSE_QUANTIFIER_NONE;
terms[2].type = REGEX_TERM_LITERAL; terms[2].type = PARSE_TERM_LITERAL;
terms[2].literal = 'c'; terms[2].literal = 'c';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3; alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms; alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@ -101,24 +127,25 @@ static void test_sequence(void)
ASSERT_FALSE(accepts(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "abcd")); ASSERT_FALSE(accepts(&fsa, "abcd"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_union(void) static void test_union(void)
{ {
const char *literals = "abc"; const char *literals = "abc";
regex_sequence_t *alternatives = malloc(3 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(3 * sizeof(parse_sequence_t));
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = literals[i]; terms[0].literal = literals[i];
alternatives[i].count = alternatives[i].capacity = 1; alternatives[i].count = alternatives[i].capacity = 1;
alternatives[i].contents = terms; alternatives[i].contents = terms;
} }
regex_t regex = { .count = 3, .capacity = 3, .contents = alternatives }; parse_tree_t regex
= { .count = 3, .capacity = 3, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@ -129,20 +156,21 @@ static void test_union(void)
ASSERT_FALSE(accepts(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "aa")); ASSERT_FALSE(accepts(&fsa, "aa"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_star(void) static void test_star(void)
{ {
regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_STAR; terms[0].quantifier = PARSE_QUANTIFIER_STAR;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@ -152,29 +180,78 @@ static void test_star(void)
ASSERT_TRUE(accepts(&fsa, "aaaaaa")); ASSERT_TRUE(accepts(&fsa, "aaaaaa"));
ASSERT_FALSE(accepts(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa);
}
/*
 * a+ : accepts one or more 'a'; rejects the empty string and other letters.
 */
static void test_plus(void)
{
    parse_term_t *plus_term = malloc(1 * sizeof(parse_term_t));
    plus_term->quantifier = PARSE_QUANTIFIER_PLUS;
    plus_term->type = PARSE_TERM_LITERAL;
    plus_term->literal = 'a';

    parse_sequence_t *only_alternative = malloc(1 * sizeof(parse_sequence_t));
    only_alternative->count = 1;
    only_alternative->capacity = 1;
    only_alternative->contents = plus_term;

    parse_tree_t regex
        = { .count = 1, .capacity = 1, .contents = only_alternative };

    fsa_t fsa;
    construct_nfa(&regex, &fsa);

    ASSERT_TRUE(accepts(&fsa, "a"));
    ASSERT_TRUE(accepts(&fsa, "aaaaaa"));
    ASSERT_FALSE(accepts(&fsa, ""));
    ASSERT_FALSE(accepts(&fsa, "b"));

    /* The tree owns both heap arrays allocated above. */
    parse_tree_free(&regex);
    fsa_free(&fsa);
}
static void test_qmark(void)
{
parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_QMARK;
terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a';
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, ""));
ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "aa"));
ASSERT_FALSE(accepts(&fsa, "b"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_subexpression(void) static void test_subexpression(void)
{ {
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *inner_terms = malloc(1 * sizeof(parse_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE; inner_terms[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms[0].type = REGEX_TERM_LITERAL; inner_terms[0].type = PARSE_TERM_LITERAL;
inner_terms[0].literal = 'a'; inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives parse_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t)); = malloc(1 * sizeof(parse_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1; inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms; inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR; terms[0].type = PARSE_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1; terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives; terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms; alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@ -182,42 +259,108 @@ static void test_subexpression(void)
ASSERT_TRUE(accepts(&fsa, "a")); ASSERT_TRUE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "b")); ASSERT_FALSE(accepts(&fsa, "b"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa);
}
/*
 * [abc] : accepts exactly one of 'a', 'b', 'c'; rejects the empty string,
 * repeated members and characters outside the class.
 */
static void test_class(void)
{
    static const char letters[3] = { 'a', 'b', 'c' };

    char *members = malloc(3);
    for (int i = 0; i < 3; ++i)
        members[i] = letters[i];

    parse_term_t *class_term = malloc(1 * sizeof(parse_term_t));
    class_term->quantifier = PARSE_QUANTIFIER_NONE;
    class_term->type = PARSE_TERM_CLASS;
    class_term->class.negated = false;
    class_term->class.count = 3;
    class_term->class.capacity = 3;
    class_term->class.contents = members;

    parse_sequence_t *only_alternative = malloc(1 * sizeof(parse_sequence_t));
    only_alternative->count = 1;
    only_alternative->capacity = 1;
    only_alternative->contents = class_term;

    const parse_tree_t regex
        = { .count = 1, .capacity = 1, .contents = only_alternative };

    fsa_t fsa;
    construct_nfa(&regex, &fsa);

    ASSERT_TRUE(accepts(&fsa, "a"));
    ASSERT_TRUE(accepts(&fsa, "b"));
    ASSERT_TRUE(accepts(&fsa, "c"));
    ASSERT_FALSE(accepts(&fsa, ""));
    ASSERT_FALSE(accepts(&fsa, "aa"));
    ASSERT_FALSE(accepts(&fsa, "d"));

    /* The tree owns the member array and both heap arrays allocated above. */
    parse_tree_free(&regex);
    fsa_free(&fsa);
}
static void test_negated_class(void)
{
char *class_contents = malloc(3);
class_contents[0] = 'a';
class_contents[1] = 'b';
class_contents[2] = 'c';
parse_term_t *terms = malloc(1 * sizeof(parse_term_t));
terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = PARSE_TERM_CLASS;
terms[0].class.negated = true;
terms[0].class.count = terms[0].class.capacity = 3;
terms[0].class.contents = class_contents;
parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
const parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa;
construct_nfa(&regex, &fsa);
ASSERT_TRUE(accepts(&fsa, "d"));
ASSERT_TRUE(accepts(&fsa, "e"));
ASSERT_FALSE(accepts(&fsa, "a"));
ASSERT_FALSE(accepts(&fsa, "b"));
ASSERT_FALSE(accepts(&fsa, "c"));
ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "aa"));
parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_sequence_containing_starred_union(void) static void test_sequence_containing_starred_union(void)
{ {
// ab(c|d)* // ab(c|d)*
regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t)); parse_term_t *inner_terms0 = malloc(1 * sizeof(parse_term_t));
inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE; inner_terms0[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms0[0].type = REGEX_TERM_LITERAL; inner_terms0[0].type = PARSE_TERM_LITERAL;
inner_terms0[0].literal = 'c'; inner_terms0[0].literal = 'c';
regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t)); parse_term_t *inner_terms1 = malloc(1 * sizeof(parse_term_t));
inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE; inner_terms1[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms1[0].type = REGEX_TERM_LITERAL; inner_terms1[0].type = PARSE_TERM_LITERAL;
inner_terms1[0].literal = 'd'; inner_terms1[0].literal = 'd';
regex_sequence_t *inner_alternatives parse_sequence_t *inner_alternatives
= malloc(2 * sizeof(regex_sequence_t)); = malloc(2 * sizeof(parse_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1; inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms0; inner_alternatives[0].contents = inner_terms0;
inner_alternatives[1].count = inner_alternatives[1].capacity = 1; inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
inner_alternatives[1].contents = inner_terms1; inner_alternatives[1].contents = inner_terms1;
regex_term_t *terms = malloc(3 * sizeof(regex_term_t)); parse_term_t *terms = malloc(3 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_LITERAL; terms[0].type = PARSE_TERM_LITERAL;
terms[0].literal = 'a'; terms[0].literal = 'a';
terms[1].quantifier = REGEX_QUANTIFIER_NONE; terms[1].quantifier = PARSE_QUANTIFIER_NONE;
terms[1].type = REGEX_TERM_LITERAL; terms[1].type = PARSE_TERM_LITERAL;
terms[1].literal = 'b'; terms[1].literal = 'b';
terms[2].quantifier = REGEX_QUANTIFIER_STAR; terms[2].quantifier = PARSE_QUANTIFIER_STAR;
terms[2].type = REGEX_TERM_SUBEXPR; terms[2].type = PARSE_TERM_SUBEXPR;
terms[2].subexpr.count = terms[2].subexpr.capacity = 2; terms[2].subexpr.count = terms[2].subexpr.capacity = 2;
terms[2].subexpr.contents = inner_alternatives; terms[2].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3; alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms; alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@ -233,7 +376,7 @@ static void test_sequence_containing_starred_union(void)
ASSERT_FALSE(accepts(&fsa, "d")); ASSERT_FALSE(accepts(&fsa, "d"));
ASSERT_FALSE(accepts(&fsa, "foo")); ASSERT_FALSE(accepts(&fsa, "foo"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
@ -241,23 +384,24 @@ static void
test_union_of_single_term_and_sequence_containing_starred_term(void) test_union_of_single_term_and_sequence_containing_starred_term(void)
{ {
// a|b*c // a|b*c
regex_term_t *terms0 = malloc(1 * sizeof(regex_term_t)); parse_term_t *terms0 = malloc(1 * sizeof(parse_term_t));
terms0[0].quantifier = REGEX_QUANTIFIER_NONE; terms0[0].quantifier = PARSE_QUANTIFIER_NONE;
terms0[0].type = REGEX_TERM_LITERAL; terms0[0].type = PARSE_TERM_LITERAL;
terms0[0].literal = 'a'; terms0[0].literal = 'a';
regex_term_t *terms1 = malloc(2 * sizeof(regex_term_t)); parse_term_t *terms1 = malloc(2 * sizeof(parse_term_t));
terms1[0].quantifier = REGEX_QUANTIFIER_STAR; terms1[0].quantifier = PARSE_QUANTIFIER_STAR;
terms1[0].type = REGEX_TERM_LITERAL; terms1[0].type = PARSE_TERM_LITERAL;
terms1[0].literal = 'b'; terms1[0].literal = 'b';
terms1[1].quantifier = REGEX_QUANTIFIER_NONE; terms1[1].quantifier = PARSE_QUANTIFIER_NONE;
terms1[1].type = REGEX_TERM_LITERAL; terms1[1].type = PARSE_TERM_LITERAL;
terms1[1].literal = 'c'; terms1[1].literal = 'c';
regex_sequence_t *alternatives = malloc(2 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(2 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1; alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms0; alternatives[0].contents = terms0;
alternatives[1].count = alternatives[1].capacity = 2; alternatives[1].count = alternatives[1].capacity = 2;
alternatives[1].contents = terms1; alternatives[1].contents = terms1;
regex_t regex = { .count = 2, .capacity = 2, .contents = alternatives }; parse_tree_t regex
= { .count = 2, .capacity = 2, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@ -269,38 +413,39 @@ test_union_of_single_term_and_sequence_containing_starred_term(void)
ASSERT_FALSE(accepts(&fsa, "foo")); ASSERT_FALSE(accepts(&fsa, "foo"));
ASSERT_FALSE(accepts(&fsa, "ba")); ASSERT_FALSE(accepts(&fsa, "ba"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
static void test_sequence_of_subexpr_a_or_empty_and_b(void) static void test_sequence_of_subexpr_a_or_empty_and_b(void)
{ {
// (a|ε)b // (a|ε)b
regex_term_t *inner_terms0 = malloc(1 * sizeof(regex_term_t)); parse_term_t *inner_terms0 = malloc(1 * sizeof(parse_term_t));
inner_terms0[0].quantifier = REGEX_QUANTIFIER_NONE; inner_terms0[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms0[0].type = REGEX_TERM_LITERAL; inner_terms0[0].type = PARSE_TERM_LITERAL;
inner_terms0[0].literal = 'a'; inner_terms0[0].literal = 'a';
regex_term_t *inner_terms1 = malloc(1 * sizeof(regex_term_t)); parse_term_t *inner_terms1 = malloc(1 * sizeof(parse_term_t));
inner_terms1[0].quantifier = REGEX_QUANTIFIER_NONE; inner_terms1[0].quantifier = PARSE_QUANTIFIER_NONE;
inner_terms1[0].type = REGEX_TERM_EMPTY; inner_terms1[0].type = PARSE_TERM_EMPTY;
regex_sequence_t *inner_alternatives parse_sequence_t *inner_alternatives
= malloc(2 * sizeof(regex_sequence_t)); = malloc(2 * sizeof(parse_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1; inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms0; inner_alternatives[0].contents = inner_terms0;
inner_alternatives[1].count = inner_alternatives[1].capacity = 1; inner_alternatives[1].count = inner_alternatives[1].capacity = 1;
inner_alternatives[1].contents = inner_terms1; inner_alternatives[1].contents = inner_terms1;
regex_term_t *terms = malloc(2 * sizeof(regex_term_t)); parse_term_t *terms = malloc(2 * sizeof(parse_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE; terms[0].quantifier = PARSE_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR; terms[0].type = PARSE_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 2; terms[0].subexpr.count = terms[0].subexpr.capacity = 2;
terms[0].subexpr.contents = inner_alternatives; terms[0].subexpr.contents = inner_alternatives;
terms[1].quantifier = REGEX_QUANTIFIER_NONE; terms[1].quantifier = PARSE_QUANTIFIER_NONE;
terms[1].type = REGEX_TERM_LITERAL; terms[1].type = PARSE_TERM_LITERAL;
terms[1].literal = 'b'; terms[1].literal = 'b';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t)); parse_sequence_t *alternatives = malloc(1 * sizeof(parse_sequence_t));
alternatives[0].count = alternatives[0].capacity = 2; alternatives[0].count = alternatives[0].capacity = 2;
alternatives[0].contents = terms; alternatives[0].contents = terms;
regex_t regex = { .count = 1, .capacity = 1, .contents = alternatives }; parse_tree_t regex
= { .count = 1, .capacity = 1, .contents = alternatives };
fsa_t fsa; fsa_t fsa;
construct_nfa(&regex, &fsa); construct_nfa(&regex, &fsa);
@ -310,7 +455,7 @@ static void test_sequence_of_subexpr_a_or_empty_and_b(void)
ASSERT_FALSE(accepts(&fsa, "")); ASSERT_FALSE(accepts(&fsa, ""));
ASSERT_FALSE(accepts(&fsa, "a")); ASSERT_FALSE(accepts(&fsa, "a"));
regex_free(&regex); parse_tree_free(&regex);
fsa_free(&fsa); fsa_free(&fsa);
} }
@ -321,10 +466,15 @@ int main(void)
// Base cases // Base cases
test_empty_expression(); test_empty_expression();
test_literal_expression(); test_literal_expression();
test_wildcard();
test_sequence(); test_sequence();
test_union(); test_union();
test_star(); test_star();
test_plus();
test_qmark();
test_subexpression(); test_subexpression();
test_class();
test_negated_class();
// Compound expressions // Compound expressions
test_sequence_containing_starred_union(); test_sequence_containing_starred_union();

View File

@ -1,357 +0,0 @@
/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "desugar.h"
#include "testing.h"
#include <stddef.h>
/*
 * A single unquantified literal has nothing to desugar: after
 * desugar_regex() the tree must still be one alternative holding the
 * literal 'a' with no quantifier.
 */
static void a_is_unchanged(void)
{
    // Hand-built tree for "a"; it is passed to regex_free() at the end.
    regex_term_t *lit = malloc(sizeof *lit);
    lit->type = REGEX_TERM_LITERAL;
    lit->literal = 'a';
    lit->quantifier = REGEX_QUANTIFIER_NONE;

    regex_sequence_t *seq = malloc(sizeof *seq);
    seq->contents = lit;
    seq->capacity = 1;
    seq->count = 1;

    regex_t tree = { .count = 1, .capacity = 1, .contents = seq };
    desugar_regex(&tree);

    ASSERT_EQ(1, tree.count);
    ASSERT_NOT_NULL(tree.contents);
    ASSERT_EQ(1, tree.contents[0].count);
    ASSERT_NOT_NULL(tree.contents[0].contents);

    // Only look at the term once both levels are known to be non-NULL.
    const regex_term_t *out = &tree.contents[0].contents[0];
    ASSERT_EQ(REGEX_QUANTIFIER_NONE, out->quantifier);
    ASSERT_EQ(REGEX_TERM_LITERAL, out->type);
    ASSERT_EQ('a', out->literal);

    regex_free(&tree);
}
static void abc_is_unchanged(void)
{
regex_term_t *terms = malloc(3 * sizeof(regex_term_t));
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
terms[1].type = REGEX_TERM_LITERAL;
terms[1].literal = 'b';
terms[2].type = REGEX_TERM_LITERAL;
terms[2].literal = 'c';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 3;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(3, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type);
ASSERT_EQ('b', t.contents[0].contents[1].literal);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[2].type);
ASSERT_EQ('c', t.contents[0].contents[2].literal);
regex_free(&t);
}
/*
 * The star quantifier is not rewritten by desugar_regex(): "a*" must
 * come back as a single star-quantified literal 'a'.
 */
static void a_star_is_unchanged(void)
{
    // Hand-built tree for "a*".
    regex_term_t *starred = malloc(sizeof *starred);
    starred->type = REGEX_TERM_LITERAL;
    starred->literal = 'a';
    starred->quantifier = REGEX_QUANTIFIER_STAR;

    regex_sequence_t *seq = malloc(sizeof *seq);
    seq->contents = starred;
    seq->capacity = 1;
    seq->count = 1;

    regex_t tree = { .count = 1, .capacity = 1, .contents = seq };
    desugar_regex(&tree);

    ASSERT_EQ(1, tree.count);
    ASSERT_NOT_NULL(tree.contents);
    ASSERT_EQ(1, tree.contents[0].count);
    ASSERT_NOT_NULL(tree.contents[0].contents);

    const regex_term_t *out = &tree.contents[0].contents[0];
    ASSERT_EQ(REGEX_QUANTIFIER_STAR, out->quantifier);
    ASSERT_EQ(REGEX_TERM_LITERAL, out->type);
    ASSERT_EQ('a', out->literal);

    regex_free(&tree);
}
/*
 * A union of three unquantified literals, "a|b|c", needs no desugaring:
 * each alternative must still hold its single literal afterwards.
 */
static void a_or_b_or_c_is_unchanged(void)
{
    const char *letters = "abc";

    // One single-term alternative per letter.
    regex_sequence_t *branches = malloc(3 * sizeof *branches);
    for (int k = 0; k < 3; ++k) {
        regex_term_t *lit = malloc(sizeof *lit);
        lit->type = REGEX_TERM_LITERAL;
        lit->literal = letters[k];
        lit->quantifier = REGEX_QUANTIFIER_NONE;

        regex_sequence_t *branch = &branches[k];
        branch->contents = lit;
        branch->capacity = 1;
        branch->count = 1;
    }

    regex_t tree = { .count = 3, .capacity = 3, .contents = branches };
    desugar_regex(&tree);

    ASSERT_EQ(3, tree.count);
    ASSERT_NOT_NULL(tree.contents);
    for (int k = 0; k < 3; ++k) {
        ASSERT_EQ(1, tree.contents[k].count);
        ASSERT_NOT_NULL(tree.contents[k].contents);

        const regex_term_t *out = &tree.contents[k].contents[0];
        ASSERT_EQ(REGEX_QUANTIFIER_NONE, out->quantifier);
        ASSERT_EQ(REGEX_TERM_LITERAL, out->type);
        ASSERT_EQ(letters[k], out->literal);
    }

    regex_free(&tree);
}
static void subexpr_a_is_unchanged(void)
{
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_NONE;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal);
regex_free(&t);
}
static void a_plus_becomes_subexpr_aa_star(void)
{
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_PLUS;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(2, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal);
ASSERT_EQ(
REGEX_QUANTIFIER_STAR, inner->contents[0].contents[1].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[1].type);
ASSERT_EQ('a', inner->contents[0].contents[1].literal);
regex_free(&t);
}
static void a_qmark_becomes_subexpr_empty_or_a(void)
{
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_QMARK;
terms[0].type = REGEX_TERM_LITERAL;
terms[0].literal = 'a';
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('a', inner->contents[1].contents[0].literal);
regex_free(&t);
}
static void class_abc_becomes_subexpr_a_or_b_or_c(void)
{
char *options = malloc(3 * sizeof(char));
options[0] = 'a';
options[1] = 'b';
options[2] = 'c';
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_CLASS;
terms[0].class.negated = false;
terms[0].class.count = terms[0].class.capacity = 3;
terms[0].class.contents = options;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_NOT_NULL(t.contents[0].contents);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(3, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('b', inner->contents[1].contents[0].literal);
ASSERT_EQ(1, inner->contents[2].count);
ASSERT_NOT_NULL(inner->contents[2].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[2].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[2].contents[0].type);
ASSERT_EQ('c', inner->contents[2].contents[0].literal);
regex_free(&t);
}
static void subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a(void)
{
regex_term_t *inner_terms = malloc(1 * sizeof(regex_term_t));
inner_terms[0].quantifier = REGEX_QUANTIFIER_QMARK;
inner_terms[0].type = REGEX_TERM_LITERAL;
inner_terms[0].literal = 'a';
regex_sequence_t *inner_alternatives
= malloc(1 * sizeof(regex_sequence_t));
inner_alternatives[0].count = inner_alternatives[0].capacity = 1;
inner_alternatives[0].contents = inner_terms;
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
terms[0].type = REGEX_TERM_SUBEXPR;
terms[0].subexpr.count = terms[0].subexpr.capacity = 1;
terms[0].subexpr.contents = inner_alternatives;
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
alternatives[0].count = alternatives[0].capacity = 1;
alternatives[0].contents = terms;
regex_t t = { .count = 1, .capacity = 1, .contents = alternatives };
desugar_regex(&t);
ASSERT_EQ(1, t.count);
ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner;
inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->count);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, inner->contents[0].contents[0].type);
inner = &inner->contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count);
ASSERT_NOT_NULL(inner->contents);
ASSERT_EQ(1, inner->contents[0].count);
ASSERT_NOT_NULL(inner->contents[0].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_EMPTY, inner->contents[0].contents[0].type);
ASSERT_EQ(1, inner->contents[1].count);
ASSERT_NOT_NULL(inner->contents[1].contents);
ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('a', inner->contents[1].contents[0].literal);
regex_free(&t);
}
int main(void)
{
TESTING_BEGIN();
a_is_unchanged();
abc_is_unchanged();
a_star_is_unchanged();
a_or_b_or_c_is_unchanged();
subexpr_a_is_unchanged();
a_plus_becomes_subexpr_aa_star();
a_qmark_becomes_subexpr_empty_or_a();
class_abc_becomes_subexpr_a_or_b_or_c();
subexpr_a_qmark_becomes_subexpr_subexpr_empty_or_a();
return TESTING_END();
}

View File

@ -74,6 +74,19 @@ static void test_system_header_include_regex(void)
fsa_free(&dfa); fsa_free(&dfa);
} }
/*
 * Compiles a pattern describing single-quoted strings in which a quote
 * may be escaped with a backslash, then checks the resulting DFA against
 * a handful of accepted and rejected inputs.
 */
static void test_quoted_string_regex(void)
{
    // The pattern as the compiler sees it: '(\\'|[^'])*'
    const char *pattern = "'(\\\\'|[^'])*'";

    fsa_t quoted_dfa;
    const bool compiled = compile(pattern, strlen(pattern), &quoted_dfa);
    ASSERT_TRUE(compiled);

    ASSERT_ACCEPTS(&quoted_dfa, "''");
    ASSERT_ACCEPTS(&quoted_dfa, "'foo bar baz'");
    // Escaped inner quotes stay inside the string...
    ASSERT_ACCEPTS(&quoted_dfa, "'foo \\'bar\\' baz'");
    // ...whereas unescaped ones must cause a rejection.
    ASSERT_REJECTS(&quoted_dfa, "'foo 'bar' baz'");

    fsa_free(&quoted_dfa);
}
int main(void) int main(void)
{ {
TESTING_BEGIN(); TESTING_BEGIN();
@ -82,5 +95,6 @@ int main(void)
test_arbitrary_regex_1(); test_arbitrary_regex_1();
test_arbitrary_regex_2(); test_arbitrary_regex_2();
test_system_header_include_regex(); test_system_header_include_regex();
test_quoted_string_regex();
return TESTING_END(); return TESTING_END();
} }

View File

@ -10,268 +10,268 @@
static void a_has_1_alternative(void) static void a_has_1_alternative(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("a", &t); const int result = PARSE_EXPR_STRING("a", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
regex_free(&t); parse_tree_free(&t);
} }
static void a_pipe_b_has_2_alternatives(void) static void a_pipe_b_has_2_alternatives(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("a|b", &t); const int result = PARSE_EXPR_STRING("a|b", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(2, t.count); ASSERT_EQ(2, t.count);
regex_free(&t); parse_tree_free(&t);
} }
static void a_pipe_b_pipe_c_has_3_alternatives(void) static void a_pipe_b_pipe_c_has_3_alternatives(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("a|b|c", &t); const int result = PARSE_EXPR_STRING("a|b|c", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(3, t.count); ASSERT_EQ(3, t.count);
regex_free(&t); parse_tree_free(&t);
} }
static void a_is_parsed_as_unquantified_literal(void) static void a_is_parsed_as_unquantified_literal(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("a", &t); const int result = PARSE_EXPR_STRING("a", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal); ASSERT_EQ('a', t.contents[0].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void b_is_parsed_as_unquantified_literal(void) static void b_is_parsed_as_unquantified_literal(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("b", &t); const int result = PARSE_EXPR_STRING("b", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('b', t.contents[0].contents[0].literal); ASSERT_EQ('b', t.contents[0].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void abc_is_parsed_as_sequence_of_unquantified_literals(void) static void abc_is_parsed_as_sequence_of_unquantified_literals(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("abc", &t); const int result = PARSE_EXPR_STRING("abc", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(3, t.contents[0].count); ASSERT_EQ(3, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('a', t.contents[0].contents[0].literal); ASSERT_EQ('a', t.contents[0].contents[0].literal);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[1].type);
ASSERT_EQ('b', t.contents[0].contents[1].literal); ASSERT_EQ('b', t.contents[0].contents[1].literal);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[2].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[2].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[2].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[2].type);
ASSERT_EQ('c', t.contents[0].contents[2].literal); ASSERT_EQ('c', t.contents[0].contents[2].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void dot_is_parsed_as_unquantified_wildcard_term(void) static void dot_is_parsed_as_unquantified_wildcard_term(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING(".", &t); const int result = PARSE_EXPR_STRING(".", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type);
regex_free(&t); parse_tree_free(&t);
} }
static void backslash_dot_is_parsed_as_unquantified_literal(void) static void backslash_dot_is_parsed_as_unquantified_literal(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("\\.", &t); const int result = PARSE_EXPR_STRING("\\.", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('.', t.contents[0].contents[0].literal); ASSERT_EQ('.', t.contents[0].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void backslash_backslash_is_parsed_as_unquantified_literal(void) static void backslash_backslash_is_parsed_as_unquantified_literal(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("\\\\", &t); const int result = PARSE_EXPR_STRING("\\\\", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[0].type);
ASSERT_EQ('\\', t.contents[0].contents[0].literal); ASSERT_EQ('\\', t.contents[0].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
static void a_pipe_b_in_parens_is_parsed_as_subexpr_term(void) static void a_pipe_b_in_parens_is_parsed_as_subexpr_term(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("(a|b)", &t); const int result = PARSE_EXPR_STRING("(a|b)", &t);
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_SUBEXPR, t.contents[0].contents[0].type);
const regex_t *inner = &t.contents[0].contents[0].subexpr; const parse_tree_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(2, inner->count); ASSERT_EQ(2, inner->count);
ASSERT_EQ(1, inner->contents[0].count); ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ( ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); PARSE_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal); ASSERT_EQ('a', inner->contents[0].contents[0].literal);
ASSERT_EQ(1, inner->contents[1].count); ASSERT_EQ(1, inner->contents[1].count);
ASSERT_EQ( ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier); PARSE_QUANTIFIER_NONE, inner->contents[1].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[1].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, inner->contents[1].contents[0].type);
ASSERT_EQ('b', inner->contents[1].contents[0].literal); ASSERT_EQ('b', inner->contents[1].contents[0].literal);
regex_free(&t); parse_tree_free(&t);
} }
/*
 * Test: parsing "(a)b" must succeed and yield one top-level entry holding
 * two terms: an unquantified sub-expression term followed by the unquantified
 * literal 'b'. The sub-expression's first entry must hold exactly one
 * unquantified literal 'a'.
 *
 * NOTE(review): every line below carries two spellings of the same statement
 * (regex_ / REGEX_ names, then parse_tree_ / PARSE_ names) -- this looks like a
 * side-by-side diff rendering; confirm which side is current before compiling.
 */
static void a_in_parens_b_is_parsed_as_sequence_with_subexpr_term(void) static void a_in_parens_b_is_parsed_as_sequence_with_subexpr_term(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("(a)b", &t); const int result = PARSE_EXPR_STRING("(a)b", &t);
// -1 is the value this test treats as a parse failure.
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
// Exactly one top-level entry (presumably a single alternative -- confirm
// against the tree definition), with its contents array allocated.
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
// That entry holds two terms: the parenthesised group, then 'b'.
ASSERT_EQ(2, t.contents[0].count); ASSERT_EQ(2, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_SUBEXPR, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_SUBEXPR, t.contents[0].contents[0].type);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[1].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, t.contents[0].contents[1].type); ASSERT_EQ(PARSE_TERM_LITERAL, t.contents[0].contents[1].type);
ASSERT_EQ('b', t.contents[0].contents[1].literal); ASSERT_EQ('b', t.contents[0].contents[1].literal);
// Descend into the group: its first entry must hold a single literal 'a'.
const regex_t *inner = &t.contents[0].contents[0].subexpr; const parse_tree_t *inner = &t.contents[0].contents[0].subexpr;
ASSERT_EQ(1, inner->contents[0].count); ASSERT_EQ(1, inner->contents[0].count);
ASSERT_EQ( ASSERT_EQ(
REGEX_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier); PARSE_QUANTIFIER_NONE, inner->contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_LITERAL, inner->contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_LITERAL, inner->contents[0].contents[0].type);
ASSERT_EQ('a', inner->contents[0].contents[0].literal); ASSERT_EQ('a', inner->contents[0].contents[0].literal);
// Release the tree filled in by the parse call.
regex_free(&t); parse_tree_free(&t);
} }
/*
 * Test: parsing ".*" must succeed and yield one top-level entry holding a
 * single wildcard term carrying the star quantifier.
 *
 * NOTE(review): every line below carries two spellings of the same statement
 * (regex_ / REGEX_ names, then parse_tree_ / PARSE_ names) -- this looks like a
 * side-by-side diff rendering; confirm which side is current before compiling.
 */
static void dot_star_is_parsed_as_star_quantified_wildcard(void) static void dot_star_is_parsed_as_star_quantified_wildcard(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING(".*", &t); const int result = PARSE_EXPR_STRING(".*", &t);
// -1 is the value this test treats as a parse failure.
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
// One top-level entry containing exactly one term.
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
// The '*' is recorded on the term itself; the term type is the wildcard.
ASSERT_EQ(REGEX_QUANTIFIER_STAR, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_STAR, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type);
// Release the tree filled in by the parse call.
regex_free(&t); parse_tree_free(&t);
} }
/*
 * Test: parsing ".+" must succeed and yield one top-level entry holding a
 * single wildcard term carrying the plus quantifier.
 *
 * NOTE(review): every line below carries two spellings of the same statement
 * (regex_ / REGEX_ names, then parse_tree_ / PARSE_ names) -- this looks like a
 * side-by-side diff rendering; confirm which side is current before compiling.
 */
static void dot_plus_is_parsed_as_plus_quantified_wildcard(void) static void dot_plus_is_parsed_as_plus_quantified_wildcard(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING(".+", &t); const int result = PARSE_EXPR_STRING(".+", &t);
// -1 is the value this test treats as a parse failure.
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
// One top-level entry containing exactly one term.
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
// The '+' is recorded on the term itself; the term type is the wildcard.
ASSERT_EQ(REGEX_QUANTIFIER_PLUS, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_PLUS, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type);
// Release the tree filled in by the parse call.
regex_free(&t); parse_tree_free(&t);
} }
/*
 * Test: parsing ".?" must succeed and yield one top-level entry holding a
 * single wildcard term carrying the question-mark quantifier.
 *
 * NOTE(review): every line below carries two spellings of the same statement
 * (regex_ / REGEX_ names, then parse_tree_ / PARSE_ names) -- this looks like a
 * side-by-side diff rendering; confirm which side is current before compiling.
 */
static void dot_question_mark_is_parsed_as_qmrk_quantified_wildcard(void) static void dot_question_mark_is_parsed_as_qmrk_quantified_wildcard(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING(".?", &t); const int result = PARSE_EXPR_STRING(".?", &t);
// -1 is the value this test treats as a parse failure.
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
// One top-level entry containing exactly one term.
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
// The '?' is recorded on the term itself; the term type is the wildcard.
ASSERT_EQ(REGEX_QUANTIFIER_QMARK, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_QMARK, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_WILDCARD, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_WILDCARD, t.contents[0].contents[0].type);
// Release the tree filled in by the parse call.
regex_free(&t); parse_tree_free(&t);
} }
/*
 * Test: parsing "[a]" must succeed and yield one top-level entry holding a
 * single unquantified class term that is not negated and whose contents are
 * exactly the one character 'a'.
 *
 * NOTE(review): every line below carries two spellings of the same statement
 * (regex_ / REGEX_ names, then parse_tree_ / PARSE_ names) -- this looks like a
 * side-by-side diff rendering; confirm which side is current before compiling.
 */
static void a_in_brackets_is_parsed_as_class_containing_only_a(void) static void a_in_brackets_is_parsed_as_class_containing_only_a(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("[a]", &t); const int result = PARSE_EXPR_STRING("[a]", &t);
// -1 is the value this test treats as a parse failure.
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
// One top-level entry containing exactly one term.
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_CLASS, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_CLASS, t.contents[0].contents[0].type);
// No leading '^' in the brackets, so the class must not be flagged negated.
ASSERT_FALSE(t.contents[0].contents[0].class.negated); ASSERT_FALSE(t.contents[0].contents[0].class.negated);
// The class lists a single member, 'a'.
ASSERT_EQ(1, t.contents[0].contents[0].class.count); ASSERT_EQ(1, t.contents[0].contents[0].class.count);
ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents); ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents);
ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]); ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]);
// Release the tree filled in by the parse call.
regex_free(&t); parse_tree_free(&t);
} }
/*
 * Test: parsing "[^a]" must succeed and yield one top-level entry holding a
 * single unquantified class term that is flagged negated and whose contents
 * are exactly the one character 'a' (the '^' is not stored as a member).
 *
 * NOTE(review): every line below carries two spellings of the same statement
 * (regex_ / REGEX_ names, then parse_tree_ / PARSE_ names) -- this looks like a
 * side-by-side diff rendering; confirm which side is current before compiling.
 */
static void caret_a_in_brackets_parses_as_negated_class(void) static void caret_a_in_brackets_parses_as_negated_class(void)
{ {
regex_t t; parse_tree_t t;
const int result = PARSE_EXPR_STRING("[^a]", &t); const int result = PARSE_EXPR_STRING("[^a]", &t);
// -1 is the value this test treats as a parse failure.
ASSERT_NE(-1, result); ASSERT_NE(-1, result);
// One top-level entry containing exactly one term.
ASSERT_EQ(1, t.count); ASSERT_EQ(1, t.count);
ASSERT_NOT_NULL(t.contents); ASSERT_NOT_NULL(t.contents);
ASSERT_EQ(1, t.contents[0].count); ASSERT_EQ(1, t.contents[0].count);
ASSERT_EQ(REGEX_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier); ASSERT_EQ(PARSE_QUANTIFIER_NONE, t.contents[0].contents[0].quantifier);
ASSERT_EQ(REGEX_TERM_CLASS, t.contents[0].contents[0].type); ASSERT_EQ(PARSE_TERM_CLASS, t.contents[0].contents[0].type);
// The leading '^' must set the negated flag rather than add a member.
ASSERT_TRUE(t.contents[0].contents[0].class.negated); ASSERT_TRUE(t.contents[0].contents[0].class.negated);
// Only 'a' is listed as a member of the class.
ASSERT_EQ(1, t.contents[0].contents[0].class.count); ASSERT_EQ(1, t.contents[0].contents[0].class.count);
ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents); ASSERT_NOT_NULL(t.contents[0].contents[0].class.contents);
ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]); ASSERT_EQ('a', t.contents[0].contents[0].class.contents[0]);
// Release the tree filled in by the parse call.
regex_free(&t); parse_tree_free(&t);
} }
int main(void) int main(void)