Move regex_t into parse.h and rename to parse_tree_t
This commit is contained in:
@@ -5,7 +5,6 @@ add_library(lib
|
||||
fsa.c
|
||||
min_heap.c
|
||||
parse.c
|
||||
regex.c
|
||||
)
|
||||
set_default_target_options(lib)
|
||||
target_include_directories(lib PUBLIC include)
|
||||
|
||||
@@ -11,13 +11,13 @@
|
||||
|
||||
bool compile(const char *regex, int len, fsa_t *dfa_out)
|
||||
{
|
||||
regex_t pt;
|
||||
parse_tree_t pt;
|
||||
if (-1 == parse_expr(regex, len, &pt))
|
||||
return false;
|
||||
|
||||
fsa_t nfa;
|
||||
construct_nfa(&pt, &nfa);
|
||||
regex_free(&pt);
|
||||
parse_tree_free(&pt);
|
||||
|
||||
convert_to_dfa(&nfa, dfa_out);
|
||||
fsa_free(&nfa);
|
||||
|
||||
@@ -128,7 +128,7 @@ static void construct_symbol(fsa_t *out, int symbol)
|
||||
fsa_add_rule(out, out->initial, 0, symbol);
|
||||
}
|
||||
|
||||
static bool in_class(const regex_class_t *class, char c)
|
||||
static bool in_class(const parse_class_t *class, char c)
|
||||
{
|
||||
for (int i = 0; i < class->count; ++i) {
|
||||
if (class->contents[i] == c)
|
||||
@@ -137,7 +137,7 @@ static bool in_class(const regex_class_t *class, char c)
|
||||
return false;
|
||||
}
|
||||
|
||||
static void construct_class(fsa_t *out, const regex_class_t *class)
|
||||
static void construct_class(fsa_t *out, const parse_class_t *class)
|
||||
{
|
||||
construct_base(out);
|
||||
if (class->negated) {
|
||||
@@ -190,36 +190,36 @@ static void construct_qmark(fsa_t *out)
|
||||
fsa_add_rule(out, out->initial, 0, EPSILON);
|
||||
}
|
||||
|
||||
static void construct_term(const regex_term_t *term, fsa_t *out)
|
||||
static void construct_term(const parse_term_t *term, fsa_t *out)
|
||||
{
|
||||
switch (term->type) {
|
||||
case REGEX_TERM_EMPTY:
|
||||
case PARSE_TERM_EMPTY:
|
||||
construct_symbol(out, EPSILON);
|
||||
break;
|
||||
case REGEX_TERM_LITERAL:
|
||||
case PARSE_TERM_LITERAL:
|
||||
construct_symbol(out, term->literal);
|
||||
break;
|
||||
case REGEX_TERM_SUBEXPR:
|
||||
case PARSE_TERM_SUBEXPR:
|
||||
construct_nfa(&term->subexpr, out);
|
||||
break;
|
||||
case REGEX_TERM_CLASS:
|
||||
case PARSE_TERM_CLASS:
|
||||
construct_class(out, &term->class);
|
||||
break;
|
||||
case REGEX_TERM_WILDCARD:
|
||||
case PARSE_TERM_WILDCARD:
|
||||
construct_wildcard(out);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (term->quantifier) {
|
||||
case REGEX_QUANTIFIER_NONE:
|
||||
case PARSE_QUANTIFIER_NONE:
|
||||
break;
|
||||
case REGEX_QUANTIFIER_STAR:
|
||||
case PARSE_QUANTIFIER_STAR:
|
||||
construct_star(out);
|
||||
break;
|
||||
case REGEX_QUANTIFIER_PLUS:
|
||||
case PARSE_QUANTIFIER_PLUS:
|
||||
construct_plus(out);
|
||||
break;
|
||||
case REGEX_QUANTIFIER_QMARK:
|
||||
case PARSE_QUANTIFIER_QMARK:
|
||||
construct_qmark(out);
|
||||
break;
|
||||
}
|
||||
@@ -227,7 +227,7 @@ static void construct_term(const regex_term_t *term, fsa_t *out)
|
||||
assert(out->states[0].final);
|
||||
}
|
||||
|
||||
static void construct_sequence(const regex_sequence_t *seq, fsa_t *out)
|
||||
static void construct_sequence(const parse_sequence_t *seq, fsa_t *out)
|
||||
{
|
||||
assert(seq->count > 0);
|
||||
|
||||
@@ -261,7 +261,7 @@ static void construct_union(fsa_t *f, const fsa_t *o)
|
||||
fsa_add_rule(f, final, 0, EPSILON);
|
||||
}
|
||||
|
||||
void construct_nfa(const regex_t *regex, fsa_t *out)
|
||||
void construct_nfa(const parse_tree_t *regex, fsa_t *out)
|
||||
{
|
||||
assert(regex->count > 0);
|
||||
|
||||
|
||||
@@ -7,8 +7,8 @@
|
||||
#define CONSTRUCT_H
|
||||
|
||||
#include "fsa.h"
|
||||
#include "regex.h"
|
||||
#include "parse.h"
|
||||
|
||||
void construct_nfa(const regex_t *regex, fsa_t *out);
|
||||
void construct_nfa(const parse_tree_t *regex, fsa_t *out);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -6,10 +6,53 @@
|
||||
#ifndef PARSE_H
|
||||
#define PARSE_H
|
||||
|
||||
#include "regex.h"
|
||||
#include <stdbool.h>
|
||||
|
||||
#define PARSE_FAIL (-1)
|
||||
|
||||
int parse_expr(const char *input, int rem, regex_t *out);
|
||||
/*
 * A parsed character class: the characters that were listed in the
 * class, plus a flag recording whether the class is negated.
 */
typedef struct {
|
||||
bool negated; /* construct_class() branches on this; presumably set for a negated class -- TODO confirm syntax */
|
||||
int count, capacity; /* count: entries of contents scanned by in_class(); capacity: presumably allocated size -- TODO confirm */
|
||||
char *contents; /* heap buffer of member characters; released by class_free() */
|
||||
} parse_class_t;
|
||||
|
||||
/*
 * Quantifier attached to a term. parse_term() picks the value from the
 * character that directly follows the term.
 */
typedef enum {
|
||||
PARSE_QUANTIFIER_NONE, /* no quantifier character follows the term */
|
||||
PARSE_QUANTIFIER_STAR, /* term followed by '*' */
|
||||
PARSE_QUANTIFIER_PLUS, /* term followed by '+' */
|
||||
PARSE_QUANTIFIER_QMARK, /* term followed by '?' */
|
||||
} parse_quantifier_t;
|
||||
|
||||
/*
 * Discriminator for parse_term_t: says what kind of term was parsed and
 * therefore which member of the term's union (if any) is meaningful.
 */
typedef enum {
|
||||
PARSE_TERM_WILDCARD, /* '.' in the input; no union member used */
|
||||
PARSE_TERM_CLASS, /* character class; uses the class member */
|
||||
PARSE_TERM_LITERAL, /* single literal character; uses the literal member */
|
||||
PARSE_TERM_SUBEXPR, /* parenthesised sub-expression; uses the subexpr member */
|
||||
PARSE_TERM_EMPTY, /* construct_term() turns this into an EPSILON symbol */
|
||||
} parse_term_type_t;
|
||||
|
||||
/*
 * Forward declaration: a term may contain a sub-expression tree, which
 * in turn contains sequences of terms, so the types are mutually
 * recursive.
 */
struct _parse_term;
|
||||
/*
 * A growable array of terms. parse_sequence() allocates the array,
 * doubles capacity when it is full, and reports failure when no term
 * was parsed (count == 0).
 */
typedef struct {
|
||||
int count, capacity; /* terms in use / terms allocated */
|
||||
struct _parse_term *contents; /* heap array of terms */
|
||||
} parse_sequence_t;
|
||||
|
||||
/*
 * Root of a parsed expression: a growable array of sequences, filled in
 * by parse_expr() and released with parse_tree_free(). construct_nfa()
 * asserts that count > 0.
 * NOTE(review): the sequences presumably are the alternatives of a
 * union -- confirm against parse_expr().
 */
typedef struct {
|
||||
int count, capacity; /* sequences in use / sequences allocated */
|
||||
parse_sequence_t *contents; /* heap array of sequences */
|
||||
} parse_tree_t;
|
||||
|
||||
/*
 * One term of a sequence together with its quantifier. The type field
 * selects which member of the anonymous union is valid.
 */
typedef struct _parse_term {
|
||||
parse_quantifier_t quantifier; /* set by parse_term() from the character after the term */
|
||||
parse_term_type_t type; /* discriminator for the union below */
|
||||
union {
|
||||
parse_class_t class; /* valid when type is PARSE_TERM_CLASS */
|
||||
char literal; /* valid when type is PARSE_TERM_LITERAL */
|
||||
parse_tree_t subexpr; /* valid when type is PARSE_TERM_SUBEXPR */
|
||||
};
|
||||
} parse_term_t;
|
||||
|
||||
/*
 * Parse an expression from input, reading at most rem characters, and
 * store the result in *out. Returns the number of characters consumed;
 * callers compare the result against PARSE_FAIL to detect failure.
 * The function allocates out->contents.
 * NOTE(review): whether *out still needs parse_tree_free() after a
 * failed parse is not visible here -- confirm.
 */
int parse_expr(const char *input, int rem, parse_tree_t *out);
|
||||
/*
 * Release the heap storage owned by *t: every sequence, the classes and
 * sub-expressions inside their terms, and the sequence array itself.
 * The parse_tree_t object itself is not freed.
 */
void parse_tree_free(const parse_tree_t *t);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) Camden Dixie O'Brien
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
#ifndef REGEX_H
|
||||
#define REGEX_H
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
 * A character class: the characters that were listed in the class, plus
 * a flag recording whether the class is negated.
 */
typedef struct {
|
||||
bool negated; /* presumably set for a negated class -- TODO confirm */
|
||||
int count, capacity; /* count: entries used in contents; capacity: presumably allocated size -- TODO confirm */
|
||||
char *contents; /* heap buffer; released by regex_class_free() */
|
||||
} regex_class_t;
|
||||
|
||||
/*
 * Quantifier attached to a term, chosen from the character that
 * directly follows the term in the input.
 */
typedef enum {
|
||||
REGEX_QUANTIFIER_NONE, /* no quantifier character follows the term */
|
||||
REGEX_QUANTIFIER_STAR, /* term followed by '*' */
|
||||
REGEX_QUANTIFIER_PLUS, /* term followed by '+' */
|
||||
REGEX_QUANTIFIER_QMARK, /* term followed by '?' */
|
||||
} regex_quantifier_t;
|
||||
|
||||
/*
 * Discriminator for regex_term_t: says what kind of term this is and
 * therefore which member of the term's union (if any) is meaningful.
 */
typedef enum {
|
||||
REGEX_TERM_WILDCARD, /* '.' in the input; no union member used */
|
||||
REGEX_TERM_CLASS, /* character class; uses the class member */
|
||||
REGEX_TERM_LITERAL, /* single literal character; uses the literal member */
|
||||
REGEX_TERM_SUBEXPR, /* parenthesised sub-expression; uses the subexpr member */
|
||||
REGEX_TERM_EMPTY, /* handled as an EPSILON symbol during NFA construction */
|
||||
} regex_term_type_t;
|
||||
|
||||
/*
 * Forward declaration: a term may hold a nested regex_t, which itself
 * holds sequences of terms, so the types are mutually recursive.
 */
struct _regex_term;
|
||||
/* A growable array of terms. */
typedef struct {
|
||||
int count, capacity; /* terms in use / terms allocated */
|
||||
struct _regex_term *contents; /* heap array of terms */
|
||||
} regex_sequence_t;
|
||||
|
||||
/*
 * Root of a parsed expression: a growable array of sequences, released
 * with regex_free().
 * NOTE(review): the sequences presumably are the alternatives of a
 * union -- confirm against the parser.
 */
typedef struct {
|
||||
int count, capacity; /* sequences in use / sequences allocated */
|
||||
regex_sequence_t *contents; /* heap array of sequences */
|
||||
} regex_t;
|
||||
|
||||
/*
 * One term of a sequence together with its quantifier. The type field
 * selects which member of the anonymous union is valid.
 */
typedef struct _regex_term {
|
||||
regex_quantifier_t quantifier; /* quantifier applied to this term */
|
||||
regex_term_type_t type; /* discriminator for the union below */
|
||||
union {
|
||||
regex_class_t class; /* valid when type is REGEX_TERM_CLASS */
|
||||
char literal; /* valid when type is REGEX_TERM_LITERAL */
|
||||
regex_t subexpr; /* valid when type is REGEX_TERM_SUBEXPR */
|
||||
};
|
||||
} regex_term_t;
|
||||
|
||||
/*
 * Release the heap storage owned by *t: every sequence, the classes and
 * sub-expressions inside their terms, and the sequence array itself.
 * The regex_t object itself is not freed.
 */
void regex_free(const regex_t *t);
|
||||
/* Release the contents buffer owned by *c; the struct itself is not freed. */
void regex_class_free(const regex_class_t *c);
|
||||
|
||||
#endif
|
||||
68
lib/parse.c
68
lib/parse.c
@@ -45,7 +45,7 @@ static int parse_literal(const char *input, int rem, char *out)
|
||||
}
|
||||
}
|
||||
|
||||
static int parse_class(const char *input, int rem, regex_class_t *out)
|
||||
static int parse_class(const char *input, int rem, parse_class_t *out)
|
||||
{
|
||||
int result, used = 0;
|
||||
|
||||
@@ -87,7 +87,7 @@ static int parse_class(const char *input, int rem, regex_class_t *out)
|
||||
return out->count > 0 ? used : -1;
|
||||
}
|
||||
|
||||
static int parse_term(const char *input, int rem, regex_term_t *out)
|
||||
static int parse_term(const char *input, int rem, parse_term_t *out)
|
||||
{
|
||||
int result, used = 0;
|
||||
|
||||
@@ -95,7 +95,7 @@ static int parse_term(const char *input, int rem, regex_term_t *out)
|
||||
return PARSE_FAIL;
|
||||
|
||||
if ('.' == input[0]) {
|
||||
out->type = REGEX_TERM_WILDCARD;
|
||||
out->type = PARSE_TERM_WILDCARD;
|
||||
++used;
|
||||
} else if ('(' == input[0]) {
|
||||
++used;
|
||||
@@ -103,7 +103,7 @@ static int parse_term(const char *input, int rem, regex_term_t *out)
|
||||
result = parse_expr(input + used, rem - used, &out->subexpr);
|
||||
if (PARSE_FAIL == result)
|
||||
return PARSE_FAIL;
|
||||
out->type = REGEX_TERM_SUBEXPR;
|
||||
out->type = PARSE_TERM_SUBEXPR;
|
||||
used += result;
|
||||
|
||||
if (')' != input[used])
|
||||
@@ -113,54 +113,54 @@ static int parse_term(const char *input, int rem, regex_term_t *out)
|
||||
result = parse_class(input + used, rem - used, &out->class);
|
||||
if (PARSE_FAIL == result)
|
||||
return PARSE_FAIL;
|
||||
out->type = REGEX_TERM_CLASS;
|
||||
out->type = PARSE_TERM_CLASS;
|
||||
used += result;
|
||||
} else {
|
||||
result = parse_literal(input + used, rem - used, &out->literal);
|
||||
if (PARSE_FAIL == result)
|
||||
return PARSE_FAIL;
|
||||
out->type = REGEX_TERM_LITERAL;
|
||||
out->type = PARSE_TERM_LITERAL;
|
||||
used += result;
|
||||
}
|
||||
|
||||
if (used < rem) {
|
||||
switch (input[used]) {
|
||||
case '*':
|
||||
out->quantifier = REGEX_QUANTIFIER_STAR;
|
||||
out->quantifier = PARSE_QUANTIFIER_STAR;
|
||||
++used;
|
||||
break;
|
||||
case '+':
|
||||
out->quantifier = REGEX_QUANTIFIER_PLUS;
|
||||
out->quantifier = PARSE_QUANTIFIER_PLUS;
|
||||
++used;
|
||||
break;
|
||||
case '?':
|
||||
out->quantifier = REGEX_QUANTIFIER_QMARK;
|
||||
out->quantifier = PARSE_QUANTIFIER_QMARK;
|
||||
++used;
|
||||
break;
|
||||
default:
|
||||
out->quantifier = REGEX_QUANTIFIER_NONE;
|
||||
out->quantifier = PARSE_QUANTIFIER_NONE;
|
||||
}
|
||||
} else {
|
||||
out->quantifier = REGEX_QUANTIFIER_NONE;
|
||||
out->quantifier = PARSE_QUANTIFIER_NONE;
|
||||
}
|
||||
|
||||
return used;
|
||||
}
|
||||
|
||||
static int parse_sequence(const char *input, int rem, regex_sequence_t *out)
|
||||
static int parse_sequence(const char *input, int rem, parse_sequence_t *out)
|
||||
{
|
||||
int result, used = 0;
|
||||
|
||||
out->count = 0;
|
||||
out->capacity = SEQUENCE_START_CAPACITY;
|
||||
out->contents = malloc(out->capacity * sizeof(regex_term_t));
|
||||
out->contents = malloc(out->capacity * sizeof(parse_term_t));
|
||||
assert(NULL != out->contents);
|
||||
|
||||
while (used < rem) {
|
||||
if (out->count >= out->capacity) {
|
||||
out->capacity *= 2;
|
||||
out->contents = realloc(
|
||||
out->contents, out->capacity * sizeof(regex_term_t));
|
||||
out->contents, out->capacity * sizeof(parse_term_t));
|
||||
assert(NULL != out->contents);
|
||||
}
|
||||
|
||||
@@ -175,13 +175,13 @@ static int parse_sequence(const char *input, int rem, regex_sequence_t *out)
|
||||
return out->count > 0 ? used : -1;
|
||||
}
|
||||
|
||||
int parse_expr(const char *input, int rem, regex_t *out)
|
||||
int parse_expr(const char *input, int rem, parse_tree_t *out)
|
||||
{
|
||||
int result, used = 0;
|
||||
|
||||
out->count = 0;
|
||||
out->capacity = TREE_START_CAPACITY;
|
||||
out->contents = malloc(out->capacity * sizeof(regex_sequence_t));
|
||||
out->contents = malloc(out->capacity * sizeof(parse_sequence_t));
|
||||
assert(NULL != out->contents);
|
||||
|
||||
result = parse_sequence(input + used, rem - used, &out->contents[0]);
|
||||
@@ -198,7 +198,7 @@ int parse_expr(const char *input, int rem, regex_t *out)
|
||||
if (out->count >= out->capacity) {
|
||||
out->capacity *= 2;
|
||||
out->contents = realloc(
|
||||
out->contents, out->capacity * sizeof(regex_sequence_t));
|
||||
out->contents, out->capacity * sizeof(parse_sequence_t));
|
||||
assert(NULL != out->contents);
|
||||
}
|
||||
|
||||
@@ -212,3 +212,37 @@ int parse_expr(const char *input, int rem, regex_t *out)
|
||||
|
||||
return used;
|
||||
}
|
||||
|
||||
static void class_free(const parse_class_t *c)
|
||||
{
|
||||
if (NULL != c->contents)
|
||||
free(c->contents);
|
||||
}
|
||||
|
||||
static void sequence_free(const parse_sequence_t *s)
|
||||
{
|
||||
if (NULL != s->contents) {
|
||||
for (int i = 0; i < s->count; ++i) {
|
||||
switch (s->contents[i].type) {
|
||||
case PARSE_TERM_CLASS:
|
||||
class_free(&s->contents[i].class);
|
||||
break;
|
||||
case PARSE_TERM_SUBEXPR:
|
||||
parse_tree_free(&s->contents[i].subexpr);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(s->contents);
|
||||
}
|
||||
}
|
||||
|
||||
void parse_tree_free(const parse_tree_t *t)
|
||||
{
|
||||
if (NULL != t->contents) {
|
||||
for (int i = 0; i < t->count; ++i)
|
||||
sequence_free(&t->contents[i]);
|
||||
free(t->contents);
|
||||
}
|
||||
}
|
||||
|
||||
42
lib/regex.c
42
lib/regex.c
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) Camden Dixie O'Brien
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
#include "regex.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
static void sequence_free(const regex_sequence_t *s)
|
||||
{
|
||||
if (NULL != s->contents) {
|
||||
for (int i = 0; i < s->count; ++i) {
|
||||
switch (s->contents[i].type) {
|
||||
case REGEX_TERM_CLASS:
|
||||
regex_class_free(&s->contents[i].class);
|
||||
break;
|
||||
case REGEX_TERM_SUBEXPR:
|
||||
regex_free(&s->contents[i].subexpr);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(s->contents);
|
||||
}
|
||||
}
|
||||
|
||||
void regex_free(const regex_t *t)
|
||||
{
|
||||
if (NULL != t->contents) {
|
||||
for (int i = 0; i < t->count; ++i)
|
||||
sequence_free(&t->contents[i]);
|
||||
free(t->contents);
|
||||
}
|
||||
}
|
||||
|
||||
void regex_class_free(const regex_class_t *c)
|
||||
{
|
||||
if (NULL != c->contents)
|
||||
free(c->contents);
|
||||
}
|
||||
Reference in New Issue
Block a user