256 lines
4.9 KiB
C
256 lines
4.9 KiB
C
/*
|
|
* Copyright (c) Camden Dixie O'Brien
|
|
* SPDX-License-Identifier: AGPL-3.0-only
|
|
*/
|
|
|
|
#include "parse.h"
|
|
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
|
|
/* Initial number of character slots allocated for a character class. */
#define CLASS_START_CAPACITY 4
/* Initial number of term slots allocated for a sequence. */
#define SEQUENCE_START_CAPACITY 8
/* Initial number of sequence slots allocated for a tree's alternatives. */
#define TREE_START_CAPACITY 4
|
|
|
|
/*
 * Report whether c is one of the characters that carry syntactic
 * meaning in a pattern: | . \ ( ) * + ? [ ] ^
 *
 * Returns true for exactly those eleven characters and false for every
 * other value.
 */
static bool is_special(char c)
{
	static const char specials[] = {
		'|', '.', '\\', '(', ')', '*', '+', '?', '[', ']', '^',
	};
	const int count = (int)(sizeof(specials) / sizeof(specials[0]));

	for (int i = 0; i < count; ++i) {
		if (specials[i] == c)
			return true;
	}
	return false;
}
|
|
|
|
/*
 * Parse a single literal character from the start of input.
 *
 * input: text to parse.
 * rem:   number of characters available in input.
 * out:   receives the literal character on success.
 *
 * A non-special character stands for itself and consumes one character.
 * A backslash followed by any character yields that following character
 * and consumes two.
 *
 * Returns the number of characters consumed (1 or 2), or -1 when no
 * literal starts here; *out is not written on failure.
 */
static int parse_literal(const char *input, int rem, char *out)
{
	if (rem <= 0)
		return -1;

	const char first = input[0];
	if (!is_special(first)) {
		*out = first;
		return 1;
	}

	/* Only a backslash with a character after it can still succeed. */
	if ('\\' != first || rem < 2)
		return -1;

	*out = input[1];
	return 2;
}
|
|
|
|
/*
 * Parse a bracketed character class such as "[abc]" or "[^abc]" from
 * the start of input.
 *
 * input: text to parse.
 * rem:   number of characters available in input.
 * out:   receives the negation flag and a heap-allocated array of the
 *        member characters.
 *
 * Members are read with parse_literal(), so special characters must be
 * backslash-escaped inside the brackets.  A class with no members is
 * rejected.
 *
 * Returns the number of characters consumed (including both brackets),
 * or -1 on a syntax error or allocation failure.  On success the caller
 * owns out->contents.  On any failure after the opening bracket was
 * seen, no allocation is left behind and out->contents is NULL.
 */
static int parse_class(const char *input, int rem, parse_class_t *out)
{
	int result, used = 0;

	if (used >= rem || '[' != input[used])
		return -1;
	++used;

	if (used < rem && '^' == input[used]) {
		out->negated = true;
		++used;
	} else {
		out->negated = false;
	}

	out->count = 0;
	out->capacity = CLASS_START_CAPACITY;
	out->contents = malloc(out->capacity);
	if (NULL == out->contents)
		return -1;

	while (used < rem) {
		if (out->count >= out->capacity) {
			/*
			 * Grow through a temporary: on failure realloc leaves
			 * the old buffer valid, and it must still be freed.
			 */
			char *grown = realloc(out->contents, out->capacity * 2);
			if (NULL == grown)
				goto fail;
			out->contents = grown;
			out->capacity *= 2;
		}

		result = parse_literal(
			input + used, rem - used, &out->contents[out->count]);
		if (result < 0)
			break;
		used += result;
		++out->count;
	}

	/* Require the closing bracket and at least one member. */
	if (used >= rem || ']' != input[used] || 0 == out->count)
		goto fail;
	++used;

	return used;

fail:
	/* Callers drop a failed term without freeing it, so clean up here. */
	free(out->contents);
	out->contents = NULL;
	out->count = 0;
	out->capacity = 0;
	return -1;
}
|
|
|
|
/*
 * Parse one term, with an optional trailing quantifier, from the start
 * of input.
 *
 * input: text to parse.
 * rem:   number of characters available in input.
 * out:   receives the term's type, its payload, and its quantifier.
 *
 * A term is one of:
 *   '.'         wildcard
 *   '(' ... ')' parenthesised sub-expression (parsed by parse_expr)
 *   '[' ... ']' character class (parsed by parse_class)
 *   otherwise   a literal (parsed by parse_literal)
 * and may be followed by one of '*', '+' or '?'.
 *
 * Returns the number of characters consumed, or -1 on failure.  When
 * the closing ')' of a sub-expression is missing, the sub-expression
 * that was already parsed is freed before returning.
 */
static int parse_term(const char *input, int rem, parse_term_t *out)
{
	int result, used = 0;

	if (1 > rem)
		return -1;

	if ('.' == input[0]) {
		out->type = PARSE_TERM_WILDCARD;
		++used;
	} else if ('(' == input[0]) {
		++used;

		result = parse_expr(input + used, rem - used, &out->subexpr);
		if (result < 0)
			return -1;
		out->type = PARSE_TERM_SUBEXPR;
		used += result;

		/*
		 * Check the bound before looking for ')': the sub-expression
		 * may have consumed everything up to rem.  The caller discards
		 * a failed term, so release the parsed sub-expression here.
		 */
		if (used >= rem || ')' != input[used]) {
			parse_free_tree_children(&out->subexpr);
			out->subexpr.alternatives = NULL;
			out->subexpr.count = 0;
			return -1;
		}
		++used;
	} else if ('[' == input[0]) {
		result = parse_class(input + used, rem - used, &out->class);
		if (result < 0)
			return -1;
		out->type = PARSE_TERM_CLASS;
		used += result;
	} else {
		result = parse_literal(input + used, rem - used, &out->literal);
		if (result < 0)
			return -1;
		out->type = PARSE_TERM_LITERAL;
		used += result;
	}

	/* A quantifier is optional; default to none. */
	out->quantifier = PARSE_QUANTIFIER_NONE;
	if (used < rem) {
		switch (input[used]) {
		case '*':
			out->quantifier = PARSE_QUANTIFIER_STAR;
			++used;
			break;
		case '+':
			out->quantifier = PARSE_QUANTIFIER_PLUS;
			++used;
			break;
		case '?':
			out->quantifier = PARSE_QUANTIFIER_QMRK;
			++used;
			break;
		default:
			break;
		}
	}

	return used;
}
|
|
|
|
/*
 * Parse a run of one or more consecutive terms from the start of input.
 *
 * input: text to parse.
 * rem:   number of characters available in input.
 * out:   receives a heap-allocated array of the parsed terms.
 *
 * Parsing stops at the end of input or at the first position where
 * parse_term() fails; the terms parsed up to that point are kept.
 *
 * Returns the number of characters consumed, or -1 if no term could be
 * parsed or an allocation failed.  On success the caller owns
 * out->contents (release it with parse_free_sequence_children()).  On
 * failure everything allocated here is freed, out->contents is NULL and
 * out->len is 0.
 */
static int parse_sequence(const char *input, int rem, parse_sequence_t *out)
{
	int result, used = 0;

	out->len = 0;
	out->capacity = SEQUENCE_START_CAPACITY;
	out->contents = malloc(out->capacity * sizeof(parse_term_t));
	if (NULL == out->contents)
		return -1;

	while (used < rem) {
		if (out->len >= out->capacity) {
			/*
			 * Grow through a temporary: on failure realloc leaves
			 * the old array valid, and it and the terms in it must
			 * still be freed.
			 */
			parse_term_t *grown = realloc(
				out->contents,
				out->capacity * 2 * sizeof(parse_term_t));
			if (NULL == grown)
				goto fail;
			out->contents = grown;
			out->capacity *= 2;
		}

		result
			= parse_term(input + used, rem - used, &out->contents[out->len]);
		if (result < 0)
			break;
		++out->len;
		used += result;
	}

	if (out->len > 0)
		return used;

fail:
	/* Frees the children of the out->len parsed terms, then the array. */
	parse_free_sequence_children(out);
	out->contents = NULL;
	out->len = 0;
	return -1;
}
|
|
|
|
/*
 * Parse an expression: one or more sequences separated by '|'.
 *
 * input: text to parse.
 * rem:   number of characters available in input.
 * out:   receives a heap-allocated array of the parsed alternatives.
 *
 * Parsing stops at the first character that is not a '|' following a
 * sequence, so the return value may be less than rem; the caller
 * decides whether trailing input is an error.
 *
 * Returns the number of characters consumed, or -1 if the first
 * sequence could not be parsed or an allocation failed.  On success the
 * caller owns the tree (release it with parse_free_tree_children()).
 * On failure everything allocated here is freed, out->alternatives is
 * NULL and out->count is 0.
 */
int parse_expr(const char *input, int rem, parse_tree_t *out)
{
	int result, used = 0;

	out->count = 0;
	out->capacity = TREE_START_CAPACITY;
	out->alternatives = malloc(out->capacity * sizeof(parse_sequence_t));
	if (NULL == out->alternatives)
		return -1;

	result = parse_sequence(input + used, rem - used, &out->alternatives[0]);
	if (result < 0)
		goto fail;
	++out->count;
	used += result;

	while (used < rem) {
		if ('|' != input[used])
			break;
		++used;

		if (out->count >= out->capacity) {
			/*
			 * Grow through a temporary: on failure realloc leaves
			 * the old array valid, and it and the alternatives in
			 * it must still be freed.
			 */
			parse_sequence_t *grown = realloc(
				out->alternatives,
				out->capacity * 2 * sizeof(parse_sequence_t));
			if (NULL == grown)
				goto fail;
			out->alternatives = grown;
			out->capacity *= 2;
		}

		result = parse_sequence(
			input + used, rem - used, &out->alternatives[out->count]);
		/*
		 * NOTE(review): a '|' with no parsable sequence after it has
		 * already been counted in used, so it is reported as consumed
		 * although no alternative is recorded for it.  Kept as-is;
		 * confirm this is intended.
		 */
		if (result < 0)
			break;
		++out->count;
		used += result;
	}

	return used;

fail:
	for (int i = 0; i < out->count; ++i)
		parse_free_sequence_children(&out->alternatives[i]);
	free(out->alternatives);
	out->alternatives = NULL;
	out->count = 0;
	return -1;
}
|
|
|
|
void parse_free_tree_children(const parse_tree_t *t)
|
|
{
|
|
if (NULL != t->alternatives) {
|
|
for (int i = 0; i < t->count; ++i)
|
|
parse_free_sequence_children(&t->alternatives[i]);
|
|
free(t->alternatives);
|
|
}
|
|
}
|
|
|
|
void parse_free_sequence_children(const parse_sequence_t *s)
|
|
{
|
|
if (NULL != s->contents) {
|
|
for (int i = 0; i < s->len; ++i) {
|
|
switch (s->contents[i].type) {
|
|
case PARSE_TERM_CLASS:
|
|
parse_free_class_children(&s->contents[i].class);
|
|
break;
|
|
case PARSE_TERM_SUBEXPR:
|
|
parse_free_tree_children(&s->contents[i].subexpr);
|
|
break;
|
|
case PARSE_TERM_WILDCARD:
|
|
case PARSE_TERM_LITERAL:
|
|
break;
|
|
}
|
|
}
|
|
free(s->contents);
|
|
}
|
|
}
|
|
|
|
void parse_free_class_children(const parse_class_t *c)
|
|
{
|
|
if (NULL != c->contents)
|
|
free(c->contents);
|
|
}
|