206 lines
3.8 KiB
C
206 lines
3.8 KiB
C
/*
 * Copyright (c) Camden Dixie O'Brien
 * SPDX-License-Identifier: AGPL-3.0-only
 */
|
|
|
|
#include "parser.h"
|
|
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
|
|
#define CLASS_START_CAPACITY 4
|
|
#define SEQUENCE_START_CAPACITY 8
|
|
|
|
/*
 * Report whether c is one of the characters the parser treats as
 * regex syntax rather than as an ordinary literal.
 */
static bool is_special(char c)
{
	static const char specials[] = {
		'|', '.', '\\', '(', ')', '*', '+', '?', '[', ']', '^',
	};
	unsigned int i;

	for (i = 0; i < sizeof(specials) / sizeof(specials[0]); ++i) {
		if (specials[i] == c)
			return true;
	}

	return false;
}
|
|
|
|
/*
 * Parse one literal character from the first rem bytes of input.
 *
 * A non-special character stands for itself and consumes one byte.
 * A backslash followed by any character yields that character and
 * consumes two bytes.  The parsed character is stored in *out and
 * the number of bytes consumed is returned; -1 is returned (and *out
 * is left untouched) when no literal can be read.
 */
static int parse_literal(const char *input, int rem, char *out)
{
	char first;

	if (rem <= 0)
		return -1;

	first = input[0];
	if (!is_special(first)) {
		*out = first;
		return 1;
	}

	/* A special character is only acceptable as an escape prefix. */
	if ('\\' != first || rem < 2)
		return -1;

	*out = input[1];
	return 2;
}
|
|
|
|
/*
 * Parse a character class of the form "[...]" or "[^...]" from the
 * first rem bytes of input.
 *
 * On success, returns the number of bytes consumed and fills *out:
 * negated records whether the class opened with '^', and contents
 * is a heap buffer holding count literal characters.
 *
 * Returns -1 if input does not start with '[', if the class holds no
 * characters, if the closing ']' is missing, or if allocation fails.
 * Once the buffer has been allocated, every failure path frees it
 * and resets out->contents to NULL, so a failed parse leaves nothing
 * behind for the caller to release.
 */
static int parse_class(const char *input, int rem, class_t *out)
{
	void *grown;
	int result, used = 0;

	if (used >= rem || '[' != input[used])
		return -1;
	++used;

	if (used < rem && '^' == input[used]) {
		out->negated = true;
		++used;
	} else {
		out->negated = false;
	}

	out->count = 0;
	out->capacity = CLASS_START_CAPACITY;
	out->contents = malloc(out->capacity);
	if (NULL == out->contents)
		return -1;

	while (used < rem) {
		if (out->count >= out->capacity) {
			/*
			 * Grow through a temporary: assigning realloc's
			 * result straight to out->contents would lose the
			 * only pointer to the old buffer on failure.
			 */
			grown = realloc(out->contents, 2 * out->capacity);
			if (NULL == grown)
				goto fail;
			out->contents = grown;
			out->capacity *= 2;
		}

		result = parse_literal(
			input + used, rem - used, &out->contents[out->count]);
		if (result < 0)
			break;
		used += result;
		++out->count;
	}

	if (used >= rem || ']' != input[used])
		goto fail;
	++used;

	/* An empty class "[]" or "[^]" is rejected. */
	if (out->count > 0)
		return used;

fail:
	free(out->contents);
	out->contents = NULL;
	out->count = 0;
	out->capacity = 0;
	return -1;
}
|
|
|
|
/*
 * Parse a single term from the first rem bytes of input.
 *
 * A term is a wildcard '.', a parenthesised sub-regex, a character
 * class, or a literal, optionally followed by one quantifier
 * ('*', '+' or '?').
 *
 * On success, returns the number of bytes consumed, with out->type,
 * the payload member matching that type, and out->quantifier set.
 * Returns -1 if rem is less than one or no term can be parsed.
 */
static int parse_term(const char *input, int rem, term_t *out)
{
	int result, used = 0;

	if (1 > rem)
		return -1;

	if ('.' == input[0]) {
		out->type = TERM_TYPE_WILDCARD;
		++used;
	} else if ('(' == input[0]) {
		++used;

		result = parse_regex(input + used, rem - used, &out->regex);
		if (result < 0)
			return -1;
		out->type = TERM_TYPE_REGEX;
		used += result;

		/*
		 * The sub-regex may have consumed everything up to rem
		 * (e.g. "(a"), so check the bound before looking for the
		 * closing parenthesis.
		 *
		 * NOTE(review): out->regex may own heap storage at this
		 * point; no destructor is visible in this file, so it is
		 * not released here -- confirm who cleans up on failure.
		 */
		if (used >= rem || ')' != input[used])
			return -1;
		++used;
	} else if ('[' == input[0]) {
		result = parse_class(input + used, rem - used, &out->class);
		if (result < 0)
			return -1;
		out->type = TERM_TYPE_CLASS;
		used += result;
	} else {
		result = parse_literal(input + used, rem - used, &out->literal);
		if (result < 0)
			return -1;
		out->type = TERM_TYPE_LITERAL;
		used += result;
	}

	/* A quantifier is optional; absence is not an error. */
	out->quantifier = QUANTIFIER_NONE;
	if (used < rem) {
		switch (input[used]) {
		case '*':
			out->quantifier = QUANTIFIER_ZERO_OR_MORE;
			++used;
			break;
		case '+':
			out->quantifier = QUANTIFIER_ONE_OR_MORE;
			++used;
			break;
		case '?':
			out->quantifier = QUANTIFIER_ZERO_OR_ONE;
			++used;
			break;
		default:
			break;
		}
	}

	return used;
}
|
|
|
|
/*
 * Parse one or more consecutive terms from the first rem bytes of
 * input into a heap-allocated array.
 *
 * Parsing stops at the end of input or at the first position where a
 * term cannot be parsed.  On success, returns the number of bytes
 * consumed; out->contents then holds out->len terms.
 *
 * Returns -1 if no term could be parsed or if allocation fails.
 * When no term was parsed the array is freed and out->contents is
 * reset to NULL.  If growing the array fails part-way through, the
 * out->len terms parsed so far are left in out->contents so that
 * they can still be released.
 */
static int parse_sequence(const char *input, int rem, sequence_t *out)
{
	void *grown;
	int result, used = 0;

	out->len = 0;
	out->capacity = SEQUENCE_START_CAPACITY;
	out->contents = malloc(out->capacity * sizeof *out->contents);
	if (NULL == out->contents)
		return -1;

	while (used < rem) {
		if (out->len >= out->capacity) {
			/*
			 * Grow through a temporary: assigning realloc's
			 * result straight to out->contents would drop the
			 * only pointer to the existing terms on failure.
			 */
			grown = realloc(
				out->contents,
				2 * out->capacity * sizeof *out->contents);
			if (NULL == grown)
				return -1;
			out->contents = grown;
			out->capacity *= 2;
		}

		result = parse_term(
			input + used, rem - used, &out->contents[out->len]);
		if (result < 0)
			break;
		++out->len;
		used += result;
	}

	if (out->len > 0)
		return used;

	/* An empty sequence is an error; nothing is stored in the array. */
	free(out->contents);
	out->contents = NULL;
	out->capacity = 0;
	return -1;
}
|
|
|
|
/*
 * Parse a regex from the first rem bytes of input: a sequence of
 * terms, optionally followed by '|' and a further regex.
 *
 * On success, returns the number of bytes consumed, which may be less
 * than rem because parsing stops at the first byte that cannot
 * continue the expression.  out->sequence holds the parsed terms and
 * out->alternative points to a heap-allocated regex_t for the right
 * hand side of '|', or is NULL when there is no alternative.
 *
 * Returns -1 on a parse or allocation failure.  out->alternative is
 * NULL on every failure path.
 */
int parse_regex(const char *input, int rem, regex_t *out)
{
	int result, used = 0;

	/* Give the pointer a defined value before any early return. */
	out->alternative = NULL;

	result = parse_sequence(input + used, rem - used, &out->sequence);
	if (result < 0)
		return -1;
	used += result;

	if (used >= rem || '|' != input[used])
		return used;
	++used;

	out->alternative = malloc(sizeof *out->alternative);
	if (NULL == out->alternative)
		return -1;

	result = parse_regex(input + used, rem - used, out->alternative);
	if (result < 0) {
		/*
		 * Do not leave a half-built node attached to the result.
		 *
		 * NOTE(review): storage owned by the partially parsed
		 * alternative's sequence is not released here, as no
		 * destructor is visible in this file -- confirm.
		 */
		free(out->alternative);
		out->alternative = NULL;
		return -1;
	}

	return used + result;
}
|