regex-engine/lib/parser.c

113 lines
2.0 KiB
C

/*
* Copyright (c) Camden Dixie O'Brien
* SPDX-License-Identifier: AGPL-3.0-only
*/
#include "parser.h"
#include <stdbool.h>
#include <stdlib.h>
/*
 * Number of term slots allocated for a freshly created sequence. The
 * sequence parser doubles the capacity whenever the buffer fills up.
 */
#define SEQUENCE_START_CAPACITY 8
/*
 * Report whether c is one of the characters with syntactic meaning in
 * the pattern language handled here: the alternation bar, the wildcard
 * dot, or the escape backslash. Any other character is an ordinary
 * literal.
 */
static bool is_special(char c)
{
	return '|' == c || '.' == c || '\\' == c;
}
/*
 * Parse a single literal character from the start of input.
 *
 * input: text to parse; at most rem characters are examined.
 * rem:   number of characters available at input.
 * out:   receives the literal character on success.
 *
 * An unescaped non-special character consumes one input character. A
 * backslash followed by any character consumes two and yields the second
 * character verbatim. Returns the number of characters consumed, or -1
 * if no literal starts here (empty input, a bare special character, or
 * a backslash with nothing after it); out is left untouched in that case.
 */
static int parse_literal(const char *input, int rem, char *out)
{
	if (rem <= 0)
		return -1;

	const char first = input[0];

	if (!is_special(first)) {
		*out = first;
		return 1;
	}

	/* An escape needs a second character to act upon. */
	if ('\\' == first && rem > 1) {
		*out = input[1];
		return 2;
	}

	return -1;
}
/*
 * Parse a single term from the start of input: either the wildcard '.'
 * or a literal character (see parse_literal).
 *
 * input: text to parse; at most rem characters are examined.
 * rem:   number of characters available at input.
 * out:   receives the term type, the literal character for literal
 *        terms, and a quantifier of QUANTIFIER_NONE.
 *
 * Returns the number of characters consumed, or -1 if no term starts
 * here. On failure the type and quantifier of out are not written.
 */
static int parse_term(const char *input, int rem, term_t *out)
{
	int consumed;

	if (rem > 0 && '.' == input[0]) {
		out->type = TERM_TYPE_WILDCARD;
		consumed = 1;
	} else {
		consumed = parse_literal(input, rem, &out->literal);
		if (consumed < 0)
			return -1;
		out->type = TERM_TYPE_LITERAL;
	}

	/* Quantifiers are never read from the input here. */
	out->quantifier = QUANTIFIER_NONE;
	return consumed;
}
/*
 * Parse one or more consecutive terms from the start of input into out.
 *
 * input: text to parse; at most rem characters are examined.
 * rem:   number of characters available at input.
 * out:   sequence to fill in. Any previous contents are overwritten
 *        without being freed.
 *
 * Parsing stops at the first position where no term can be parsed (for
 * example at a '|') or when the input is exhausted.
 *
 * Returns the number of characters consumed on success, in which case
 * out->contents is a heap buffer owned by the caller holding out->len
 * terms. Returns -1 if no term could be parsed or if memory allocation
 * failed; in that case the buffer has been released and out is left
 * empty (contents NULL, len and capacity 0), so there is nothing for the
 * caller to free.
 */
static int parse_sequence(const char *input, int rem, sequence_t *out)
{
	int result, used = 0;

	out->len = 0;
	out->capacity = SEQUENCE_START_CAPACITY;
	out->contents = calloc(out->capacity, sizeof(term_t));
	if (NULL == out->contents)
		goto fail;

	while (used < rem) {
		if (out->len >= out->capacity) {
			/*
			 * Grow through a temporary: on failure realloc leaves the
			 * old buffer allocated, so assigning its result straight
			 * to out->contents would lose the only pointer to it.
			 */
			term_t *grown = realloc(
				out->contents, 2 * out->capacity * sizeof(term_t));
			if (NULL == grown)
				goto fail;
			out->contents = grown;
			out->capacity *= 2;
		}

		result
			= parse_term(input + used, rem - used, &out->contents[out->len]);
		if (result < 0)
			break;

		++out->len;
		used += result;
	}

	if (out->len > 0)
		return used;

	/* An empty sequence is an error; fall through to the cleanup. */
fail:
	free(out->contents);
	out->contents = NULL;
	out->len = 0;
	out->capacity = 0;
	return -1;
}
/*
 * Parse a regex: a sequence of terms, optionally followed by '|' and
 * another regex that is stored as the alternative.
 *
 * input: text to parse; at most rem characters are examined.
 * rem:   number of characters available at input.
 * out:   regex to fill in. It does not need to be initialised by the
 *        caller; any previous contents are overwritten without being
 *        freed.
 *
 * Returns the number of characters consumed on success. out->alternative
 * is then either NULL (no '|' branch) or a heap-allocated regex, and
 * out->sequence.contents is a heap buffer; both are owned by the caller.
 *
 * Returns -1 if the input is not a valid regex or if memory allocation
 * failed. Everything allocated during the failed parse has been released
 * and out is left with a NULL alternative and an empty sequence.
 */
int parse_regex(const char *input, int rem, regex_t *out)
{
	int result, used = 0;

	/*
	 * Give the alternative a defined value up front so that it is NULL
	 * when there is no '|' branch, even if the caller did not zero *out.
	 */
	out->alternative = NULL;

	result = parse_sequence(input, rem, &out->sequence);
	if (result < 0)
		goto fail;
	used += result;

	if (used < rem) {
		/* The only thing allowed to follow a sequence is an alternation. */
		if (input[used] != '|')
			goto fail;
		++used;

		out->alternative = calloc(1, sizeof(regex_t));
		if (NULL == out->alternative)
			goto fail;

		result = parse_regex(input + used, rem - used, out->alternative);
		if (result < 0)
			goto fail;
		used += result;
	}

	return used;

fail:
	/*
	 * A failed recursive call has already released whatever the
	 * alternative owned, so only the node itself remains to be freed.
	 * parse_sequence always assigns sequence.contents before returning,
	 * so the pointer is valid (possibly NULL) on every path that gets here.
	 */
	free(out->alternative);
	out->alternative = NULL;
	free(out->sequence.contents);
	out->sequence.contents = NULL;
	out->sequence.len = 0;
	out->sequence.capacity = 0;
	return -1;
}