Implement character class parsing

This commit is contained in:
2024-10-25 18:40:38 +01:00
parent 40db26a62d
commit b2f474336a
2 changed files with 88 additions and 0 deletions

View File

@@ -8,6 +8,7 @@
#include <stdbool.h>
#include <stdlib.h>
#define CLASS_START_CAPACITY 4
#define SEQUENCE_START_CAPACITY 8
static bool is_special(char c)
@@ -21,6 +22,9 @@ static bool is_special(char c)
case '*':
case '+':
case '?':
case '[':
case ']':
case '^':
return true;
default:
return false;
@@ -40,6 +44,48 @@ static int parse_literal(const char *input, int rem, char *out)
}
}
/*
 * Parse a bracketed character class such as "[abc]" or "[^abc]".
 *
 * input: text to parse; at most rem characters starting here are examined.
 * rem:   number of characters available at input.
 * out:   receives the parsed class.  On success out->contents is a heap
 *        buffer holding out->count literals, and out->negated tells whether
 *        the class began with '^'.
 *
 * Returns the number of characters consumed (including both brackets), or
 * -1 when the input does not start with '[', the closing ']' is missing,
 * the class is empty, or memory could not be allocated.  On every failure
 * that happens after the buffer was allocated, the buffer is released and
 * out->contents is reset to NULL so nothing is leaked.
 */
static int parse_class(const char *input, int rem, class_t *out)
{
    int result, used = 0;
    void *grown;

    if (used >= rem || '[' != input[used])
        return -1;
    ++used;

    /* Set the flag unconditionally: without this a non-negated class would
     * leave out->negated holding whatever value it had before the call. */
    out->negated = false;
    if (used < rem && '^' == input[used]) {
        out->negated = true;
        ++used;
    }

    out->count = 0;
    out->capacity = CLASS_START_CAPACITY;
    out->contents = malloc(out->capacity);
    if (NULL == out->contents)
        return -1;

    while (used < rem) {
        if (out->count >= out->capacity) {
            /* Grow through a temporary so the original buffer is still
             * reachable (and can be freed) if realloc fails. */
            grown = realloc(out->contents, out->capacity * 2);
            if (NULL == grown)
                goto fail;
            out->contents = grown;
            out->capacity *= 2;
        }

        /* The member list ends at the first position parse_literal
         * rejects; the check below then requires a ']' there. */
        result = parse_literal(
            input + used, rem - used, &out->contents[out->count]);
        if (result < 0)
            break;

        used += result;
        ++out->count;
    }

    if (used >= rem || ']' != input[used])
        goto fail;
    ++used;

    /* An empty class ("[]" or "[^]") is rejected. */
    if (out->count <= 0)
        goto fail;

    return used;

fail:
    free(out->contents);
    out->contents = NULL;
    out->count = 0;
    out->capacity = 0;
    return -1;
}
static int parse_term(const char *input, int rem, term_t *out)
{
int result, used = 0;
@@ -62,6 +108,12 @@ static int parse_term(const char *input, int rem, term_t *out)
if (')' != input[used])
return -1;
++used;
} else if ('[' == input[0]) {
result = parse_class(input + used, rem - used, &out->class);
if (result < 0)
return -1;
out->type = TERM_TYPE_CLASS;
used += result;
} else {
result = parse_literal(input + used, rem - used, &out->literal);
if (result < 0)