/*
 * Copyright (c) Camden Dixie O'Brien
 * SPDX-License-Identifier: AGPL-3.0-only
 */

#include "parser.h"

#include <stdbool.h> /* bool, true, false */
#include <stdlib.h>  /* malloc, realloc, free, NULL */

#define CLASS_START_CAPACITY 4
#define SEQUENCE_START_CAPACITY 8
#define PARSE_TREE_START_CAPACITY 4

/* Defined below; needed by parse_sequence() for cleanup on failure. */
static void sequence_free(sequence_t *s);

/*
 * Report whether c is one of the characters that has syntactic meaning
 * to this parser and therefore cannot appear as an unescaped literal.
 */
static bool is_special(char c)
{
	switch (c) {
	case '|':
	case '.':
	case '\\':
	case '(':
	case ')':
	case '*':
	case '+':
	case '?':
	case '[':
	case ']':
	case '^':
		return true;
	default:
		return false;
	}
}

/*
 * Parse a single literal character from the first rem bytes of input.
 *
 * A literal is either one non-special character, or a backslash
 * followed by any character (the character after the backslash is
 * taken verbatim).
 *
 * On success the character is stored in *out and the number of input
 * bytes consumed (1 or 2) is returned.  Returns -1 if no literal is
 * present; *out is left untouched in that case.
 */
static int parse_literal(const char *input, int rem, char *out)
{
	if (rem > 0 && !is_special(input[0])) {
		*out = input[0];
		return 1;
	} else if (rem > 1 && '\\' == input[0]) {
		*out = input[1];
		return 2;
	} else {
		return -1;
	}
}

/*
 * Parse a character class: '[', an optional '^' (negation), one or
 * more literals, then ']'.
 *
 * On success, out owns a heap-allocated contents buffer holding
 * out->count characters, and the number of input bytes consumed is
 * returned.
 *
 * Returns -1 on malformed input (no '[', no closing ']', or an empty
 * class) or on allocation failure.  Whenever -1 is returned after the
 * leading '[' has been seen, no memory is left allocated and
 * out->contents is NULL.
 */
static int parse_class(const char *input, int rem, class_t *out)
{
	int result, used = 0;

	if (used >= rem || '[' != input[used])
		return -1;
	++used;

	if (used < rem && '^' == input[used]) {
		out->negated = true;
		++used;
	} else {
		out->negated = false;
	}

	out->count = 0;
	out->capacity = CLASS_START_CAPACITY;
	out->contents = malloc(out->capacity);
	if (NULL == out->contents)
		return -1;

	while (used < rem) {
		if (out->count >= out->capacity) {
			/* Grow via a temporary so the old buffer is not
			 * lost if realloc() fails. */
			void *grown = realloc(out->contents, out->capacity * 2);
			if (NULL == grown)
				goto fail;
			out->contents = grown;
			out->capacity *= 2;
		}

		result = parse_literal(
			input + used, rem - used, &out->contents[out->count]);
		if (result < 0)
			break;
		used += result;
		++out->count;
	}

	if (used >= rem || ']' != input[used])
		goto fail;
	++used;

	if (out->count > 0)
		return used;

fail:
	free(out->contents);
	out->contents = NULL;
	out->count = 0;
	return -1;
}

/*
 * Parse one term: a wildcard ('.'), a parenthesised subexpression, a
 * character class, or a literal, followed by an optional quantifier
 * ('*', '+' or '?').
 *
 * On success, out->type, the matching payload field and
 * out->quantifier are set, and the number of input bytes consumed is
 * returned.  Returns -1 if no term could be parsed; nothing allocated
 * while attempting the parse is left owned by out in that case.
 */
static int parse_term(const char *input, int rem, term_t *out)
{
	int result, used = 0;

	if (1 > rem)
		return -1;

	if ('.' == input[0]) {
		out->type = TERM_TYPE_WILDCARD;
		++used;
	} else if ('(' == input[0]) {
		++used;
		result = parse_expr(input + used, rem - used, &out->subexpr);
		if (result < 0)
			return -1;
		out->type = TERM_TYPE_SUBEXPR;
		used += result;

		/* The bounds check must come first: the subexpression may
		 * have consumed everything up to the end of the input. */
		if (used >= rem || ')' != input[used]) {
			/* This term will not be recorded by the caller, so
			 * the subexpression must be released here. */
			parse_tree_free_children(&out->subexpr);
			return -1;
		}
		++used;
	} else if ('[' == input[0]) {
		result = parse_class(input + used, rem - used, &out->class);
		if (result < 0)
			return -1;
		out->type = TERM_TYPE_CLASS;
		used += result;
	} else {
		result = parse_literal(input + used, rem - used, &out->literal);
		if (result < 0)
			return -1;
		out->type = TERM_TYPE_LITERAL;
		used += result;
	}

	out->quantifier = QUANTIFIER_NONE;
	if (used < rem) {
		switch (input[used]) {
		case '*':
			out->quantifier = QUANTIFIER_ZERO_OR_MORE;
			++used;
			break;
		case '+':
			out->quantifier = QUANTIFIER_ONE_OR_MORE;
			++used;
			break;
		case '?':
			out->quantifier = QUANTIFIER_ZERO_OR_ONE;
			++used;
			break;
		default:
			break;
		}
	}

	return used;
}

/*
 * Parse a sequence of one or more consecutive terms.
 *
 * Parsing stops at the first position where a term cannot be parsed
 * (or at the end of input).  On success, out owns a heap-allocated
 * array of out->len terms and the number of input bytes consumed is
 * returned.
 *
 * Returns -1 if not even one term could be parsed, or on allocation
 * failure.  In both cases everything allocated here is released and
 * out->contents is NULL.
 */
static int parse_sequence(const char *input, int rem, sequence_t *out)
{
	int result, used = 0;

	out->len = 0;
	out->capacity = SEQUENCE_START_CAPACITY;
	out->contents = malloc(out->capacity * sizeof(term_t));
	if (NULL == out->contents)
		return -1;

	while (used < rem) {
		if (out->len >= out->capacity) {
			/* Grow via a temporary so the terms parsed so far
			 * can still be released if realloc() fails. */
			void *grown = realloc(
				out->contents,
				out->capacity * 2 * sizeof(term_t));
			if (NULL == grown) {
				sequence_free(out);
				return -1;
			}
			out->contents = grown;
			out->capacity *= 2;
		}

		result = parse_term(
			input + used, rem - used, &out->contents[out->len]);
		if (result < 0)
			break;
		++out->len;
		used += result;
	}

	if (out->len > 0)
		return used;

	/* An empty sequence is an error; do not leak the empty array. */
	free(out->contents);
	out->contents = NULL;
	return -1;
}

/*
 * Parse an expression: one or more sequences separated by '|'.
 *
 * On success, out owns a heap-allocated array of out->count
 * alternatives and the number of input bytes consumed is returned.
 * The caller releases the tree with parse_tree_free_children().
 *
 * Parsing stops at the first byte that is not part of the expression.
 * Note that if the sequence following a '|' fails to parse, the '|'
 * itself is still counted as consumed.
 *
 * Returns -1 if the first sequence cannot be parsed or on allocation
 * failure.  In that case nothing is left allocated and
 * out->alternatives is NULL, so calling parse_tree_free_children() on
 * out afterwards is harmless.
 */
int parse_expr(const char *input, int rem, parse_tree_t *out)
{
	int result, used = 0;

	out->count = 0;
	out->capacity = PARSE_TREE_START_CAPACITY;
	out->alternatives = malloc(out->capacity * sizeof(sequence_t));
	if (NULL == out->alternatives)
		return -1;

	result = parse_sequence(input + used, rem - used, &out->alternatives[0]);
	if (result < 0) {
		/* parse_sequence() cleaned up after itself; only the
		 * alternatives array remains to be released. */
		free(out->alternatives);
		out->alternatives = NULL;
		return -1;
	}
	++out->count;
	used += result;

	while (used < rem) {
		if ('|' != input[used])
			break;
		++used;

		if (out->count >= out->capacity) {
			/* Grow via a temporary so the alternatives parsed
			 * so far can still be released on failure. */
			void *grown = realloc(
				out->alternatives,
				out->capacity * 2 * sizeof(sequence_t));
			if (NULL == grown) {
				parse_tree_free_children(out);
				return -1;
			}
			out->alternatives = grown;
			out->capacity *= 2;
		}

		result = parse_sequence(
			input + used, rem - used,
			&out->alternatives[out->count]);
		if (result < 0)
			break;
		++out->count;
		used += result;
	}

	return used;
}

/*
 * Release the contents buffer of a class and mark it as empty so that
 * a repeated call is harmless.
 */
static void class_free(class_t *c)
{
	free(c->contents);
	c->contents = NULL;
}

/*
 * Release every term held by a sequence (recursing into classes and
 * subexpressions) and then the term array itself.  The sequence is
 * left empty, with contents set to NULL.
 */
static void sequence_free(sequence_t *s)
{
	if (NULL == s->contents)
		return;

	for (int i = 0; i < s->len; ++i) {
		switch (s->contents[i].type) {
		case TERM_TYPE_CLASS:
			class_free(&s->contents[i].class);
			break;
		case TERM_TYPE_SUBEXPR:
			parse_tree_free_children(&s->contents[i].subexpr);
			break;
		case TERM_TYPE_WILDCARD:
		case TERM_TYPE_LITERAL:
			/* No heap storage owned by these term types. */
			break;
		}
	}

	free(s->contents);
	s->contents = NULL;
	s->len = 0;
}

/*
 * Release everything owned by a parse tree: each alternative sequence
 * and the alternatives array.  The parse_tree_t object itself is not
 * freed.  Afterwards t->alternatives is NULL and t->count is 0, so
 * calling this again on the same tree does nothing.
 */
void parse_tree_free_children(parse_tree_t *t)
{
	if (NULL == t->alternatives)
		return;

	for (int i = 0; i < t->count; ++i)
		sequence_free(&t->alternatives[i]);

	free(t->alternatives);
	t->alternatives = NULL;
	t->count = 0;
}