Support classes in construct step
This commit is contained in:
parent
3c4146468e
commit
38b5b48289
@ -115,13 +115,17 @@ static void prepend_fsa(fsa_t *f, const fsa_t *o)
|
|||||||
f->count = count;
|
f->count = count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void construct_base(fsa_t *out, int symbol)
|
static void construct_base(fsa_t *out)
|
||||||
{
|
{
|
||||||
fsa_init(out);
|
fsa_init(out);
|
||||||
const int id = fsa_add_state(out);
|
|
||||||
fsa_add_rule(out, id, out->initial, symbol);
|
|
||||||
out->initial = id;
|
|
||||||
out->states[0].final = true;
|
out->states[0].final = true;
|
||||||
|
out->initial = fsa_add_state(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void construct_symbol(fsa_t *out, int symbol)
|
||||||
|
{
|
||||||
|
construct_base(out);
|
||||||
|
fsa_add_rule(out, out->initial, 0, symbol);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void construct_star(fsa_t *out)
|
static void construct_star(fsa_t *out)
|
||||||
@ -129,7 +133,7 @@ static void construct_star(fsa_t *out)
|
|||||||
fsa_t f;
|
fsa_t f;
|
||||||
memcpy(&f, out, sizeof(fsa_t));
|
memcpy(&f, out, sizeof(fsa_t));
|
||||||
|
|
||||||
construct_base(out, EPSILON);
|
construct_symbol(out, EPSILON);
|
||||||
int f_initial, f_final;
|
int f_initial, f_final;
|
||||||
add_fsa(out, &f, &f_initial, &f_final);
|
add_fsa(out, &f, &f_initial, &f_final);
|
||||||
fsa_add_rule(out, out->initial, f_initial, EPSILON);
|
fsa_add_rule(out, out->initial, f_initial, EPSILON);
|
||||||
@ -137,20 +141,46 @@ static void construct_star(fsa_t *out)
|
|||||||
fsa_add_rule(out, f_final, 0, EPSILON);
|
fsa_add_rule(out, f_final, 0, EPSILON);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool in_class(const regex_class_t *class, char c)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < class->count; ++i) {
|
||||||
|
if (class->contents[i] == c)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build in `out` an automaton for a single character class.
//
// construct_base() prepares `out`; afterwards one rule per accepted symbol is
// added from out->initial to state 0.
//   - negated class: every symbol code in [0, CHAR_COUNT) that in_class()
//     does not report as a member gets a rule;
//   - otherwise: each of the class->count listed members gets a rule.
static void construct_class(fsa_t *out, const regex_class_t *class)
{
    construct_base(out);
    if (class->negated) {
        for (int i = 0; i < CHAR_COUNT; ++i) {
            if (!in_class(class, i)) {
                fsa_add_rule(out, out->initial, 0, i);
            }
        }
    } else {
        for (int i = 0; i < class->count; ++i) {
            // Convert through unsigned char so a member with the high bit set
            // yields the same non-negative symbol code the negated branch
            // uses, instead of a sign-extended negative value on platforms
            // where plain char is signed.
            // NOTE(review): assumes symbols are byte values in
            // [0, CHAR_COUNT) everywhere else too - confirm against the
            // matcher and the literal path.
            const int symbol = (unsigned char)class->contents[i];
            fsa_add_rule(out, out->initial, 0, symbol);
        }
    }
}
|
||||||
|
|
||||||
static void construct_term(const regex_term_t *term, fsa_t *out)
|
static void construct_term(const regex_term_t *term, fsa_t *out)
|
||||||
{
|
{
|
||||||
switch (term->type) {
|
switch (term->type) {
|
||||||
case REGEX_TERM_EMPTY:
|
case REGEX_TERM_EMPTY:
|
||||||
construct_base(out, EPSILON);
|
construct_symbol(out, EPSILON);
|
||||||
break;
|
break;
|
||||||
case REGEX_TERM_LITERAL:
|
case REGEX_TERM_LITERAL:
|
||||||
construct_base(out, term->literal);
|
construct_symbol(out, term->literal);
|
||||||
break;
|
break;
|
||||||
case REGEX_TERM_SUBEXPR:
|
case REGEX_TERM_SUBEXPR:
|
||||||
construct_nfa(&term->subexpr, out);
|
construct_nfa(&term->subexpr, out);
|
||||||
break;
|
break;
|
||||||
case REGEX_TERM_WILDCARD:
|
|
||||||
case REGEX_TERM_CLASS:
|
case REGEX_TERM_CLASS:
|
||||||
|
construct_class(out, &term->class);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case REGEX_TERM_WILDCARD:
|
||||||
assert(false);
|
assert(false);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -186,6 +186,71 @@ static void test_subexpression(void)
|
|||||||
fsa_free(&fsa);
|
fsa_free(&fsa);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_class(void)
|
||||||
|
{
|
||||||
|
char *class_contents = malloc(3);
|
||||||
|
class_contents[0] = 'a';
|
||||||
|
class_contents[1] = 'b';
|
||||||
|
class_contents[2] = 'c';
|
||||||
|
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
|
||||||
|
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
||||||
|
terms[0].type = REGEX_TERM_CLASS;
|
||||||
|
terms[0].class.negated = false;
|
||||||
|
terms[0].class.count = terms[0].class.capacity = 3;
|
||||||
|
terms[0].class.contents = class_contents;
|
||||||
|
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
|
||||||
|
alternatives[0].count = alternatives[0].capacity = 1;
|
||||||
|
alternatives[0].contents = terms;
|
||||||
|
const regex_t regex
|
||||||
|
= { .count = 1, .capacity = 1, .contents = alternatives };
|
||||||
|
|
||||||
|
fsa_t fsa;
|
||||||
|
construct_nfa(®ex, &fsa);
|
||||||
|
|
||||||
|
ASSERT_TRUE(accepts(&fsa, "a"));
|
||||||
|
ASSERT_TRUE(accepts(&fsa, "b"));
|
||||||
|
ASSERT_TRUE(accepts(&fsa, "c"));
|
||||||
|
ASSERT_FALSE(accepts(&fsa, ""));
|
||||||
|
ASSERT_FALSE(accepts(&fsa, "aa"));
|
||||||
|
ASSERT_FALSE(accepts(&fsa, "d"));
|
||||||
|
|
||||||
|
regex_free(®ex);
|
||||||
|
fsa_free(&fsa);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_negated_class(void)
|
||||||
|
{
|
||||||
|
char *class_contents = malloc(3);
|
||||||
|
class_contents[0] = 'a';
|
||||||
|
class_contents[1] = 'b';
|
||||||
|
class_contents[2] = 'c';
|
||||||
|
regex_term_t *terms = malloc(1 * sizeof(regex_term_t));
|
||||||
|
terms[0].quantifier = REGEX_QUANTIFIER_NONE;
|
||||||
|
terms[0].type = REGEX_TERM_CLASS;
|
||||||
|
terms[0].class.negated = true;
|
||||||
|
terms[0].class.count = terms[0].class.capacity = 3;
|
||||||
|
terms[0].class.contents = class_contents;
|
||||||
|
regex_sequence_t *alternatives = malloc(1 * sizeof(regex_sequence_t));
|
||||||
|
alternatives[0].count = alternatives[0].capacity = 1;
|
||||||
|
alternatives[0].contents = terms;
|
||||||
|
const regex_t regex
|
||||||
|
= { .count = 1, .capacity = 1, .contents = alternatives };
|
||||||
|
|
||||||
|
fsa_t fsa;
|
||||||
|
construct_nfa(®ex, &fsa);
|
||||||
|
|
||||||
|
ASSERT_TRUE(accepts(&fsa, "d"));
|
||||||
|
ASSERT_TRUE(accepts(&fsa, "e"));
|
||||||
|
ASSERT_FALSE(accepts(&fsa, "a"));
|
||||||
|
ASSERT_FALSE(accepts(&fsa, "b"));
|
||||||
|
ASSERT_FALSE(accepts(&fsa, "c"));
|
||||||
|
ASSERT_FALSE(accepts(&fsa, ""));
|
||||||
|
ASSERT_FALSE(accepts(&fsa, "aa"));
|
||||||
|
|
||||||
|
regex_free(®ex);
|
||||||
|
fsa_free(&fsa);
|
||||||
|
}
|
||||||
|
|
||||||
static void test_sequence_containing_starred_union(void)
|
static void test_sequence_containing_starred_union(void)
|
||||||
{
|
{
|
||||||
// ab(c|d)*
|
// ab(c|d)*
|
||||||
@ -325,6 +390,8 @@ int main(void)
|
|||||||
test_union();
|
test_union();
|
||||||
test_star();
|
test_star();
|
||||||
test_subexpression();
|
test_subexpression();
|
||||||
|
test_class();
|
||||||
|
test_negated_class();
|
||||||
|
|
||||||
// Compound expressions
|
// Compound expressions
|
||||||
test_sequence_containing_starred_union();
|
test_sequence_containing_starred_union();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user