Implement parsing

This commit is contained in:
2025-08-09 19:43:17 +01:00
parent f97cea9290
commit a03ef58eca
5 changed files with 377 additions and 0 deletions

View File

@@ -1,6 +1,7 @@
# Library target `imp`: one translation unit per source file listed below.
# parse.c provides the token-driven parser (parse_init / parse_proc).
add_library(imp
am.c
memory_stream.c
parse.c
store.c
token.c
)

27
lib/include/parse.h Normal file
View File

@@ -0,0 +1,27 @@
#ifndef PARSE_H
#define PARSE_H
#include "am.h"
#include "store.h"
#include "token.h"
/*
 * Number of slots in parse_ctx_t::stack. One slot is consumed per open
 * paren that has not been closed yet, so this bounds list nesting depth.
 */
#define PARSE_MAX_DEPTH 128U
/*
 * Parser state, as stored in parse_ctx_t::state and returned by parse_proc().
 */
typedef enum {
    /* Set by parse_init(); no token has been consumed yet. */
    PARSE_STATE_INIT = 0,
    /* Inside at least one open paren; list elements are being collected. */
    PARSE_STATE_LIST = 1,
    /* A complete expression has been parsed; further tokens are ignored. */
    PARSE_STATE_DONE = 2,
    /*
     * The input was rejected (e.g. a close paren with no matching open
     * paren); further tokens are ignored.
     */
    PARSE_STATE_ERROR = 3,
} parse_state_t;
/*
 * Parser context. Initialise with parse_init() and then feed tokens one at
 * a time through parse_proc().
 */
typedef struct {
    /* Machine whose `expr` field receives the expression being built. */
    am_t *am;
    /* Store from which expression cells are allocated. */
    store_t *store;
    /* Current parser state. */
    parse_state_t state;
    /*
     * Next free slot in `stack`. The stack is filled from its last element
     * towards its first.
     */
    parse_state_t *sp;
    /* Saved states, one per list that is currently open. */
    parse_state_t stack[PARSE_MAX_DEPTH];
} parse_ctx_t;
/*
 * Prepare `out` for parsing a new expression: records `am` and `store`,
 * sets the state to PARSE_STATE_INIT and empties the saved-state stack.
 * The pointers are stored, not copied, so both objects must outlive `out`.
 */
void parse_init(am_t *am, store_t *store, parse_ctx_t *out);
/*
 * Feed one token to the parser and return the resulting state.
 * The expression under construction is reachable through the `expr` field
 * of the am_t passed to parse_init(). Once the state is PARSE_STATE_DONE or
 * PARSE_STATE_ERROR, further tokens leave the context unchanged.
 */
parse_state_t parse_proc(parse_ctx_t *ctx, const token_t *token);
#endif

119
lib/parse.c Normal file
View File

@@ -0,0 +1,119 @@
#include "parse.h"
#include <assert.h>
#include <string.h>
/*
 * Reset `out` so that it is ready to parse a fresh expression.
 *
 * The saved-state stack is filled from its last element towards its first,
 * so an empty stack has `sp` pointing at the final slot.
 */
void parse_init(am_t *am, store_t *store, parse_ctx_t *out)
{
    out->state = PARSE_STATE_INIT;
    out->sp = &out->stack[PARSE_MAX_DEPTH - 1];
    out->store = store;
    out->am = am;
}
/*
 * Save `state` in the next free slot and move `sp` down to the slot below.
 * The assertion fires when every slot is already in use.
 */
static void push_state(parse_ctx_t *ctx, parse_state_t state)
{
    assert(ctx->sp >= ctx->stack);
    *ctx->sp = state;
    ctx->sp -= 1;
}
/*
 * Return the most recently saved state and free its slot.
 * The assertion fires when the stack is empty.
 */
static parse_state_t pop_state(parse_ctx_t *ctx)
{
    assert(ctx->sp < ctx->stack + PARSE_MAX_DEPTH - 1);
    ctx->sp += 1;
    return *ctx->sp;
}
/* Allocate an integer atom holding `integer` and store it in *expr. */
static void load_integer(parse_ctx_t *ctx, expr_t **expr, int64_t integer)
{
    expr_t *node = store_alloc(ctx->store);

    *expr = node;
    node->is_atom = true;
    node->atom.type = ATOM_TYPE_INTEGER;
    node->atom.integer = integer;
}
/* Allocate a symbol atom holding a copy of *symbol and store it in *expr. */
static void
load_symbol(parse_ctx_t *ctx, expr_t **expr, const symbol_t *symbol)
{
    expr_t *node = store_alloc(ctx->store);

    *expr = node;
    node->is_atom = true;
    node->atom.type = ATOM_TYPE_SYMBOL;
    /* Whole-struct copy of the symbol into the new atom. */
    node->atom.symbol = *symbol;
}
/*
 * Extend the list `expr` by one element and return the address of the new
 * element's car, which the caller must fill in.
 *
 * The terminating empty-list atom is turned into a pair in place, and a
 * newly allocated empty-list atom becomes its cdr.
 */
static expr_t **append(parse_ctx_t *ctx, expr_t *expr)
{
    expr_t *tail;

    /* Follow cdr links until the terminating atom is reached. */
    for (tail = expr; !tail->is_atom; tail = tail->pair.cdr) {
    }
    assert(tail->atom.type == ATOM_TYPE_EMPTY_LIST);

    tail->is_atom = false;
    expr_t *terminator = store_alloc(ctx->store);
    tail->pair.cdr = terminator;
    terminator->is_atom = true;
    terminator->atom.type = ATOM_TYPE_EMPTY_LIST;

    return &tail->pair.car;
}
parse_state_t parse_proc(parse_ctx_t *ctx, const token_t *token)
{
switch (ctx->state) {
case PARSE_STATE_INIT:
switch (token->type) {
case TOKEN_TYPE_INTEGER:
load_integer(ctx, &ctx->am->expr, token->integer);
ctx->state = PARSE_STATE_DONE;
break;
case TOKEN_TYPE_SYMBOL:
load_symbol(ctx, &ctx->am->expr, &token->symbol);
ctx->state = PARSE_STATE_DONE;
break;
case TOKEN_TYPE_OPEN_PAREN:
push_state(ctx, PARSE_STATE_DONE);
ctx->am->expr = store_alloc(ctx->store);
ctx->am->expr->is_atom = true;
ctx->am->expr->atom.type = ATOM_TYPE_EMPTY_LIST;
ctx->state = PARSE_STATE_LIST;
break;
case TOKEN_TYPE_CLOSE_PAREN:
ctx->state = PARSE_STATE_ERROR;
break;
}
break;
case PARSE_STATE_LIST:
switch (token->type) {
expr_t **end_car;
case TOKEN_TYPE_INTEGER:
end_car = append(ctx, ctx->am->expr);
load_integer(ctx, end_car, token->integer);
break;
case TOKEN_TYPE_SYMBOL:
end_car = append(ctx, ctx->am->expr);
load_symbol(ctx, end_car, &token->symbol);
break;
case TOKEN_TYPE_OPEN_PAREN:
am_push(ctx->am);
push_state(ctx, PARSE_STATE_LIST);
ctx->am->expr = store_alloc(ctx->store);
ctx->am->expr->is_atom = true;
ctx->am->expr->atom.type = ATOM_TYPE_EMPTY_LIST;
ctx->state = PARSE_STATE_LIST;
break;
case TOKEN_TYPE_CLOSE_PAREN:
ctx->state = pop_state(ctx);
if (ctx->state == PARSE_STATE_LIST) {
expr_t *expr = ctx->am->expr;
am_pop(ctx->am);
end_car = append(ctx, ctx->am->expr);
*end_car = expr;
}
break;
}
break;
case PARSE_STATE_DONE:
case PARSE_STATE_ERROR:
break;
}
assert(ctx->state != PARSE_STATE_INIT);
return ctx->state;
}

View File

@@ -10,6 +10,7 @@ endfunction()
# One entry per test source file. add_test_suites() is a project helper whose
# definition is not shown in this hunk; presumably it registers each file as
# its own test executable -- confirm against the definition.
add_test_suites(
am_tests.c
parse_tests.c
store_tests.c
token_tests.c
)

229
tests/parse_tests.c Normal file
View File

@@ -0,0 +1,229 @@
#include "parse.h"
#include "unity.h"
// Shared fixtures, re-initialised by setUp().
// `ctx` holds pointers to `am` and `store`; parsed expressions are read back
// through `am.expr`.
static store_t store;
static am_t am;
static parse_ctx_t ctx;
// Number of elements in a true array (not a pointer). The argument is
// parenthesised before indexing so that any array-valued expression is
// indexed as a whole.
#define NELEMS(arr) (sizeof(arr) / sizeof((arr)[0]))
// Unity fixture hook: give every test a freshly initialised store, machine
// and parser context.
void setUp(void)
{
store_init(&store);
am_init(&am);
// Must come last: the context keeps pointers to `am` and `store`.
parse_init(&am, &store, &ctx);
}
// Unity fixture hook: nothing to release, setUp() re-initialises everything.
void tearDown(void)
{
}
static void test_integer_123(void)
{
const token_t token = { .type = TOKEN_TYPE_INTEGER, .integer = 123 };
const parse_state_t state = parse_proc(&ctx, &token);
TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
TEST_ASSERT_NOT_NULL(am.expr);
TEST_ASSERT_TRUE(am.expr->is_atom);
TEST_ASSERT_EQUAL(ATOM_TYPE_INTEGER, am.expr->atom.type);
TEST_ASSERT_EQUAL(123, am.expr->atom.integer);
}
static void test_integer_321(void)
{
const token_t token = { .type = TOKEN_TYPE_INTEGER, .integer = 321 };
const parse_state_t state = parse_proc(&ctx, &token);
TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
TEST_ASSERT_NOT_NULL(am.expr);
TEST_ASSERT_TRUE(am.expr->is_atom);
TEST_ASSERT_EQUAL(ATOM_TYPE_INTEGER, am.expr->atom.type);
TEST_ASSERT_EQUAL(321, am.expr->atom.integer);
}
static void test_symbol_foo(void)
{
const token_t token = {
.type = TOKEN_TYPE_SYMBOL,
.symbol = { .buf = "foo", .len = 3 },
};
const parse_state_t state = parse_proc(&ctx, &token);
TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
TEST_ASSERT_NOT_NULL(am.expr);
TEST_ASSERT_TRUE(am.expr->is_atom);
TEST_ASSERT_EQUAL(ATOM_TYPE_SYMBOL, am.expr->atom.type);
TEST_ASSERT_EQUAL(3, am.expr->atom.symbol.len);
TEST_ASSERT_EQUAL_MEMORY("foo", am.expr->atom.symbol.buf, 3);
}
static void test_symbol_quux(void)
{
const token_t token = {
.type = TOKEN_TYPE_SYMBOL,
.symbol = { .buf = "quux", .len = 4 },
};
const parse_state_t state = parse_proc(&ctx, &token);
TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
TEST_ASSERT_NOT_NULL(am.expr);
TEST_ASSERT_TRUE(am.expr->is_atom);
TEST_ASSERT_EQUAL(ATOM_TYPE_SYMBOL, am.expr->atom.type);
TEST_ASSERT_EQUAL(4, am.expr->atom.symbol.len);
TEST_ASSERT_EQUAL_MEMORY("quux", am.expr->atom.symbol.buf, 4);
}
static void test_open_paren_close_paren(void)
{
// ()
const token_t tokens[] = {
{ .type = TOKEN_TYPE_OPEN_PAREN },
{ .type = TOKEN_TYPE_CLOSE_PAREN },
};
parse_state_t state;
state = parse_proc(&ctx, tokens + 0);
TEST_ASSERT_EQUAL(PARSE_STATE_LIST, state);
state = parse_proc(&ctx, tokens + 1);
TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
TEST_ASSERT_NOT_NULL(am.expr);
TEST_ASSERT_TRUE(am.expr->is_atom);
TEST_ASSERT_EQUAL(ATOM_TYPE_EMPTY_LIST, am.expr->atom.type);
}
static void test_open_paren_foo_42_close_paren(void)
{
// (foo 1) -> (foo . (1 . ()))
const token_t tokens[] = {
{ .type = TOKEN_TYPE_OPEN_PAREN },
{
.type = TOKEN_TYPE_SYMBOL,
.symbol = { .buf = "foo", .len = 3 },
},
{ .type = TOKEN_TYPE_INTEGER, .integer = 42 },
{ .type = TOKEN_TYPE_CLOSE_PAREN },
};
parse_state_t state;
for (unsigned i = 0; i < NELEMS(tokens) - 1; ++i) {
state = parse_proc(&ctx, tokens + i);
TEST_ASSERT_EQUAL(PARSE_STATE_LIST, state);
}
state = parse_proc(&ctx, tokens + NELEMS(tokens) - 1);
TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
TEST_ASSERT_NOT_NULL(am.expr);
TEST_ASSERT_FALSE(am.expr->is_atom);
TEST_ASSERT_NOT_NULL(am.expr->pair.car);
TEST_ASSERT_TRUE(am.expr->pair.car->is_atom);
TEST_ASSERT_EQUAL(ATOM_TYPE_SYMBOL, am.expr->pair.car->atom.type);
TEST_ASSERT_EQUAL(3, am.expr->pair.car->atom.symbol.len);
TEST_ASSERT_EQUAL_MEMORY("foo", am.expr->pair.car->atom.symbol.buf, 3);
TEST_ASSERT_NOT_NULL(am.expr->pair.cdr);
TEST_ASSERT_FALSE(am.expr->pair.cdr->is_atom);
TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.car);
TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.car->is_atom);
TEST_ASSERT_EQUAL(
ATOM_TYPE_INTEGER, am.expr->pair.cdr->pair.car->atom.type);
TEST_ASSERT_EQUAL(42, am.expr->pair.cdr->pair.car->atom.integer);
TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.cdr);
TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.cdr->is_atom);
TEST_ASSERT_EQUAL(
ATOM_TYPE_EMPTY_LIST, am.expr->pair.cdr->pair.cdr->atom.type);
}
static void test_open_paren_1_open_paren_2_close_paren_3_close_paren(void)
{
// (1 (2) 3) -> (1 . ((2 . ()) . (3 . ())))
const token_t tokens[] = {
{ .type = TOKEN_TYPE_OPEN_PAREN },
{ .type = TOKEN_TYPE_INTEGER, .integer = 1 },
{ .type = TOKEN_TYPE_OPEN_PAREN },
{ .type = TOKEN_TYPE_INTEGER, .integer = 2 },
{ .type = TOKEN_TYPE_CLOSE_PAREN },
{ .type = TOKEN_TYPE_INTEGER, .integer = 3 },
{ .type = TOKEN_TYPE_CLOSE_PAREN },
};
parse_state_t state;
for (unsigned i = 0; i < NELEMS(tokens) - 1; ++i) {
state = parse_proc(&ctx, tokens + i);
TEST_ASSERT_EQUAL(PARSE_STATE_LIST, state);
}
state = parse_proc(&ctx, tokens + NELEMS(tokens) - 1);
TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
TEST_ASSERT_NOT_NULL(am.expr);
TEST_ASSERT_FALSE(am.expr->is_atom);
TEST_ASSERT_NOT_NULL(am.expr->pair.car);
TEST_ASSERT_TRUE(am.expr->pair.car->is_atom);
TEST_ASSERT_EQUAL(ATOM_TYPE_INTEGER, am.expr->pair.car->atom.type);
TEST_ASSERT_EQUAL(1, am.expr->pair.car->atom.integer);
TEST_ASSERT_NOT_NULL(am.expr->pair.cdr);
TEST_ASSERT_FALSE(am.expr->pair.cdr->is_atom);
TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.car);
TEST_ASSERT_FALSE(am.expr->pair.cdr->pair.car->is_atom);
TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.car->pair.car);
TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.car->pair.car->is_atom);
TEST_ASSERT_EQUAL(
ATOM_TYPE_INTEGER, am.expr->pair.cdr->pair.car->pair.car->atom.type);
TEST_ASSERT_EQUAL(
2, am.expr->pair.cdr->pair.car->pair.car->atom.integer);
TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.car->pair.cdr);
TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.car->pair.cdr->is_atom);
TEST_ASSERT_EQUAL(
ATOM_TYPE_EMPTY_LIST,
am.expr->pair.cdr->pair.car->pair.cdr->atom.type);
TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.cdr);
TEST_ASSERT_FALSE(am.expr->pair.cdr->pair.cdr->is_atom);
TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.cdr->pair.car);
TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.cdr->pair.car->is_atom);
TEST_ASSERT_EQUAL(
ATOM_TYPE_INTEGER, am.expr->pair.cdr->pair.cdr->pair.car->atom.type);
TEST_ASSERT_EQUAL(
3, am.expr->pair.cdr->pair.cdr->pair.car->atom.integer);
TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.cdr->pair.cdr);
TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.cdr->pair.cdr->is_atom);
TEST_ASSERT_EQUAL(
ATOM_TYPE_EMPTY_LIST,
am.expr->pair.cdr->pair.cdr->pair.cdr->atom.type);
}
// A close paren with no list open is rejected.
static void test_close_paren(void)
{
    const token_t stray = { .type = TOKEN_TYPE_CLOSE_PAREN };

    const parse_state_t result = parse_proc(&ctx, &stray);
    TEST_ASSERT_EQUAL(PARSE_STATE_ERROR, result);
}
// Unity test runner: executes every parser test and returns Unity's summary
// result as the process exit status.
int main(void)
{
UNITY_BEGIN();
// Single-token expressions.
RUN_TEST(test_integer_123);
RUN_TEST(test_integer_321);
RUN_TEST(test_symbol_foo);
RUN_TEST(test_symbol_quux);
// Lists: empty, flat and nested.
RUN_TEST(test_open_paren_close_paren);
RUN_TEST(test_open_paren_foo_42_close_paren);
RUN_TEST(test_open_paren_1_open_paren_2_close_paren_3_close_paren);
// Rejected input.
RUN_TEST(test_close_paren);
return UNITY_END();
}