diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 2c31a8d..5a9a232 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_library(imp
     am.c
     memory_stream.c
+    parse.c
     store.c
     token.c
 )
diff --git a/lib/include/parse.h b/lib/include/parse.h
new file mode 100644
index 0000000..fa372fe
--- /dev/null
+++ b/lib/include/parse.h
@@ -0,0 +1,35 @@
+#ifndef PARSE_H
+#define PARSE_H
+
+#include "am.h"
+#include "store.h"
+#include "token.h"
+
+/* Capacity of the saved-state stack, i.e. the deepest list nesting allowed. */
+#define PARSE_MAX_DEPTH 128U
+
+/* Parser states; parse_proc() returns the state reached after each token. */
+typedef enum {
+    PARSE_STATE_INIT,  /* no token consumed yet */
+    PARSE_STATE_LIST,  /* inside an unclosed list */
+    PARSE_STATE_DONE,  /* a complete expression is in am->expr */
+    PARSE_STATE_ERROR, /* unexpected token or nesting too deep */
+} parse_state_t;
+
+typedef struct {
+    am_t *am;
+    store_t *store;
+    parse_state_t state;
+    /* Descending stack of states to resume on ')'; sp points at the top
+     * entry and equals stack + PARSE_MAX_DEPTH while the stack is empty. */
+    parse_state_t *sp, stack[PARSE_MAX_DEPTH];
+} parse_ctx_t;
+
+/* Prepare `out` to parse one expression into `am`, allocating from `store`. */
+void parse_init(am_t *am, store_t *store, parse_ctx_t *out);
+
+/* Feed one token; returns the new state. Tokens fed after DONE or ERROR
+ * are ignored. */
+parse_state_t parse_proc(parse_ctx_t *ctx, const token_t *token);
+
+#endif
diff --git a/lib/parse.c b/lib/parse.c
new file mode 100644
index 0000000..d6a44a3
--- /dev/null
+++ b/lib/parse.c
@@ -0,0 +1,144 @@
+#include "parse.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+
+void parse_init(am_t *am, store_t *store, parse_ctx_t *out)
+{
+    out->am = am;
+    out->store = store;
+    out->state = PARSE_STATE_INIT;
+    /* Empty stack: sp is one past the end, never before the array. */
+    out->sp = out->stack + PARSE_MAX_DEPTH;
+}
+
+/* True when every slot is in use, i.e. one more '(' would nest too deep. */
+static bool stack_full(const parse_ctx_t *ctx)
+{
+    return ctx->sp == ctx->stack;
+}
+
+/* Save the state to resume once the list being opened is closed. */
+static void push_state(parse_ctx_t *ctx, parse_state_t state)
+{
+    assert(!stack_full(ctx));
+    *--ctx->sp = state;
+}
+
+/* Remove and return the most recently saved state. */
+static parse_state_t pop_state(parse_ctx_t *ctx)
+{
+    assert(ctx->sp < ctx->stack + PARSE_MAX_DEPTH);
+    return *ctx->sp++;
+}
+
+/* Point *expr at a newly allocated empty-list atom. */
+static void load_empty_list(parse_ctx_t *ctx, expr_t **expr)
+{
+    *expr = store_alloc(ctx->store);
+    (*expr)->is_atom = true;
+    (*expr)->atom.type = ATOM_TYPE_EMPTY_LIST;
+}
+
+/* Point *expr at a newly allocated integer atom. */
+static void load_integer(parse_ctx_t *ctx, expr_t **expr, int64_t integer)
+{
+    *expr = store_alloc(ctx->store);
+    (*expr)->is_atom = true;
+    (*expr)->atom.type = ATOM_TYPE_INTEGER;
+    (*expr)->atom.integer = integer;
+}
+
+/* Point *expr at a newly allocated symbol atom holding a copy of *symbol. */
+static void
+load_symbol(parse_ctx_t *ctx, expr_t **expr, const symbol_t *symbol)
+{
+    *expr = store_alloc(ctx->store);
+    (*expr)->is_atom = true;
+    (*expr)->atom.type = ATOM_TYPE_SYMBOL;
+    memcpy(&(*expr)->atom.symbol, symbol, sizeof(*symbol));
+}
+
+/*
+ * Grow the list `expr` by one cell: the terminating empty-list atom becomes a
+ * pair whose cdr is a new empty list. Returns the address of the new cell's
+ * car, which the caller must fill in. Walks the list from its head each time.
+ */
+static expr_t **append(parse_ctx_t *ctx, expr_t *expr)
+{
+    while (!expr->is_atom) {
+        expr = expr->pair.cdr;
+    }
+    assert(expr->atom.type == ATOM_TYPE_EMPTY_LIST);
+    expr->is_atom = false;
+    load_empty_list(ctx, &expr->pair.cdr);
+    return &expr->pair.car;
+}
+
+parse_state_t parse_proc(parse_ctx_t *ctx, const token_t *token)
+{
+    switch (ctx->state) {
+    case PARSE_STATE_INIT:
+        switch (token->type) {
+        case TOKEN_TYPE_INTEGER:
+            load_integer(ctx, &ctx->am->expr, token->integer);
+            ctx->state = PARSE_STATE_DONE;
+            break;
+        case TOKEN_TYPE_SYMBOL:
+            load_symbol(ctx, &ctx->am->expr, &token->symbol);
+            ctx->state = PARSE_STATE_DONE;
+            break;
+        case TOKEN_TYPE_OPEN_PAREN:
+            /* Closing the outermost list completes the expression. */
+            push_state(ctx, PARSE_STATE_DONE);
+            load_empty_list(ctx, &ctx->am->expr);
+            ctx->state = PARSE_STATE_LIST;
+            break;
+        case TOKEN_TYPE_CLOSE_PAREN:
+            ctx->state = PARSE_STATE_ERROR;
+            break;
+        }
+        break;
+
+    case PARSE_STATE_LIST:
+        switch (token->type) {
+        case TOKEN_TYPE_INTEGER:
+            load_integer(ctx, append(ctx, ctx->am->expr), token->integer);
+            break;
+        case TOKEN_TYPE_SYMBOL:
+            load_symbol(ctx, append(ctx, ctx->am->expr), &token->symbol);
+            break;
+        case TOKEN_TYPE_OPEN_PAREN:
+            if (stack_full(ctx)) {
+                /* Nested deeper than PARSE_MAX_DEPTH: reject the input
+                 * rather than write outside the state stack. */
+                ctx->state = PARSE_STATE_ERROR;
+                break;
+            }
+            /* NOTE(review): am_push()/am_pop() appear to save and restore
+             * am->expr around the nested list -- confirm against am.c. */
+            am_push(ctx->am);
+            push_state(ctx, PARSE_STATE_LIST);
+            load_empty_list(ctx, &ctx->am->expr);
+            break;
+        case TOKEN_TYPE_CLOSE_PAREN:
+            ctx->state = pop_state(ctx);
+            if (ctx->state == PARSE_STATE_LIST) {
+                /* A nested list ended: append it to the enclosing list. */
+                expr_t *nested = ctx->am->expr;
+                am_pop(ctx->am);
+                *append(ctx, ctx->am->expr) = nested;
+            }
+            break;
+        }
+        break;
+
+    case PARSE_STATE_DONE:
+    case PARSE_STATE_ERROR:
+        break;
+    }
+
+    assert(ctx->state != PARSE_STATE_INIT);
+    return ctx->state;
+}
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 439cb1a..051a173 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -10,6 +10,7 @@ endfunction()

 add_test_suites(
     am_tests.c
+    parse_tests.c
     store_tests.c
     token_tests.c
 )
diff --git a/tests/parse_tests.c b/tests/parse_tests.c
new file mode 100644
index 0000000..bdffe60
--- /dev/null
+++ b/tests/parse_tests.c
@@ -0,0 +1,229 @@
+#include "parse.h"
+#include "unity.h"
+
+static store_t store;
+static am_t am;
+static parse_ctx_t ctx;
+
+#define NELEMS(arr) (sizeof(arr) / sizeof(arr[0]))
+
+void setUp(void)
+{
+    store_init(&store);
+    am_init(&am);
+    parse_init(&am, &store, &ctx);
+}
+
+void tearDown(void)
+{
+}
+
+static void test_integer_123(void)
+{
+    const token_t token = { .type = TOKEN_TYPE_INTEGER, .integer = 123 };
+
+    const parse_state_t state = parse_proc(&ctx, &token);
+    TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
+
+    TEST_ASSERT_NOT_NULL(am.expr);
+    TEST_ASSERT_TRUE(am.expr->is_atom);
+    TEST_ASSERT_EQUAL(ATOM_TYPE_INTEGER, am.expr->atom.type);
+    TEST_ASSERT_EQUAL(123, am.expr->atom.integer);
+}
+
+static void test_integer_321(void)
+{
+    const token_t token = { .type = TOKEN_TYPE_INTEGER, .integer = 321 };
+
+    const parse_state_t state = parse_proc(&ctx, &token);
+    TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
+
+    TEST_ASSERT_NOT_NULL(am.expr);
+    TEST_ASSERT_TRUE(am.expr->is_atom);
+    TEST_ASSERT_EQUAL(ATOM_TYPE_INTEGER, am.expr->atom.type);
+    TEST_ASSERT_EQUAL(321, am.expr->atom.integer);
+}
+
+static void test_symbol_foo(void)
+{
+    const token_t token = {
+        .type = TOKEN_TYPE_SYMBOL,
+        .symbol = { .buf = "foo", .len = 3 },
+    };
+
+    const parse_state_t state = parse_proc(&ctx, &token);
+    TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
+
+    TEST_ASSERT_NOT_NULL(am.expr);
+    TEST_ASSERT_TRUE(am.expr->is_atom);
+    TEST_ASSERT_EQUAL(ATOM_TYPE_SYMBOL, am.expr->atom.type);
+    TEST_ASSERT_EQUAL(3, am.expr->atom.symbol.len);
+    TEST_ASSERT_EQUAL_MEMORY("foo", am.expr->atom.symbol.buf, 3);
+}
+
+static void test_symbol_quux(void)
+{
+    const token_t token = {
+        .type = TOKEN_TYPE_SYMBOL,
+        .symbol = { .buf = "quux", .len = 4 },
+    };
+
+    const parse_state_t state = parse_proc(&ctx, &token);
+    TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
+
+    TEST_ASSERT_NOT_NULL(am.expr);
+    TEST_ASSERT_TRUE(am.expr->is_atom);
+    TEST_ASSERT_EQUAL(ATOM_TYPE_SYMBOL, am.expr->atom.type);
+    TEST_ASSERT_EQUAL(4, am.expr->atom.symbol.len);
+    TEST_ASSERT_EQUAL_MEMORY("quux", am.expr->atom.symbol.buf, 4);
+}
+
+static void test_open_paren_close_paren(void)
+{
+    // ()
+    const token_t tokens[] = {
+        { .type = TOKEN_TYPE_OPEN_PAREN },
+        { .type = TOKEN_TYPE_CLOSE_PAREN },
+    };
+    parse_state_t state;
+
+    state = parse_proc(&ctx, tokens + 0);
+    TEST_ASSERT_EQUAL(PARSE_STATE_LIST, state);
+    state = parse_proc(&ctx, tokens + 1);
+    TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
+
+    TEST_ASSERT_NOT_NULL(am.expr);
+    TEST_ASSERT_TRUE(am.expr->is_atom);
+    TEST_ASSERT_EQUAL(ATOM_TYPE_EMPTY_LIST, am.expr->atom.type);
+}
+
+static void test_open_paren_foo_42_close_paren(void)
+{
+    // (foo 42) -> (foo . (42 . ()))
+    const token_t tokens[] = {
+        { .type = TOKEN_TYPE_OPEN_PAREN },
+        {
+            .type = TOKEN_TYPE_SYMBOL,
+            .symbol = { .buf = "foo", .len = 3 },
+        },
+        { .type = TOKEN_TYPE_INTEGER, .integer = 42 },
+        { .type = TOKEN_TYPE_CLOSE_PAREN },
+    };
+    parse_state_t state;
+
+    for (unsigned i = 0; i < NELEMS(tokens) - 1; ++i) {
+        state = parse_proc(&ctx, tokens + i);
+        TEST_ASSERT_EQUAL(PARSE_STATE_LIST, state);
+    }
+    state = parse_proc(&ctx, tokens + NELEMS(tokens) - 1);
+    TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
+
+    TEST_ASSERT_NOT_NULL(am.expr);
+    TEST_ASSERT_FALSE(am.expr->is_atom);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.car);
+    TEST_ASSERT_TRUE(am.expr->pair.car->is_atom);
+    TEST_ASSERT_EQUAL(ATOM_TYPE_SYMBOL, am.expr->pair.car->atom.type);
+    TEST_ASSERT_EQUAL(3, am.expr->pair.car->atom.symbol.len);
+    TEST_ASSERT_EQUAL_MEMORY("foo", am.expr->pair.car->atom.symbol.buf, 3);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.cdr);
+    TEST_ASSERT_FALSE(am.expr->pair.cdr->is_atom);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.car);
+    TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.car->is_atom);
+    TEST_ASSERT_EQUAL(
+        ATOM_TYPE_INTEGER, am.expr->pair.cdr->pair.car->atom.type);
+    TEST_ASSERT_EQUAL(42, am.expr->pair.cdr->pair.car->atom.integer);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.cdr);
+    TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.cdr->is_atom);
+    TEST_ASSERT_EQUAL(
+        ATOM_TYPE_EMPTY_LIST, am.expr->pair.cdr->pair.cdr->atom.type);
+}
+
+static void test_open_paren_1_open_paren_2_close_paren_3_close_paren(void)
+{
+    // (1 (2) 3) -> (1 . ((2 . ()) . (3 . ())))
+    const token_t tokens[] = {
+        { .type = TOKEN_TYPE_OPEN_PAREN },
+        { .type = TOKEN_TYPE_INTEGER, .integer = 1 },
+        { .type = TOKEN_TYPE_OPEN_PAREN },
+        { .type = TOKEN_TYPE_INTEGER, .integer = 2 },
+        { .type = TOKEN_TYPE_CLOSE_PAREN },
+        { .type = TOKEN_TYPE_INTEGER, .integer = 3 },
+        { .type = TOKEN_TYPE_CLOSE_PAREN },
+    };
+    parse_state_t state;
+
+    for (unsigned i = 0; i < NELEMS(tokens) - 1; ++i) {
+        state = parse_proc(&ctx, tokens + i);
+        TEST_ASSERT_EQUAL(PARSE_STATE_LIST, state);
+    }
+    state = parse_proc(&ctx, tokens + NELEMS(tokens) - 1);
+    TEST_ASSERT_EQUAL(PARSE_STATE_DONE, state);
+
+    TEST_ASSERT_NOT_NULL(am.expr);
+    TEST_ASSERT_FALSE(am.expr->is_atom);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.car);
+    TEST_ASSERT_TRUE(am.expr->pair.car->is_atom);
+    TEST_ASSERT_EQUAL(ATOM_TYPE_INTEGER, am.expr->pair.car->atom.type);
+    TEST_ASSERT_EQUAL(1, am.expr->pair.car->atom.integer);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.cdr);
+    TEST_ASSERT_FALSE(am.expr->pair.cdr->is_atom);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.car);
+    TEST_ASSERT_FALSE(am.expr->pair.cdr->pair.car->is_atom);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.car->pair.car);
+    TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.car->pair.car->is_atom);
+    TEST_ASSERT_EQUAL(
+        ATOM_TYPE_INTEGER, am.expr->pair.cdr->pair.car->pair.car->atom.type);
+    TEST_ASSERT_EQUAL(
+        2, am.expr->pair.cdr->pair.car->pair.car->atom.integer);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.car->pair.cdr);
+    TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.car->pair.cdr->is_atom);
+    TEST_ASSERT_EQUAL(
+        ATOM_TYPE_EMPTY_LIST,
+        am.expr->pair.cdr->pair.car->pair.cdr->atom.type);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.cdr);
+    TEST_ASSERT_FALSE(am.expr->pair.cdr->pair.cdr->is_atom);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.cdr->pair.car);
+    TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.cdr->pair.car->is_atom);
+    TEST_ASSERT_EQUAL(
+        ATOM_TYPE_INTEGER, am.expr->pair.cdr->pair.cdr->pair.car->atom.type);
+    TEST_ASSERT_EQUAL(
+        3, am.expr->pair.cdr->pair.cdr->pair.car->atom.integer);
+
+    TEST_ASSERT_NOT_NULL(am.expr->pair.cdr->pair.cdr->pair.cdr);
+    TEST_ASSERT_TRUE(am.expr->pair.cdr->pair.cdr->pair.cdr->is_atom);
+    TEST_ASSERT_EQUAL(
+        ATOM_TYPE_EMPTY_LIST,
+        am.expr->pair.cdr->pair.cdr->pair.cdr->atom.type);
+}
+
+static void test_close_paren(void)
+{
+    const token_t token = { .type = TOKEN_TYPE_CLOSE_PAREN };
+    const parse_state_t state = parse_proc(&ctx, &token);
+    TEST_ASSERT_EQUAL(PARSE_STATE_ERROR, state);
+}
+
+int main(void)
+{
+    UNITY_BEGIN();
+    RUN_TEST(test_integer_123);
+    RUN_TEST(test_integer_321);
+    RUN_TEST(test_symbol_foo);
+    RUN_TEST(test_symbol_quux);
+    RUN_TEST(test_open_paren_close_paren);
+    RUN_TEST(test_open_paren_foo_42_close_paren);
+    RUN_TEST(test_open_paren_1_open_paren_2_close_paren_3_close_paren);
+    RUN_TEST(test_close_paren);
+    return UNITY_END();
+}