Implement tokenisation
This commit is contained in:
6
lib/CMakeLists.txt
Normal file
6
lib/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
# Library target `imp`: the in-memory stream plus the tokeniser.
add_library(imp memory_stream.c token.c)

# PUBLIC: the include/ directory is used when building imp and is also
# propagated to every target that links against it.
target_include_directories(imp PUBLIC include)

# configure_target() is a project helper defined outside this file.
configure_target(imp)
|
||||
13
lib/include/expr.h
Normal file
13
lib/include/expr.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef EXPR_H
#define EXPR_H

#include <stddef.h>

/* Capacity, in bytes, of the buffer embedded in symbol_t. */
#define MAX_SYMBOL_LEN 32U

/*
 * A symbol name stored inline in a fixed-size buffer.
 *
 * `len` is the number of bytes of `buf` currently in use. The tokeniser
 * fills `buf` byte by byte and never writes a terminating NUL, so `buf`
 * must not be treated as a C string.
 */
typedef struct {
    char buf[MAX_SYMBOL_LEN];
    size_t len;
} symbol_t;

#endif /* EXPR_H */
|
||||
15
lib/include/memory_stream.h
Normal file
15
lib/include/memory_stream.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#ifndef MEMORY_STREAM_H
#define MEMORY_STREAM_H

#include "stream.h"

#include <stddef.h>
#include <stdint.h> /* uint8_t is used directly below; do not rely on stream.h for it */

/*
 * A stream_t that reads from a caller-supplied range of bytes.
 *
 * `stream` must remain the first member: the callbacks in memory_stream.c
 * receive a stream_t * and cast it back to memory_stream_t *.
 */
typedef struct {
    stream_t stream;
    const uint8_t *src; /* next byte to be read */
    const uint8_t *end; /* one past the last readable byte */
} memory_stream_t;

/*
 * Initialise `s` to read the `size` bytes starting at `src`.
 *
 * The bytes are not copied; only the pointer is stored, so the memory must
 * stay valid for as long as the stream is in use.
 */
void memory_stream_init(memory_stream_t *s, const uint8_t *src, size_t size);

#endif /* MEMORY_STREAM_H */
|
||||
20
lib/include/stream.h
Normal file
20
lib/include/stream.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef STREAM_H
#define STREAM_H

#include <stdint.h>

/* Outcome of a single byte read from a stream. */
typedef enum {
    STREAM_STATUS_OK,    /* a byte was stored through `out` */
    STREAM_STATUS_ERROR, /* the read failed (never produced by the in-memory stream) */
    STREAM_STATUS_END,   /* no bytes are left to read */
} stream_status_t;

/*
 * Byte-oriented input stream interface.
 *
 * Concrete streams embed this struct and fill in both callbacks:
 *   get_byte  - store the next byte in *out and advance past it.
 *   peek_byte - store the next byte in *out without advancing.
 */
typedef struct stream {
    stream_status_t (*get_byte)(struct stream *s, uint8_t *out);
    stream_status_t (*peek_byte)(struct stream *s, uint8_t *out);
} stream_t;

/*
 * Convenience wrappers that dispatch through the stream's callbacks.
 *
 * Arguments and the whole expansion are parenthesised so that expressions
 * such as `&obj.stream` can be passed safely. Note that `stream` is
 * evaluated twice, so it must not have side effects.
 */
#define STREAM_GET_BYTE(stream, out) ((stream)->get_byte((stream), (out)))
#define STREAM_PEEK_BYTE(stream, out) ((stream)->peek_byte((stream), (out)))

#endif /* STREAM_H */
|
||||
26
lib/include/token.h
Normal file
26
lib/include/token.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#ifndef TOKEN_H
#define TOKEN_H

#include "expr.h"
#include "stream.h"

#include <stdint.h> /* int64_t is used directly below; do not rely on stream.h for it */

/* Kinds of token produced by token_read(). */
typedef enum {
    TOKEN_TYPE_INTEGER,     /* run of decimal digits; value in token_t.integer */
    TOKEN_TYPE_SYMBOL,      /* any other run of non-delimiter bytes; in token_t.symbol */
    TOKEN_TYPE_OPEN_PAREN,  /* '(' */
    TOKEN_TYPE_CLOSE_PAREN, /* ')' */
} token_type_t;

/*
 * A single token. `type` selects which member of the anonymous union (a C11
 * feature) is meaningful; the parenthesis tokens carry no payload.
 */
typedef struct {
    token_type_t type;
    union {
        int64_t integer; /* valid when type == TOKEN_TYPE_INTEGER */
        symbol_t symbol; /* valid when type == TOKEN_TYPE_SYMBOL */
    };
} token_t;

/* Result of token_read(). */
typedef enum { TOKEN_OK, TOKEN_FAILED } token_status_t;

/*
 * Read the next token from `input` into `*out`, skipping leading whitespace.
 *
 * Returns TOKEN_OK when a token was stored in `*out`, and TOKEN_FAILED when
 * no token could be produced (for example, the stream ended before any
 * token started).
 */
token_status_t token_read(stream_t *input, token_t *out);

#endif /* TOKEN_H */
|
||||
34
lib/memory_stream.c
Normal file
34
lib/memory_stream.c
Normal file
@@ -0,0 +1,34 @@
|
||||
#include "memory_stream.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
 * Common body of the get/peek callbacks.
 *
 * Stores the byte at the current read position in *out. When `consume` is
 * true the read position is then advanced by one. Returns STREAM_STATUS_END,
 * leaving *out untouched, once the read position has reached the end.
 */
static stream_status_t proc_byte(stream_t *s, uint8_t *out, bool consume)
{
    /* stream_t is the first member of memory_stream_t, so the cast recovers
     * the enclosing object. */
    memory_stream_t *mem = (memory_stream_t *)s;

    if (mem->src >= mem->end) {
        return STREAM_STATUS_END;
    }

    *out = *mem->src;
    if (consume) {
        mem->src++;
    }
    return STREAM_STATUS_OK;
}
|
||||
|
||||
/* stream_t::get_byte callback: read the next byte and advance past it. */
static stream_status_t get_byte(stream_t *s, uint8_t *out)
{
    const bool consume = true;

    return proc_byte(s, out, consume);
}
|
||||
|
||||
/* stream_t::peek_byte callback: read the next byte but leave it in the stream. */
static stream_status_t peek_byte(stream_t *s, uint8_t *out)
{
    const bool consume = false;

    return proc_byte(s, out, consume);
}
|
||||
|
||||
/*
 * Initialise `s` as a stream over the `size` bytes starting at `src`.
 *
 * The bytes are not copied; only the pointer is stored, so the memory must
 * remain valid while the stream is in use. A NULL `src` yields an empty
 * stream (every read returns STREAM_STATUS_END) regardless of `size`,
 * which avoids doing pointer arithmetic on a null pointer.
 *
 * The parameter is named `src` to match the prototype in memory_stream.h.
 */
void memory_stream_init(memory_stream_t *s, const uint8_t *src, size_t size)
{
    s->stream.get_byte = get_byte;
    s->stream.peek_byte = peek_byte;
    s->src = src;
    s->end = (src != NULL) ? src + size : src;
}
|
||||
71
lib/token.c
Normal file
71
lib/token.c
Normal file
@@ -0,0 +1,71 @@
|
||||
#include "token.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Scanner states used by token_read(). */
typedef enum {
    STATE_INIT = 0,     /* no token started yet; whitespace is being skipped */
    STATE_INTEGER = 1,  /* accumulating the digits of an integer token */
    STATE_SYMBOL = 2,   /* accumulating the bytes of a symbol token */
    STATE_FINISHED = 3, /* a single-byte parenthesis token is complete */
} state_t;
|
||||
|
||||
/*
 * Report whether `byte` ends the token currently being read: either
 * parenthesis, or anything isspace() classifies as whitespace.
 */
static bool is_delim(uint8_t byte)
{
    switch (byte) {
    case '(':
    case ')':
        return true;
    default:
        return isspace(byte) != 0;
    }
}
|
||||
|
||||
/*
 * Read one token from `input` into `*out`.
 *
 * Leading whitespace is skipped. '(' and ')' are single-byte tokens. A token
 * starting with a decimal digit is an integer; any other non-space byte
 * starts a symbol. Integers and symbols run until the next delimiter
 * (whitespace or a parenthesis) or the end of the stream; the delimiter is
 * left unread for the next call.
 *
 * Returns TOKEN_OK when a token was stored in `*out`. Returns TOKEN_FAILED
 * when:
 *   - the stream ended before any token started;
 *   - the stream reported an error (a truncated token is not returned);
 *   - an integer token contains a non-digit byte or does not fit in int64_t;
 *   - a symbol is longer than MAX_SYMBOL_LEN bytes.
 * On the malformed-token failures the offending byte has already been
 * consumed and the rest of the token is left in the stream; the contents of
 * `*out` are then unspecified.
 *
 * These conditions are checked at run time rather than with assert() because
 * the input is external data and the checks must survive NDEBUG builds.
 */
token_status_t token_read(stream_t *input, token_t *out)
{
    state_t state = STATE_INIT;

    while (state != STATE_FINISHED) {
        uint8_t byte;
        stream_status_t status = STREAM_PEEK_BYTE(input, &byte);

        if (status == STREAM_STATUS_ERROR) {
            return TOKEN_FAILED;
        }
        if (status != STREAM_STATUS_OK) {
            break; /* end of input also ends the token in progress, if any */
        }

        /* A delimiter terminates an integer or symbol and stays unread. */
        if (state != STATE_INIT && is_delim(byte)) {
            break;
        }

        /* The peek succeeded, so a failing read here is a stream fault. */
        status = STREAM_GET_BYTE(input, &byte);
        if (status != STREAM_STATUS_OK) {
            return TOKEN_FAILED;
        }

        switch (state) {
        case STATE_INIT:
            if (byte == '(') {
                out->type = TOKEN_TYPE_OPEN_PAREN;
                state = STATE_FINISHED;
            } else if (byte == ')') {
                out->type = TOKEN_TYPE_CLOSE_PAREN;
                state = STATE_FINISHED;
            } else if (isdigit(byte)) {
                out->type = TOKEN_TYPE_INTEGER;
                out->integer = byte - '0';
                state = STATE_INTEGER;
            } else if (!isspace(byte)) {
                out->type = TOKEN_TYPE_SYMBOL;
                out->symbol.buf[0] = (char)byte;
                out->symbol.len = 1;
                state = STATE_SYMBOL;
            }
            /* Whitespace: stay in STATE_INIT and keep skipping. */
            break;

        case STATE_INTEGER: {
            if (!isdigit(byte)) {
                return TOKEN_FAILED; /* e.g. "12ab" */
            }
            const int64_t digit = byte - '0';
            /* Reject before multiplying: signed overflow is undefined. */
            if (out->integer > (INT64_MAX - digit) / 10) {
                return TOKEN_FAILED;
            }
            out->integer = out->integer * 10 + digit;
            break;
        }

        case STATE_SYMBOL:
            if (out->symbol.len >= MAX_SYMBOL_LEN) {
                return TOKEN_FAILED; /* symbol would overflow its buffer */
            }
            out->symbol.buf[out->symbol.len++] = (char)byte;
            break;

        case STATE_FINISHED:
            break;
        }
    }

    return state != STATE_INIT ? TOKEN_OK : TOKEN_FAILED;
}
|
||||
Reference in New Issue
Block a user