/** Wasm binary preamble: the "\0asm" magic number followed by version 1. */
const HEADER = [ 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00 ];

/** Character that the tokenizer emits as its own token at each end of line. */
const LINE_END = "\n";

/**
 * Incremental tokenizer.
 *
 * Source text is appended with `handle()`; complete tokens are yielded as soon
 * as their terminating delimiter has been seen, so a token may be split over
 * several chunks. Three kinds of token are produced:
 *   - plain strings (words separated by whitespace),
 *   - the `LINE_END` string for every newline,
 *   - `{ string }` objects for double-quoted string literals.
 * A standalone `;` token starts a comment that lasts until the end of line.
 */
class Tokenizer {
  constructor() {
    this.delims = new Set([" ", "\r", "\n", "\t"]);
    this.skips = new Set([" ", "\r", "\t"]);
    this.comment_start = ";";
    this.string_quote = '"';
    this.buffer = [];
    this.comment = false;
    this.string = false;
  }

  /** Drops leading whitespace (but not newlines) from the buffer. */
  skip() {
    const idx = this.buffer.findIndex((cp) => !this.skips.has(cp));
    this.buffer = idx === -1 ? [] : this.buffer.slice(idx);
  }

  /**
   * Reads the remainder of a string literal whose opening quote has already
   * been consumed.
   *
   * @returns {{string: string}|undefined} The literal, or undefined when the
   *   closing quote has not arrived yet (the tokenizer then stays in string
   *   mode and retries on the next chunk).
   */
  next_string() {
    const idx = this.buffer.findIndex((cp) => cp === this.string_quote);
    if (idx === -1) {
      this.string = true;
      return undefined;
    }
    const string = this.buffer.slice(0, idx).join("");
    this.buffer = this.buffer.slice(idx + 1);
    this.string = false;
    return { string };
  }

  /**
   * Extracts the next complete token from the buffer.
   *
   * @returns {string|{string: string}|undefined} The token, or undefined when
   *   the buffer holds no complete token.
   */
  next() {
    if (this.string)
      return this.next_string();
    this.skip();
    if (this.buffer[0] === LINE_END)
      return this.buffer.shift();
    if (this.buffer[0] === this.string_quote) {
      this.buffer.shift();
      return this.next_string();
    }
    const idx = this.buffer.findIndex((cp) => this.delims.has(cp));
    if (idx === -1)
      return undefined; // Token is not terminated yet; wait for more input.
    const token = this.buffer.slice(0, idx).join("");
    this.buffer = this.buffer.slice(idx);
    return token;
  }

  /**
   * Appends `src` to the buffer and yields every token that is now complete,
   * with comments removed.
   *
   * @param {string} src - Next chunk of source text.
   */
  *handle(src) {
    // Pushed one code point at a time: spreading a large chunk into push()
    // can exceed the engine's argument limit.
    for (const cp of src)
      this.buffer.push(cp);
    let token;
    while ((token = this.next()) !== undefined) {
      if (token === this.comment_start)
        this.comment = true;
      else if (this.comment && token === LINE_END)
        this.comment = false;
      else if (!this.comment)
        yield token;
    }
  }
}

/** Parser states; each one names the token the parser expects next. */
const State = Object.freeze({
  TOP: 0,
  EXPORT: 1,
  FUNC: 2,
  RESULT: 3,
  PARAM_NAME: 4,
  PARAM_TYPE: 5,
  LOCAL_NAME: 6,
  LOCAL_TYPE: 7,
  MEM_NAME: 8,
  MEM_INITIAL: 9,
  MEM_MAX: 10,
  MEM_FLAGS: 11,
  IMPORT_NAME: 12,
  IMPORT_MOD: 13,
  IMPORT_FIELD: 14,
  GLOBAL_NAME: 15,
  GLOBAL_TYPE: 16,
  GLOBAL_INIT: 17,
  AT_MEM: 18,
  AT_ADDR: 19,
  BYTE: 20,
  WORD: 21,
  UTF8: 22,
  ALIGN: 23,
  DEF_NAME: 24,
  DEF_VALUE: 25,
});

/** Kinds of action the parser hands to the assembler. */
const Action = Object.freeze({
  APPEND: 0,
  EXPORT: 1,
  FUNC: 2,
  RESULT: 3,
  PARAM: 4,
  SYMBOL: 5,
  LOCAL: 6,
  MEM: 7,
  IMPORT: 8,
  GLOBAL: 9,
  AT: 10,
  DATA: 11,
  ALIGN: 12,
  DEF: 13,
});

/** Type names accepted in the source and their binary encodings. */
const types = {
  "func": 0x60,
  "i32": 0x7f,
};

/** Instruction mnemonics accepted in the source and their opcodes. */
const opcodes = {
  "end": 0x0b,
  "local.get": 0x20,
  "local.set": 0x21,
  "local.tee": 0x22,
  "global.get": 0x23,
  "global.set": 0x24,
  "i32.const": 0x41,
  "i32.mul": 0x6c,
};

/** Flag names accepted by `.mem` and their bit values. */
const mem_flags = {
  "max": 1,
  "shared": 2,
  "64": 4,
};

/** Maps a value type to the opcode of its constant instruction. */
const const_opcodes = {
  [types["i32"]]: opcodes["i32.const"],
};

/** Integer literal formats selected by the literal's last character. */
const integer_formats = new Map([
  ["b", { base: 2, pattern: /^[+-]?[01]+b$/ }],
  ["o", { base: 8, pattern: /^[+-]?[0-7]+o$/ }],
  ["h", { base: 16, pattern: /^[+-]?[0-9a-f]+h$/i }],
]);

/** Format used when no base suffix is present. */
const decimal_format = { base: 10, pattern: /^[+-]?[0-9]+$/ };

/**
 * Encodes a non-negative integer as unsigned LEB128.
 *
 * @param {number} value - Value to encode; non-numeric or negative input is
 *   encoded as 0.
 * @returns {number[]} The encoded bytes, least significant group first.
 */
function uleb128(value) {
  let rest = Math.max(0, Math.trunc(Number(value)) || 0);
  const bytes = [];
  do {
    const low = rest % 128;
    rest = Math.floor(rest / 128);
    bytes.push(rest > 0 ? low | 0x80 : low);
  } while (rest > 0);
  return bytes;
}

/**
 * Encodes a 32-bit integer as signed LEB128.
 *
 * @param {number} value - Value to encode; it is wrapped to 32 bits first, so
 *   non-numeric input is encoded as 0.
 * @returns {number[]} The encoded bytes, least significant group first.
 */
function sleb128(value) {
  let rest = value | 0;
  const bytes = [];
  for (;;) {
    const low = rest & 0x7f;
    rest >>= 7;
    // Finished once the remaining bits are pure sign extension of bit 6.
    const done = (rest === 0 && (low & 0x40) === 0)
      || (rest === -1 && (low & 0x40) !== 0);
    bytes.push(done ? low : low | 0x80);
    if (done)
      return bytes;
  }
}

/**
 * Encodes a vector: the LEB128 element count followed by the elements.
 *
 * @param {number[][]} items - Already encoded elements.
 * @returns {number[]} The encoded vector.
 */
function vector(items) {
  return [ ...uleb128(items.length), ...items.flat() ];
}

/**
 * State machine that turns tokens into assembler actions.
 *
 * Directives (tokens such as `.func`) switch the state; the handler of the
 * current state consumes the following tokens and returns an action object
 * (`{ type: Action.X, ... }`) once a directive is complete. Syntax errors are
 * reported with `console.error` and the parser returns to the TOP state.
 */
class Parser {
  /**
   * @param {TextEncoder} encoder - Encoder used for `.utf8` string data.
   */
  constructor(encoder) {
    this.encoder = encoder;
    this.tokens = [];
    this.tokenizer = new Tokenizer();
    this.state = State.TOP;
    this.directives = {
      ".export": State.EXPORT,
      ".func": State.FUNC,
      ".result": State.RESULT,
      ".param": State.PARAM_NAME,
      ".local": State.LOCAL_NAME,
      ".mem": State.MEM_NAME,
      ".import": State.IMPORT_NAME,
      ".global": State.GLOBAL_NAME,
      ".at": State.AT_MEM,
      ".byte": State.BYTE,
      ".word": State.WORD,
      ".utf8": State.UTF8,
      ".align": State.ALIGN,
      ".def": State.DEF_NAME,
    };
    this.handlers = {
      [State.TOP]: (token) => this.token_top(token),
      [State.EXPORT]: (token) => this.token_export(token),
      [State.FUNC]: (token) => this.token_func(token),
      [State.RESULT]: (token) => this.token_result(token),
      [State.PARAM_NAME]: (token) => this.token_param_name(token),
      [State.PARAM_TYPE]: (token) => this.token_param_type(token),
      [State.LOCAL_NAME]: (token) => this.token_local_name(token),
      [State.LOCAL_TYPE]: (token) => this.token_local_type(token),
      [State.MEM_NAME]: (token) => this.token_mem_name(token),
      // The original keyed this entry on the undeclared State.MEM_INIT.
      [State.MEM_INITIAL]: (token) => this.token_mem_init(token),
      [State.MEM_MAX]: (token) => this.token_mem_max(token),
      [State.MEM_FLAGS]: (token) => this.token_mem_flags(token),
      [State.IMPORT_NAME]: (token) => this.token_import_name(token),
      [State.IMPORT_MOD]: (token) => this.token_import_mod(token),
      [State.IMPORT_FIELD]: (token) => this.token_import_field(token),
      [State.GLOBAL_NAME]: (token) => this.token_global_name(token),
      [State.GLOBAL_TYPE]: (token) => this.token_global_type(token),
      [State.GLOBAL_INIT]: (token) => this.token_global_init(token),
      [State.AT_MEM]: (token) => this.token_at_mem(token),
      [State.AT_ADDR]: (token) => this.token_at_addr(token),
      [State.BYTE]: (token) => this.token_byte(token),
      [State.WORD]: (token) => this.token_word(token),
      [State.UTF8]: (token) => this.token_utf8(token),
      [State.ALIGN]: (token) => this.token_align(token),
      [State.DEF_NAME]: (token) => this.token_def_name(token),
      [State.DEF_VALUE]: (token) => this.token_def_value(token),
    };
    this.results = [];
    this.params = {};
    this.locals = {};
  }

  /**
   * Parses an integer literal. A trailing `b`, `o` or `h` selects base 2, 8
   * or 16; anything else is decimal. The whole token has to be a well-formed
   * literal, so identifiers that merely start with digits of the selected
   * base (e.g. `fetch`) are not mistaken for numbers.
   *
   * @param {*} token - Token to parse.
   * @returns {number|null} The value, or null when the token is no integer.
   */
  integer(token) {
    if (typeof token !== "string")
      return null;
    const format = integer_formats.get(token.slice(-1)) ?? decimal_format;
    if (!format.pattern.test(token))
      return null;
    return Number.parseInt(token, format.base);
  }

  /**
   * Parses an integer and reports `message` when the token is not one.
   *
   * @returns {number|undefined} The value, or undefined on error.
   */
  integer_or_report(token, message) {
    const value = this.integer(token);
    if (value == null) {
      console.error(message);
      return undefined;
    }
    return value;
  }

  /**
   * @returns {number|null} The opcode of a mnemonic, the value of an integer
   *   literal, or null when the token is neither.
   */
  translate_code(token) {
    return Object.hasOwn(opcodes, token)
      ? opcodes[token]
      : this.integer(token);
  }

  /** @returns {number|undefined} The encoding of a type name, if known. */
  translate_type(token) {
    return Object.hasOwn(types, token) ? types[token] : undefined;
  }

  /** Handles a token outside of any directive. */
  token_top(token) {
    if (token === LINE_END)
      return undefined;
    if (Object.hasOwn(this.directives, token)) {
      this.state = this.directives[token];
      return undefined;
    }
    const code = this.translate_code(token);
    // Compared against null so that 0 (e.g. `i32.const 0`) is emitted as a
    // byte rather than being looked up as a symbol.
    if (code != null)
      return { type: Action.APPEND, code };
    return { type: Action.SYMBOL, symbol: token };
  }

  token_export(token) {
    this.state = State.TOP;
    return { type: Action.EXPORT, name: token };
  }

  token_func(token) {
    this.state = State.TOP;
    return { type: Action.FUNC, name: token };
  }

  /** Collects result types until the end of line. */
  token_result(token) {
    if (token !== LINE_END) {
      this.results.push(this.translate_type(token));
      return undefined;
    }
    const action = { type: Action.RESULT, results: this.results };
    this.state = State.TOP;
    this.results = [];
    return action;
  }

  /** Expects a parameter name, or the end of line closing `.param`. */
  token_param_name(token) {
    if (token !== LINE_END) {
      this.current_param = token;
      this.state = State.PARAM_TYPE;
      return undefined;
    }
    const action = { type: Action.PARAM, params: this.params };
    this.state = State.TOP;
    this.params = {};
    return action;
  }

  token_param_type(token) {
    if (token === LINE_END) {
      console.error(
        "ERROR: Unexpected newline in .param: expected type");
      this.state = State.TOP;
      this.params = {};
      return;
    }
    this.params[this.current_param] = this.translate_type(token);
    this.current_param = undefined;
    this.state = State.PARAM_NAME;
  }

  /** Expects a local name, or the end of line closing `.local`. */
  token_local_name(token) {
    if (token !== LINE_END) {
      this.current_local = token;
      this.state = State.LOCAL_TYPE;
      return undefined;
    }
    const action = { type: Action.LOCAL, locals: this.locals };
    this.state = State.TOP;
    this.locals = {};
    return action;
  }

  token_local_type(token) {
    if (token === LINE_END) {
      console.error(
        "ERROR: Unexpected newline in .local: expected type");
      this.state = State.TOP;
      this.locals = {};
      return;
    }
    this.locals[this.current_local] = this.translate_type(token);
    this.current_local = undefined;
    this.state = State.LOCAL_NAME;
  }

  token_mem_name(token) {
    if (token === LINE_END) {
      console.error(
        "ERROR: Unexpected newline in .mem: expected name");
      this.state = State.TOP;
      return;
    }
    this.mem = { flags: 0 };
    this.mem_name = token;
    this.state = State.MEM_INITIAL;
  }

  token_mem_init(token) {
    if (token === LINE_END) {
      console.error(
        "ERROR: Unexpected newline in .mem: expected initial size");
      this.mem = undefined;
      this.mem_name = undefined;
      this.state = State.TOP;
      return;
    }
    this.mem.init = this.integer_or_report(
      token, `ERROR: Invalid initial size ${token} in .mem`);
    this.state = State.MEM_MAX;
  }

  /** The maximum size is optional: an end of line completes the `.mem`. */
  token_mem_max(token) {
    if (token === LINE_END)
      return this.mem_action();
    this.mem.max = this.integer_or_report(
      token, `ERROR: Invalid maximum size ${token} in .mem`);
    this.mem.flags |= mem_flags.max;
    this.state = State.MEM_FLAGS;
    return undefined;
  }

  /** Flags are optional and given as one comma-separated token. */
  token_mem_flags(token) {
    if (token !== LINE_END) {
      for (const flag of String(token).split(",")) {
        if (Object.hasOwn(mem_flags, flag))
          this.mem.flags |= mem_flags[flag];
        else
          console.error(`ERROR: Invalid flag ${flag} in .mem`);
      }
    }
    return this.mem_action();
  }

  token_import_name(token) {
    if (token === LINE_END) {
      console.error(
        "ERROR: Unexpected end of line in .import: expected name");
      this.state = State.TOP;
      return;
    }
    this.import = { name: token };
    this.state = State.IMPORT_MOD;
  }

  token_import_mod(token) {
    if (token !== LINE_END && token.string != undefined) {
      this.import.mod = token.string;
      this.state = State.IMPORT_FIELD;
      return;
    }
    if (token === LINE_END) {
      console.error(
        "ERROR: Unexpected end of line in .import: expected" +
        " module string");
    } else {
      console.error(
        `ERROR: Unexpected token ${token} in .import: expected` +
        " module string");
    }
    this.import = undefined;
    this.state = State.TOP;
  }

  token_import_field(token) {
    if (token === LINE_END || token.string == undefined) {
      if (token === LINE_END) {
        console.error(
          "ERROR: Unexpected end of line in .import: expected" +
          " field string");
      } else {
        console.error(
          "ERROR: Unexpected token in .import: expected field string");
      }
      this.import = undefined;
      this.state = State.TOP;
      return undefined;
    }
    this.import.field = token.string;
    const action = { type: Action.IMPORT, import: this.import };
    this.import = undefined;
    this.state = State.TOP;
    return action;
  }

  token_global_name(token) {
    if (token === LINE_END) {
      console.error(
        "ERROR: Unexpected end of line in .global: expected name");
      this.state = State.TOP;
      return;
    }
    this.global = {};
    this.global_name = token;
    this.state = State.GLOBAL_TYPE;
  }

  token_global_type(token) {
    if (token === LINE_END) {
      console.error(
        "ERROR: Unexpected newline in .global: expected type");
      this.global = undefined;
      this.global_name = undefined;
      this.state = State.TOP;
      return;
    }
    this.global.type = this.translate_type(token);
    if (this.global.type === undefined) {
      console.error(
        `ERROR: Unexpected token ${token} in .global: ` +
        "expected type");
    }
    this.state = State.GLOBAL_INIT;
  }

  token_global_init(token) {
    let action;
    if (token === LINE_END) {
      console.error(
        "ERROR: Unexpected newline in .global: expected" +
        " initial value");
    } else {
      const value = this.integer_or_report(
        token,
        `ERROR: Unexpected token ${token} in .global: expected` +
        " initial value");
      // NOTE: the value is stored as a single raw byte of the init
      // expression, not LEB128-encoded.
      this.global.init = [
        const_opcodes[this.global.type], value, opcodes["end"],
      ];
      action = {
        type: Action.GLOBAL,
        global: { [this.global_name]: this.global },
      };
    }
    this.global = undefined;
    this.global_name = undefined;
    this.state = State.TOP;
    return action;
  }

  token_at_mem(token) {
    if (token === LINE_END) {
      console.error(
        "ERROR: Unexpected end of line in .at: expected memory name");
      this.state = State.TOP;
      return;
    }
    this.at = { mem: token };
    this.state = State.AT_ADDR;
  }

  token_at_addr(token) {
    const value = this.integer(token);
    const at = this.at;
    // The state is reset on failure too; otherwise the next token would be
    // handled here again with `this.at` already cleared.
    this.at = undefined;
    this.state = State.TOP;
    if (value == null) {
      console.error(
        `ERROR: Unexpected token ${token} in .at: ` +
        "expected address");
      return undefined;
    }
    at.addr = value;
    return { type: Action.AT, at };
  }

  /** Emits one byte per token until the end of line. */
  token_byte(token) {
    if (token === LINE_END) {
      this.state = State.TOP;
      return undefined;
    }
    const value = this.integer(token);
    if (value == null) {
      console.error(
        `ERROR: Unexpected token ${token}, expected value`);
      return undefined;
    }
    if (value > 0xff)
      console.error(`WARNING: Value ${token} is truncated`);
    return { type: Action.DATA, size: 1, value: [ value & 0xff ] };
  }

  /** Emits one little-endian 16-bit word per token until the end of line. */
  token_word(token) {
    if (token === LINE_END) {
      this.state = State.TOP;
      return undefined;
    }
    const value = this.integer(token);
    if (value == null) {
      console.error(
        `ERROR: Unexpected token ${token}, expected value`);
      return undefined;
    }
    if (value > 0xffff)
      console.error(`WARNING: Value ${token} is truncated`);
    return {
      type: Action.DATA,
      size: 2,
      value: [ value & 0xff, (value >> 8) & 0xff ],
    };
  }

  /** Emits the UTF-8 bytes of each string literal until the end of line. */
  token_utf8(token) {
    if (token === LINE_END) {
      this.state = State.TOP;
      return undefined;
    }
    if (token.string == undefined) {
      console.error(
        `ERROR: Unexpected token ${token}, expected string`);
      return undefined;
    }
    const value = this.encoder.encode(token.string);
    return { type: Action.DATA, size: value.length, value };
  }

  /** `.align` without an argument aligns to 4. */
  token_align(token) {
    this.state = State.TOP;
    if (token === LINE_END)
      return { type: Action.ALIGN, alignment: 4 };
    const alignment = this.integer(token);
    if (alignment == null) {
      // No action is produced: an ALIGN action without a usable alignment
      // would make the assembler pad forever.
      console.error(
        `ERROR: Unexpected token ${token}, expected alignment`);
      return undefined;
    }
    return { type: Action.ALIGN, alignment };
  }

  token_def_name(token) {
    if (token === LINE_END) {
      console.error("ERROR: Unexpected end of line, expected name");
      this.state = State.TOP;
      return;
    }
    this.def_name = token;
    this.state = State.DEF_VALUE;
  }

  token_def_value(token) {
    const name = this.def_name;
    this.def_name = undefined;
    this.state = State.TOP;
    if (token === LINE_END) {
      console.error("ERROR: Unexpected end of line, expected value");
      return undefined;
    }
    const value = this.integer(token);
    if (value == null) {
      console.error(
        `ERROR: Unexpected token ${token}, expected value`);
      return undefined;
    }
    return { type: Action.DEF, def: { name, value } };
  }

  /** Completes the pending `.mem` directive and returns its action. */
  mem_action() {
    const action = {
      type: Action.MEM,
      mem: { [this.mem_name]: { ...this.mem } },
    };
    this.mem = undefined;
    this.mem_name = undefined;
    this.state = State.TOP;
    return action;
  }

  /**
   * Tokenizes `src` and yields the action of every completed construct.
   *
   * @param {string} src - Next chunk of source text.
   */
  *handle(src) {
    for (const token of this.tokenizer.handle(src)) {
      const action = this.handlers[this.state](token);
      if (action)
        yield action;
    }
  }
}

/** Section identifiers of the binary format. */
const Section = Object.freeze({
  TYPE: 0x01,
  IMPORT: 0x02,
  FUNC: 0x03,
  MEM: 0x05,
  GLOBAL: 0x06,
  EXPORT: 0x07,
  CODE: 0x0a,
  DATA: 0x0b,
});

/** Import/export kind codes of the binary format. */
const Kind = Object.freeze({
  FUNC: 0x00,
  MEM: 0x02,
});

/**
 * Streaming assembler: feed UTF-8 encoded source chunks to `push()`, then call
 * `wasm()` to obtain the binary module.
 *
 * Section sizes, vector counts, name lengths, memory limits and data offsets
 * are LEB128-encoded. NOTE: the entries of a function body (opcodes, literals
 * and resolved symbol indices) are written as single raw bytes, so immediates
 * outside 0..63 (signed) or 0..127 (unsigned) are not encoded correctly.
 * NOTE(review): `.def` values are recorded in `defs` but are not consulted
 * when symbols are resolved - confirm whether that is intended.
 */
export class Assembler {
  constructor() {
    this.encoder = new TextEncoder();
    this.decoder = new TextDecoder("utf-8");
    this.parser = new Parser(this.encoder);
    this.handlers = {
      [Action.APPEND]: (action) => this.action_append(action),
      [Action.EXPORT]: (action) => this.action_export(action),
      [Action.FUNC]: (action) => this.action_func(action),
      [Action.RESULT]: (action) => this.action_result(action),
      [Action.PARAM]: (action) => this.action_param(action),
      [Action.SYMBOL]: (action) => this.action_symbol(action),
      [Action.LOCAL]: (action) => this.action_local(action),
      [Action.MEM]: (action) => this.action_mem(action),
      [Action.IMPORT]: (action) => this.action_import(action),
      [Action.GLOBAL]: (action) => this.action_global(action),
      [Action.AT]: (action) => this.action_at(action),
      [Action.DATA]: (action) => this.action_data(action),
      [Action.ALIGN]: (action) => this.action_align(action),
      [Action.DEF]: (action) => this.action_def(action),
    };
    // Keyed by export name; a plain object because an Array rejects keys
    // such as "length".
    this.exports = {};
    this.funcs = {};
    this.mems = {};
    this.imports = [];
    this.globals = {};
    this.pos = { mem: 0, addr: 0 };
    this.data = [];
    this.defs = {};
  }

  /**
   * Returns the function currently being assembled, reporting an error when
   * `what` appears before any `.func`.
   */
  current_function(what) {
    const func = this.current_func === undefined
      ? undefined
      : this.funcs[this.current_func];
    if (func === undefined)
      console.error(`ERROR: ${what} outside of a function`);
    return func;
  }

  /**
   * Returns the byte array of the data segment opened by the last `.at`,
   * reporting an error when there is none.
   */
  current_segment() {
    const segment = this.data.at(-1);
    if (segment === undefined) {
      console.error("ERROR: Data emitted before any .at directive");
      return undefined;
    }
    return segment.data;
  }

  action_append(action) {
    this.current_function("Code")?.body.push(action.code);
  }

  /**
   * Records an export. The stored index reflects the functions known so far;
   * the final index is resolved when the export section is generated.
   */
  action_export(action) {
    const index = Object.keys(this.funcs).indexOf(action.name);
    this.exports[action.name] = { kind: Kind.FUNC, index };
  }

  action_func(action) {
    this.funcs[action.name] = {
      params: {},
      results: [],
      locals: {},
      body: [],
    };
    this.current_func = action.name;
  }

  action_result(action) {
    this.current_function(".result")?.results.push(...action.results);
  }

  action_param(action) {
    const func = this.current_function(".param");
    if (func !== undefined)
      Object.assign(func.params, action.params);
  }

  action_local(action) {
    const func = this.current_function(".local");
    if (func !== undefined)
      Object.assign(func.locals, action.locals);
  }

  /**
   * Appends the index of a symbol to the current function body. Parameters
   * take precedence over locals, locals over globals; an unresolved symbol is
   * reported and assembled as index 0.
   */
  action_symbol(action) {
    const func = this.current_function("Symbol");
    if (func === undefined)
      return;
    let index = this.lookup_param(func, action.symbol)
      ?? this.lookup_local(func, action.symbol)
      ?? this.lookup_global(action.symbol);
    if (index == null) {
      console.error(`ERROR: Unable to resolve symbol ${action.symbol}`);
      index = 0;
    }
    func.body.push(index);
  }

  action_mem(action) {
    Object.assign(this.mems, action.mem);
  }

  /** Marks a previously declared memory as imported. */
  action_import(action) {
    const mem = this.mems[action.import.name];
    if (mem === undefined) {
      console.error(`ERROR: No memory named ${action.import.name}`);
      return;
    }
    mem.imported = true;
    this.imports.push({
      mod: action.import.mod,
      field: action.import.field,
      kind: Kind.MEM,
      flags: mem.flags,
      init: mem.init,
      max: mem.max,
    });
  }

  action_global(action) {
    Object.assign(this.globals, action.global);
  }

  /** Opens a new data segment at the given memory and address. */
  action_at(action) {
    const mem = Object.keys(this.mems).indexOf(action.at.mem);
    if (mem === -1) {
      console.error(`ERROR: No memory named ${action.at.mem}`);
      return;
    }
    this.pos.mem = mem;
    this.pos.addr = action.at.addr;
    this.data.push({ loc: { ...this.pos }, data: [] });
  }

  action_data(action) {
    const data = this.current_segment();
    if (data === undefined)
      return;
    for (const byte of action.value)
      data.push(byte);
    this.pos.addr += action.size;
  }

  /** Pads the current segment with zeros up to a multiple of the alignment. */
  action_align(action) {
    const alignment = action.alignment;
    // `addr % alignment` is never 0 for a zero, negative-to-NaN or missing
    // alignment, which would loop forever.
    if (!(alignment > 0)) {
      console.error(`ERROR: Invalid alignment ${alignment}`);
      return;
    }
    const data = this.current_segment();
    if (data === undefined)
      return;
    while (this.pos.addr % alignment !== 0) {
      data.push(0);
      ++this.pos.addr;
    }
  }

  action_def(action) {
    this.defs[action.def.name] = action.def.value;
  }

  /**
   * Assembles the next chunk of source.
   *
   * @param {BufferSource} chunk - UTF-8 encoded source text; multi-byte
   *   sequences may be split across chunks.
   */
  push(chunk) {
    const text = this.decoder.decode(chunk, { stream: true });
    for (const action of this.parser.handle(text))
      this.handlers[action.type](action);
  }

  /** @returns {number|null} Index of a parameter, or null if unknown. */
  lookup_param(func, symbol) {
    const index = Object.keys(func.params).indexOf(symbol);
    return index === -1 ? null : index;
  }

  /**
   * @returns {number|null} Index of a local (locals are numbered after the
   *   parameters), or null if unknown.
   */
  lookup_local(func, symbol) {
    const index = Object.keys(func.locals).indexOf(symbol);
    if (index === -1)
      return null;
    return Object.keys(func.params).length + index;
  }

  /** @returns {number|null} Index of a global, or null if unknown. */
  lookup_global(symbol) {
    const index = Object.keys(this.globals).indexOf(symbol);
    return index === -1 ? null : index;
  }

  /** Encodes a name as its LEB128 byte length followed by its UTF-8 bytes. */
  name_wasm(name) {
    const utf8 = this.encoder.encode(name);
    return [ ...uleb128(utf8.length), ...utf8 ];
  }

  /** One function type per function, in declaration order. */
  wasm_section_type() {
    const contents = Object.values(this.funcs).map(({ params, results }) => {
      const param_types = Object.values(params);
      return [
        types["func"],
        ...uleb128(param_types.length),
        ...param_types,
        ...uleb128(results.length),
        ...results,
      ];
    });
    return vector(contents);
  }

  /** @returns {number[]|null} The import section, or null without imports. */
  wasm_section_import() {
    if (this.imports.length === 0)
      return null;
    const contents = this.imports.map((imp) => [
      ...this.name_wasm(imp.mod),
      ...this.name_wasm(imp.field),
      imp.kind,
      ...this.mem_wasm(imp),
    ]);
    return vector(contents);
  }

  /** Function i uses type i, matching `wasm_section_type`. */
  wasm_section_func() {
    const contents = Object.keys(this.funcs).map(
      (_, index) => uleb128(index));
    return vector(contents);
  }

  /** @returns {number[]|null} The non-imported memories, or null if none. */
  wasm_section_mem() {
    const mems = Object.values(this.mems).filter(
      ({ imported }) => !imported);
    if (mems.length === 0)
      return null;
    return vector(mems.map((mem) => this.mem_wasm(mem)));
  }

  /** @returns {number[]|null} The global section, or null without globals. */
  wasm_section_global() {
    const globals = Object.values(this.globals);
    if (globals.length === 0)
      return null;
    // Every global is declared mutable (1).
    return vector(globals.map(({ type, init }) => [ type, 1, ...init ]));
  }

  /**
   * Function exports are resolved here rather than at `.export` time, so a
   * function may be exported before it is defined. Exports of functions that
   * were never defined are reported and left out.
   */
  wasm_section_export() {
    const func_names = Object.keys(this.funcs);
    const contents = [];
    for (const [ name, { kind, index } ] of Object.entries(this.exports)) {
      let target = index;
      if (kind === Kind.FUNC) {
        target = func_names.indexOf(name);
        if (target === -1) {
          console.error(
            `ERROR: Exported function ${name} is not defined`);
          continue;
        }
      }
      contents.push([ ...this.name_wasm(name), kind, ...uleb128(target) ]);
    }
    return vector(contents);
  }

  wasm_section_code() {
    const contents = Object.values(this.funcs).map(({ body, locals }) => {
      // The locals vector holds (count, type) groups; each local becomes its
      // own group of one.
      const local_groups = Object.values(locals).map((type) => [ 1, type ]);
      const payload = [ ...vector(local_groups), ...body, opcodes["end"] ];
      return [ ...uleb128(payload.length), ...payload ];
    });
    return vector(contents);
  }

  /** @returns {number[]|null} The data section, or null without segments. */
  wasm_section_data() {
    if (this.data.length === 0)
      return null;
    const contents = this.data.map(({ loc, data }) => [
      // Segment flag 0 implies memory 0; flag 2 carries an explicit index.
      ...(loc.mem === 0 ? [ 0 ] : [ 2, ...uleb128(loc.mem) ]),
      opcodes["i32.const"],
      ...sleb128(loc.addr),
      opcodes["end"],
      ...uleb128(data.length),
      ...data,
    ]);
    return vector(contents);
  }

  /**
   * Generates the binary module from everything assembled so far.
   *
   * @returns {Uint8Array} The module bytes.
   */
  wasm() {
    const template = [
      [ Section.TYPE, () => this.wasm_section_type() ],
      [ Section.IMPORT, () => this.wasm_section_import() ],
      [ Section.FUNC, () => this.wasm_section_func() ],
      [ Section.MEM, () => this.wasm_section_mem() ],
      [ Section.GLOBAL, () => this.wasm_section_global() ],
      [ Section.EXPORT, () => this.wasm_section_export() ],
      [ Section.CODE, () => this.wasm_section_code() ],
      [ Section.DATA, () => this.wasm_section_data() ],
    ];
    const sections = template.flatMap(([ code, generator ]) => {
      const body = generator();
      // A generator returns null when its section is to be omitted.
      return body == null
        ? []
        : [ code, ...uleb128(body.length), ...body ];
    });
    return new Uint8Array([ ...HEADER, ...sections ]);
  }

  /** Encodes memory limits; the maximum is present only with the max flag. */
  mem_wasm({ flags, init, max }) {
    if (flags & mem_flags.max)
      return [ flags, ...uleb128(init), ...uleb128(max) ];
    return [ flags, ...uleb128(init) ];
  }
}