From e995ffbfba3df1e44e2662a7b40c2041f8610b92 Mon Sep 17 00:00:00 2001 From: Jason Miller Date: Sat, 3 Oct 2020 20:43:10 -0400 Subject: [PATCH 1/4] Expose requires[] as start/end offsets, like es-module-lexer. --- Makefile | 2 +- include-wasm/cjs-module-lexer.h | 45 ++++++++++++++++++++++++++++++++- include/cjs-module-lexer.h | 2 +- src/lexer.c | 7 ++++- src/lexer.js | 6 +++-- test/_unit.js | 19 ++++++++++++++ 6 files changed, 75 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index c22e5ea..403c3e9 100755 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ lib/lexer.wasm: include-wasm/cjs-module-lexer.h src/lexer.c @mkdir -p lib ../wasi-sdk-11.0/bin/clang src/lexer.c -I include-wasm --sysroot=../wasi-sdk-11.0/share/wasi-sysroot -o lib/lexer.wasm -nostartfiles \ -Wl,-z,stack-size=13312,--no-entry,--compress-relocations,--strip-all,--export=__heap_base,\ - --export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ree \ + --export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ree,--export=rrq,--export=rqs,--export=rqe \ -Wno-logical-op-parentheses -Wno-parentheses \ -Oz diff --git a/include-wasm/cjs-module-lexer.h b/include-wasm/cjs-module-lexer.h index 72ab1a5..9d4a6a4 100755 --- a/include-wasm/cjs-module-lexer.h +++ b/include-wasm/cjs-module-lexer.h @@ -30,6 +30,9 @@ Slice* export_write_head = NULL; Slice* first_reexport = NULL; Slice* reexport_read_head = NULL; Slice* reexport_write_head = NULL; +Slice* first_require = NULL; +Slice* require_read_head = NULL; +Slice* require_write_head = NULL; void* analysis_base; void* analysis_head; @@ -48,6 +51,9 @@ const uint16_t* sa (uint32_t utf16Len) { first_reexport = NULL; reexport_write_head = NULL; reexport_read_head = NULL; + first_require = NULL; + require_write_head = NULL; + require_read_head = NULL; return source; } @@ -72,6 +78,14 @@ uint32_t res () { uint32_t ree () { return reexport_read_head->end - source; } +// getRequireStart +uint32_t rqs () { + return require_read_head->start - source; +} +// getRequireEnd +uint32_t rqe () { + return require_read_head->end - source; +} // readExport bool re () { if (export_read_head == NULL) @@ -92,6 +106,16 @@ bool rre () { return false; return true; } +// readRequire +bool rrq () { + if (require_read_head == NULL) + require_read_head = first_require; + else + require_read_head = require_read_head->next; + if (require_read_head == NULL) + return false; + return true; +} bool parse (uint32_t point); @@ -119,9 +143,28 @@ void _addReexport (const uint16_t* start, const uint16_t* end) { reexport->end = end; reexport->next = NULL; } +void _addRequire (const uint16_t* start, const uint16_t* end) { + Slice* require = (Slice*)(analysis_head); + analysis_head = analysis_head + sizeof(Slice); + if (require_write_head == NULL) + first_require = require; + else + require_write_head->next = require; + require_write_head = require; + require->start = start; + require->end = end; + require->next = NULL; +} void (*addExport)(const uint16_t*, const uint16_t*) = &_addExport; void (*addReexport)(const uint16_t*, const uint16_t*) = &_addReexport; -bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t* start, const uint16_t* end), void (*addReexport)(const uint16_t* start, const uint16_t* end)); +void (*addRequire)(const uint16_t*, const uint16_t*) = &_addRequire; +bool parseCJS ( + uint16_t* source, + uint32_t sourceLen, + void (*addExport)(const uint16_t* start, const uint16_t* end), + void (*addReexport)(const uint16_t* start, const uint16_t* end), + void (*addRequire)(const uint16_t* start, const uint16_t* end) +); void tryBacktrackAddStarExportBinding (uint16_t* pos); bool tryParseRequire (bool directStarExport); diff --git a/include/cjs-module-lexer.h b/include/cjs-module-lexer.h index d83907b..edfb061 100755 --- a/include/cjs-module-lexer.h +++ b/include/cjs-module-lexer.h @@ -27,7 +27,7 @@ typedef struct StarExportBinding StarExportBinding; void bail (uint32_t err); -bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t*, const uint16_t*), void (*addReexport)(const uint16_t*, const uint16_t*)); +bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t*, const uint16_t*), void (*addReexport)(const uint16_t*, const uint16_t*), void (*addRequire)(const uint16_t*, const uint16_t*)); void tryBacktrackAddStarExportBinding (uint16_t* pos); bool tryParseRequire (bool directStarExport); diff --git a/src/lexer.c b/src/lexer.c index b0cde97..275c984 100755 --- a/src/lexer.c +++ b/src/lexer.c @@ -41,15 +41,18 @@ const StarExportBinding* STAR_EXPORT_STACK_END = &starExportStack_[MAX_STAR_EXPO void (*addExport)(const uint16_t*, const uint16_t*); void (*addReexport)(const uint16_t*, const uint16_t*); +void (*addRequire)(const uint16_t*, const uint16_t*); // Note: parsing is based on the _assumption_ that the source is already valid -bool parseCJS (uint16_t* _source, uint32_t _sourceLen, void (*_addExport)(const uint16_t*, const uint16_t*), void (*_addReexport)(const uint16_t*, const uint16_t*)) { +bool parseCJS (uint16_t* _source, uint32_t _sourceLen, void (*_addExport)(const uint16_t*, const uint16_t*), void (*_addReexport)(const uint16_t*, const uint16_t*), void (*_addRequire)(const uint16_t*, const uint16_t*)) { source = _source; sourceLen = _sourceLen; if (_addExport) addExport = _addExport; if (_addReexport) addReexport = _addReexport; + if (_addRequire) + addRequire = _addRequire; templateStackDepth = 0; openTokenDepth = 0; @@ -669,6 +672,7 @@ bool tryParseRequire (bool directStarExport) { uint16_t* reexportEnd = pos++; ch = commentWhitespace(); if (ch == ')') { + addRequire(reexportStart, reexportEnd); if (directStarExport) { addReexport(reexportStart, reexportEnd); } @@ -684,6 +688,7 @@ bool tryParseRequire (bool directStarExport) { uint16_t* reexportEnd = pos++; ch = commentWhitespace(); if (ch == ')') { + addRequire(reexportStart, reexportEnd); if (directStarExport) { addReexport(reexportStart, reexportEnd); } diff --git a/src/lexer.js b/src/lexer.js index f5e01c6..e11918a 100755 --- a/src/lexer.js +++ b/src/lexer.js @@ -17,7 +17,7 @@ export function parse (source, name = '@') { if (!wasm.parseCJS(addr, source.length, 0, 0)) throw Object.assign(new Error(`Parse error ${name}${wasm.e()}:${source.slice(0, wasm.e()).split('\n').length}:${wasm.e() - source.lastIndexOf('\n', wasm.e() - 1)}`), { idx: wasm.e() }); - let exports = new Set(), reexports = new Set(); + let exports = new Set(), reexports = new Set(), requires = []; while (wasm.rre()) reexports.add(source.slice(wasm.res(), wasm.ree())); while (wasm.re()) { @@ -25,8 +25,10 @@ export function parse (source, name = '@') { if (!strictReserved.has(exptStr)) exports.add(exptStr); } + while (wasm.rrq()) + requires.push({ s: wasm.rqs(), e: wasm.rqe() }); - return { exports: [...exports], reexports: [...reexports] }; + return { exports: [...exports], reexports: [...reexports], requires }; } function copy (src, outBuf16) { diff --git a/test/_unit.js b/test/_unit.js index 0f3cf05..416317d 100755 --- a/test/_unit.js +++ b/test/_unit.js @@ -428,6 +428,25 @@ suite('Lexer', () => { assert.equal(reexports[1], './another'); }); + + test('Requires', () => { + const source = ` + const a = require("module/a"); + const b = require("./module-b.js"); + `; + const { exports, reexports, requires } = parse(source); + assert.equal(requires.length, 2); + + assert.deepEqual(requires[0], { s: 26, e: 34 }); + assert.equal(source.slice(requires[0].s, requires[0].e), `module/a`); + + assert.deepEqual(requires[1], { s: 63, e: 76 }); + assert.equal(source.slice(requires[1].s, requires[1].e), `./module-b.js`); + + assert.equal(exports.length, 0); + assert.equal(reexports.length, 0); + }); + test('Single parse cases', () => { parse(`'asdf'`); parse(`/asdf/`); From eab3372b3037dc543f40ee3d59db9049ca06ceec Mon Sep 17 00:00:00 2001 From: Jason Miller Date: Sat, 3 Oct 2020 20:43:18 -0400 Subject: [PATCH 2/4] (js version too!) --- lexer.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lexer.js b/lexer.js index 9b172bf..1ab966d 100755 --- a/lexer.js +++ b/lexer.js @@ -11,7 +11,8 @@ let openTokenDepth, starExportMap, lastStarExportSpecifier, _exports, - reexports; + reexports, + requires; function resetState () { openTokenDepth = 0; @@ -28,6 +29,7 @@ function resetState () { _exports = new Set(); reexports = new Set(); + requires = []; } const strictReserved = new Set(['implements', 'interface', 'let', 'package', 'private', 'protected', 'public', 'static', 'yield', 'enum']); @@ -42,7 +44,7 @@ module.exports = function parseCJS (source, name = '@') { e.loc = pos; throw e; } - const result = { exports: [..._exports], reexports: [...reexports] }; + const result = { exports: [..._exports], reexports: [...reexports], requires }; resetState(); return result; } @@ -645,6 +647,7 @@ function tryParseRequire (directStarExport) { const reexportEnd = pos++; ch = commentWhitespace(); if (ch === 41/*)*/) { + requires.push({ s: reexportStart, e: reexportEnd }); if (directStarExport) { reexports.add(source.slice(reexportStart, reexportEnd)); } @@ -659,6 +662,7 @@ function tryParseRequire (directStarExport) { const reexportEnd = pos++; ch = commentWhitespace(); if (ch === 41/*)*/) { + requires.push({ s: reexportStart, e: reexportEnd }); if (directStarExport) { reexports.add(source.slice(reexportStart, reexportEnd)); } From b3ba4893df2578e45a34dbbe73bf091d636e8937 Mon Sep 17 00:00:00 2001 From: Jason Miller Date: Sat, 3 Oct 2020 20:54:29 -0400 Subject: [PATCH 3/4] One more test --- test/_unit.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/_unit.js b/test/_unit.js index 416317d..664f929 100755 --- a/test/_unit.js +++ b/test/_unit.js @@ -17,7 +17,7 @@ suite('Lexer', () => { beforeEach(async () => await loadParser()); test('TypeScript reexports', () => { - var { exports, reexports } = parse(` + var { exports, reexports, requires } = parse(` "use strict"; function __export(m) { for (var p in m) if (!exports.hasOwnProperty(p)) exports[p] = m[p]; @@ -35,6 +35,12 @@ suite('Lexer', () => { assert.equal(reexports[1], 'external2'); assert.equal(reexports[2], 'external3'); assert.equal(reexports[3], 'external4'); + + assert.equal(requires.length, 4); + assert.deepEqual(requires[0], { s: 229, e: 238 }); + assert.deepEqual(requires[1], { s: 273, e: 282 }); + assert.deepEqual(requires[2], { s: 315, e: 324 }); + assert.deepEqual(requires[3], { s: 364, e: 373 }); }); test('Rollup Babel reexports', () => { From dd3d34344f6272222ec2f9f28cf9c0533f72c115 Mon Sep 17 00:00:00 2001 From: Jason Miller Date: Sat, 10 Oct 2020 10:36:06 -0400 Subject: [PATCH 4/4] Include quotes in s/e Co-authored-by: Guy Bedford --- lexer.js | 4 ++-- src/lexer.c | 4 ++-- test/_unit.js | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lexer.js b/lexer.js index e97add9..489202b 100755 --- a/lexer.js +++ b/lexer.js @@ -656,7 +656,7 @@ function tryParseRequire (requireType) { const reexportEnd = pos++; ch = commentWhitespace(); if (ch === 41/*)*/) { - requires.push({ s: reexportStart, e: reexportEnd }); + requires.push({ s: reexportStart - 1, e: reexportEnd + 1 }); switch (requireType) { case ExportAssign: lastExportsAssignSpecifier = source.slice(reexportStart, reexportEnd); @@ -675,7 +675,7 @@ function tryParseRequire (requireType) { const reexportEnd = pos++; ch = commentWhitespace(); if (ch === 41/*)*/) { - requires.push({ s: reexportStart, e: reexportEnd }); + requires.push({ s: reexportStart - 1, e: reexportEnd + 1 }); switch (requireType) { case ExportAssign: lastExportsAssignSpecifier = source.slice(reexportStart, reexportEnd); diff --git a/src/lexer.c b/src/lexer.c index 6fefe17..974c0f5 100755 --- a/src/lexer.c +++ b/src/lexer.c @@ -679,7 +679,7 @@ bool tryParseRequire (enum RequireType requireType) { uint16_t* reexportEnd = pos++; ch = commentWhitespace(); if (ch == ')') { - addRequire(reexportStart, reexportEnd); + addRequire(reexportStart - 1, reexportEnd + 1); switch (requireType) { case ExportStar: addReexport(reexportStart, reexportEnd); @@ -700,7 +700,7 @@ bool tryParseRequire (enum RequireType requireType) { uint16_t* reexportEnd = pos++; ch = commentWhitespace(); if (ch == ')') { - addRequire(reexportStart, reexportEnd); + addRequire(reexportStart - 1, reexportEnd + 1); switch (requireType) { case ExportStar: addReexport(reexportStart, reexportEnd); diff --git a/test/_unit.js b/test/_unit.js index 3892e89..d376ac3 100755 --- a/test/_unit.js +++ b/test/_unit.js @@ -37,10 +37,10 @@ suite('Lexer', () => { assert.equal(reexports[3], 'external4'); assert.equal(requires.length, 4); - assert.deepEqual(requires[0], { s: 229, e: 238 }); - assert.deepEqual(requires[1], { s: 273, e: 282 }); - assert.deepEqual(requires[2], { s: 315, e: 324 }); - assert.deepEqual(requires[3], { s: 364, e: 373 }); + assert.deepEqual(requires[0], { s: 228, e: 239 }); + assert.deepEqual(requires[1], { s: 272, e: 283 }); + assert.deepEqual(requires[2], { s: 314, e: 325 }); + assert.deepEqual(requires[3], { s: 363, e: 374 }); }); test('Rollup Babel reexports', () => { @@ -442,11 +442,11 @@ suite('Lexer', () => { const { exports, reexports, requires } = parse(source); assert.equal(requires.length, 2); - assert.deepEqual(requires[0], { s: 26, e: 34 }); - assert.equal(source.slice(requires[0].s, requires[0].e), `module/a`); + assert.deepEqual(requires[0], { s: 25, e: 35 }); + assert.equal(source.slice(requires[0].s, requires[0].e), `"module/a"`); - assert.deepEqual(requires[1], { s: 63, e: 76 }); - assert.equal(source.slice(requires[1].s, requires[1].e), `./module-b.js`); + assert.deepEqual(requires[1], { s: 62, e: 77 }); + assert.equal(source.slice(requires[1].s, requires[1].e), `"./module-b.js"`); assert.equal(exports.length, 0); assert.equal(reexports.length, 0);