From 5f93a7a5798a3d4b4539d17544f2b52b405a65a0 Mon Sep 17 00:00:00 2001 From: Guy Bedford Date: Wed, 28 Apr 2021 20:19:00 +0200 Subject: [PATCH 1/5] support string encodings --- lexer.js | 46 ++++++++++++++++++++++++++++------------------ lib/lexer.wasm | Bin 24123 -> 24116 bytes lib/lexer.wat | 24 ++++++++++-------------- src/lexer.c | 12 ++++++------ src/lexer.js | 20 ++++++++++++++++---- test/_unit.js | 4 ++++ 6 files changed, 64 insertions(+), 42 deletions(-) diff --git a/lexer.js b/lexer.js index 286a170..107f0b1 100755 --- a/lexer.js +++ b/lexer.js @@ -47,13 +47,23 @@ function parseCJS (source, name = '@') { e.loc = pos; throw e; } - const result = { exports: [..._exports].filter(expt => !unsafeGetters.has(expt)), reexports: [...reexports] }; + const result = { exports: [..._exports].filter(expt => expt !== undefined && !unsafeGetters.has(expt)), reexports: [...reexports].filter(reexpt => reexpt !== undefined) }; resetState(); return result; } -function addExport (name) { - _exports.add(name); +function decode (name) { + if (name[0] === '"' || name[0] === "'") { + try { + return (0, eval)(name); + } + catch (e) { + console.log(e); + } + } + else { + return name; + } } function parseSource (cjsSource) { @@ -354,7 +364,7 @@ function tryParseObjectDefineOrKeys (keys) { pos += 5; ch = commentWhitespace(); if (ch !== 58/*:*/) break; - addExport(expt); + _exports.add(decode(expt)); pos = revertPos; return; } @@ -420,13 +430,13 @@ function tryParseObjectDefineOrKeys (keys) { pos++; ch = commentWhitespace(); if (ch !== 41/*)*/) break; - addExport(expt); + _exports.add(decode(expt)); return; } break; } if (expt) { - unsafeGetters.add(expt); + unsafeGetters.add(decode(expt)); } } else if (keys && ch === 107/*k*/ && source.startsWith('eys', pos + 1)) { @@ -794,7 +804,7 @@ function tryParseObjectDefineOrKeys (keys) { const starExportSpecifier = starExportMap[id]; if (starExportSpecifier) { - reexports.add(starExportSpecifier); + reexports.add(decode(starExportSpecifier)); pos = revertPos; return; } @@ -856,7 +866,7 @@ function tryParseExportsDotAssign (assign) { const endPos = pos; ch = commentWhitespace(); if (ch === 61/*=*/) { - addExport(source.slice(startPos, endPos)); + _exports.add(decode(source.slice(startPos, endPos))); return; } } @@ -867,16 +877,16 @@ function tryParseExportsDotAssign (assign) { pos++; ch = commentWhitespace(); if (ch === 39/*'*/ || ch === 34/*"*/) { - pos++; const startPos = pos; + pos++; stringLiteral(ch); - const endPos = pos++; + const endPos = ++pos; ch = commentWhitespace(); if (ch !== 93/*]*/) break; pos++; ch = commentWhitespace(); if (ch !== 61/*=*/) break; - addExport(source.slice(startPos, endPos)); + _exports.add(decode(source.slice(startPos, endPos))); } break; } @@ -911,21 +921,21 @@ function tryParseRequire (requireType) { if (ch === 40/*(*/) { pos++; ch = commentWhitespace(); - const reexportStart = pos + 1; + const reexportStart = pos; if (ch === 39/*'*/ || ch === 34/*"*/) { stringLiteral(ch); - const reexportEnd = pos++; + const reexportEnd = ++pos; ch = commentWhitespace(); if (ch === 41/*)*/) { switch (requireType) { case ExportAssign: - reexports.add(source.slice(reexportStart, reexportEnd)); + reexports.add(decode(source.slice(reexportStart, reexportEnd))); return true; case ExportStar: - reexports.add(source.slice(reexportStart, reexportEnd)); + reexports.add(decode(source.slice(reexportStart, reexportEnd))); return true; default: - lastStarExportSpecifier = source.slice(reexportStart, reexportEnd); + lastStarExportSpecifier = decode(source.slice(reexportStart, reexportEnd)); return true; } } @@ -954,7 +964,7 @@ function tryParseLiteralExports () { } ch = source.charCodeAt(pos); } - addExport(source.slice(startPos, endPos)); + _exports.add(decode(source.slice(startPos, endPos))); } else if (ch === 46/*.*/ && source.startsWith('..', pos + 1)) { pos += 3; @@ -981,7 +991,7 @@ function tryParseLiteralExports () { return; } ch = source.charCodeAt(pos); - addExport(source.slice(startPos, endPos)); + _exports.add(decode(source.slice(startPos, endPos))); } } else { diff --git a/lib/lexer.wasm b/lib/lexer.wasm index 83e000d372643b53622df3fe0c3738bbb2099b09..cfebb0a80c00a55b03207d485993502e6be0ac29 100755 GIT binary patch delta 175 zcmdnJhjGgu#tkiuj4w8~GV*aTexE#ryD7|(DNBjjjA_pTMget4O?O@fCIEb1{#E%;cYn+=5I3 lGL8%&ai9$#yO=gF*N|nGQDOpez}6}-ps8Tq{6Ifi4**(L73Gi7aFpdrgX*`Hs61>}y+H}#|S08zvv3IG5A diff --git a/lib/lexer.wat b/lib/lexer.wat index 9988dc9..8857c5a 100644 --- a/lib/lexer.wat +++ b/lib/lexer.wat @@ -1222,32 +1222,28 @@ i32.const 0 i32.const 0 i32.load offset=20540 - local.tee 3 i32.const 2 i32.add + local.tee 3 i32.store offset=20540 call 39 i32.const 41 i32.ne br_if 0 (;@2;) - local.get 4 - i32.const 2 - i32.add - local.set 1 block ;; label = @3 local.get 0 i32.const -1 i32.add - local.tee 2 + local.tee 1 i32.const 1 i32.gt_u br_if 0 (;@3;) block ;; label = @4 block ;; label = @5 - local.get 2 + local.get 1 br_table 1 (;@4;) 0 (;@5;) 1 (;@4;) end - local.get 1 + local.get 4 local.get 3 i32.const 0 i32.load offset=4000 @@ -1255,7 +1251,7 @@ i32.const 1 return end - local.get 1 + local.get 4 local.get 3 i32.const 0 i32.load offset=4000 @@ -1265,7 +1261,7 @@ end i32.const 0 i32.load offset=20532 - local.get 1 + local.get 4 i32.store i32.const 0 i32.load offset=20532 @@ -4490,18 +4486,18 @@ i32.const 0 i32.const 0 i32.load offset=20540 + local.tee 0 i32.const 2 i32.add - local.tee 0 i32.store offset=20540 local.get 2 call 28 i32.const 0 i32.const 0 i32.load offset=20540 - local.tee 2 i32.const 2 i32.add + local.tee 2 i32.store offset=20540 call 39 i32.const 93 @@ -5386,18 +5382,18 @@ i32.const 0 i32.const 0 i32.load offset=20540 + local.tee 2 i32.const 2 i32.add - local.tee 2 i32.store offset=20540 local.get 0 call 28 i32.const 0 i32.const 0 i32.load offset=20540 - local.tee 3 i32.const 2 i32.add + local.tee 3 i32.store offset=20540 call 39 local.tee 0 diff --git a/src/lexer.c b/src/lexer.c index d7a84d8..c22cbed 100755 --- a/src/lexer.c +++ b/src/lexer.c @@ -891,10 +891,10 @@ void tryParseExportsDotAssign (bool assign) { pos++; ch = commentWhitespace(); if (ch == '\'' || ch == '"') { - pos++; uint16_t* startPos = pos; + pos++; stringLiteral(ch); - uint16_t* endPos = pos++; + uint16_t* endPos = ++pos; ch = commentWhitespace(); if (ch != ']') break; pos++; @@ -934,10 +934,10 @@ bool tryParseRequire (enum RequireType requireType) { if (ch == '(') { pos++; ch = commentWhitespace(); - uint16_t* reexportStart = pos + 1; + uint16_t* reexportStart = pos; if (ch == '\'' || ch == '"') { stringLiteral(ch); - uint16_t* reexportEnd = pos++; + uint16_t* reexportEnd = ++pos; ch = commentWhitespace(); if (ch == ')') { switch (requireType) { @@ -981,9 +981,9 @@ void tryParseLiteralExports () { addExport(startPos, endPos); } else if (ch == '\'' || ch == '"') { - uint16_t* startPos = ++pos; + uint16_t* startPos = pos++; stringLiteral(ch); - uint16_t* endPos = pos++; + uint16_t* endPos = ++pos; ch = commentWhitespace(); if (ch == ':') { pos++; diff --git a/src/lexer.js b/src/lexer.js index 0a47e50..a09d95b 100755 --- a/src/lexer.js +++ b/src/lexer.js @@ -22,18 +22,30 @@ export function parse (source, name = '@') { let exports = new Set(), reexports = new Set(), unsafeGetters = new Set(); while (wasm.rre()) - reexports.add(source.slice(wasm.res(), wasm.ree())); + reexports.add(decode(source.slice(wasm.res(), wasm.ree()))); while (wasm.ru()) - unsafeGetters.add(source.slice(wasm.us(), wasm.ue())); + unsafeGetters.add(decode(source.slice(wasm.us(), wasm.ue()))); while (wasm.re()) { - let exptStr = source.slice(wasm.es(), wasm.ee()); - if (!unsafeGetters.has(exptStr)) + let exptStr = decode(source.slice(wasm.es(), wasm.ee())); + if (exptStr !== undefined && !unsafeGetters.has(exptStr)) exports.add(exptStr); } return { exports: [...exports], reexports: [...reexports] }; } +function decode (str) { + if (str[0] === '"' || str[0] === '\'') { + try { + return (0, eval)(str); + } + catch {} + } + else { + return str; + } +} + function copyBE (src, outBuf16) { const len = src.length; let i = 0; diff --git a/test/_unit.js b/test/_unit.js index 50a9678..01ccdf0 100755 --- a/test/_unit.js +++ b/test/_unit.js @@ -479,6 +479,8 @@ suite('Lexer', () => { const { exports } = parse(` module.exports = { 'ab cd': foo }; exports['not identifier'] = 'asdf'; + exports['\u{D83C}\u{DF10}'] = 1; + exports['\u{D83C}'] = 1; exports['@notidentifier'] = 'asdf'; Object.defineProperty(exports, "%notidentifier", { value: x }); Object.defineProperty(exports, 'hm🤔', { value: x }); @@ -490,6 +492,8 @@ suite('Lexer', () => { assert.deepStrictEqual(exports, [ 'ab cd', 'not identifier', + '\u{D83C}\u{DF10}', + '\u{D83C}', '@notidentifier', '%notidentifier', 'hm🤔', From 5064ba6e73a20428e51a433bf5b03136b1e49b06 Mon Sep 17 00:00:00 2001 From: Guy Bedford Date: Wed, 28 Apr 2021 20:22:38 +0200 Subject: [PATCH 2/5] unlog --- lexer.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lexer.js b/lexer.js index 107f0b1..8a48e9d 100755 --- a/lexer.js +++ b/lexer.js @@ -57,9 +57,7 @@ function decode (name) { try { return (0, eval)(name); } - catch (e) { - console.log(e); - } + catch {} } else { return name; From ab394fe950738b42862e89cffb7001ca8843a2c7 Mon Sep 17 00:00:00 2001 From: Guy Bedford Date: Wed, 28 Apr 2021 20:27:35 +0200 Subject: [PATCH 3/5] fixup undefined checks --- src/lexer.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lexer.js b/src/lexer.js index a09d95b..675f9d9 100755 --- a/src/lexer.js +++ b/src/lexer.js @@ -21,8 +21,11 @@ export function parse (source, name = '@') { let exports = new Set(), reexports = new Set(), unsafeGetters = new Set(); - while (wasm.rre()) - reexports.add(decode(source.slice(wasm.res(), wasm.ree()))); + while (wasm.rre()) { + const reexptStr = decode(source.slice(wasm.res(), wasm.ree())); + if (reexptStr) + reexports.add(reexptStr); + } while (wasm.ru()) unsafeGetters.add(decode(source.slice(wasm.us(), wasm.ue()))); while (wasm.re()) { @@ -31,7 +34,7 @@ export function parse (source, name = '@') { exports.add(exptStr); } - return { exports: [...exports], reexports: [...reexports] }; + return { exports: [...exports], reexports: [...reexports].filter(reept => reept !== undefined) }; } function decode (str) { From 3dc8d1c9152cf9b7b10195d8b483d8313ece13cf Mon Sep 17 00:00:00 2001 From: Guy Bedford Date: Wed, 28 Apr 2021 21:36:32 +0200 Subject: [PATCH 4/5] escaping fixes --- lexer.js | 7 +++---- lib/lexer.wasm | Bin 24116 -> 24093 bytes lib/lexer.wat | 22 +++++++--------------- src/lexer.c | 5 ++--- src/lexer.js | 2 +- test/_unit.js | 6 ++++-- 6 files changed, 17 insertions(+), 25 deletions(-) diff --git a/lexer.js b/lexer.js index 8a48e9d..f134392 100755 --- a/lexer.js +++ b/lexer.js @@ -332,7 +332,7 @@ function tryParseObjectDefineOrKeys (keys) { pos++; ch = commentWhitespace(); if (ch !== 39/*'*/ && ch !== 34/*"*/) break; - const exportPos = ++pos; + const exportPos = pos + 1; stringLiteral(ch); expt = source.slice(exportPos, pos); pos++; @@ -876,7 +876,6 @@ function tryParseExportsDotAssign (assign) { ch = commentWhitespace(); if (ch === 39/*'*/ || ch === 34/*"*/) { const startPos = pos; - pos++; stringLiteral(ch); const endPos = ++pos; ch = commentWhitespace(); @@ -976,9 +975,9 @@ function tryParseLiteralExports () { ch = commentWhitespace(); } else if (ch === 39/*'*/ || ch === 34/*"*/) { - const startPos = ++pos; + const startPos = pos; stringLiteral(ch); - const endPos = pos++; + const endPos = ++pos; ch = commentWhitespace(); if (ch === 58/*:*/) { pos++; diff --git a/lib/lexer.wasm b/lib/lexer.wasm index cfebb0a80c00a55b03207d485993502e6be0ac29..5da6822dbf53dfc33e9146e030be986e4dc264cf 100755 GIT binary patch delta 103 zcmdn8hjH#6#tkiuj2AbzGCt*DTr@eG@2;u_)1C#4icAU&0y2&ajtpR~5`!aCmKjJw tfeFY`WZGQ7pDV$*ZSpg_sd*G5xaLEn&xq1M+{3BQZ diff --git a/lib/lexer.wat b/lib/lexer.wat index 8857c5a..496d7b5 100644 --- a/lib/lexer.wat +++ b/lib/lexer.wat @@ -1614,12 +1614,8 @@ br_if 3 (;@1;) end i32.const 0 - i32.const 0 i32.load offset=20540 - i32.const 2 - i32.add - local.tee 2 - i32.store offset=20540 + local.set 2 local.get 0 call 28 i32.const 0 @@ -1629,6 +1625,10 @@ i32.const 2 i32.add i32.store offset=20540 + local.get 2 + i32.const 2 + i32.add + local.set 2 call 39 i32.const 44 i32.ne @@ -4484,12 +4484,8 @@ br_if 2 (;@1;) end i32.const 0 - i32.const 0 i32.load offset=20540 - local.tee 0 - i32.const 2 - i32.add - i32.store offset=20540 + local.set 0 local.get 2 call 28 i32.const 0 @@ -5380,12 +5376,8 @@ br_if 4 (;@3;) end i32.const 0 - i32.const 0 i32.load offset=20540 - local.tee 2 - i32.const 2 - i32.add - i32.store offset=20540 + local.set 2 local.get 0 call 28 i32.const 0 diff --git a/src/lexer.c b/src/lexer.c index c22cbed..c324558 100755 --- a/src/lexer.c +++ b/src/lexer.c @@ -339,7 +339,7 @@ void tryParseObjectDefineOrKeys (bool keys) { pos++; ch = commentWhitespace(); if (ch != '\'' && ch != '"') break; - exportStart = ++pos; + exportStart = pos + 1; stringLiteral(ch); exportEnd = pos; pos++; @@ -892,7 +892,6 @@ void tryParseExportsDotAssign (bool assign) { ch = commentWhitespace(); if (ch == '\'' || ch == '"') { uint16_t* startPos = pos; - pos++; stringLiteral(ch); uint16_t* endPos = ++pos; ch = commentWhitespace(); @@ -981,7 +980,7 @@ void tryParseLiteralExports () { addExport(startPos, endPos); } else if (ch == '\'' || ch == '"') { - uint16_t* startPos = pos++; + uint16_t* startPos = pos; stringLiteral(ch); uint16_t* endPos = ++pos; ch = commentWhitespace(); diff --git a/src/lexer.js b/src/lexer.js index 675f9d9..2e8fe85 100755 --- a/src/lexer.js +++ b/src/lexer.js @@ -34,7 +34,7 @@ export function parse (source, name = '@') { exports.add(exptStr); } - return { exports: [...exports], reexports: [...reexports].filter(reept => reept !== undefined) }; + return { exports: [...exports], reexports: [...reexports] }; } function decode (str) { diff --git a/test/_unit.js b/test/_unit.js index 01ccdf0..238df40 100755 --- a/test/_unit.js +++ b/test/_unit.js @@ -479,8 +479,9 @@ suite('Lexer', () => { const { exports } = parse(` module.exports = { 'ab cd': foo }; exports['not identifier'] = 'asdf'; - exports['\u{D83C}\u{DF10}'] = 1; - exports['\u{D83C}'] = 1; + exports['\\u{D83C}\\u{DF10}'] = 1; + exports['\\u{D83C}'] = 1; + exports['\\''] = 1; exports['@notidentifier'] = 'asdf'; Object.defineProperty(exports, "%notidentifier", { value: x }); Object.defineProperty(exports, 'hm🤔', { value: x }); @@ -494,6 +495,7 @@ suite('Lexer', () => { 'not identifier', '\u{D83C}\u{DF10}', '\u{D83C}', + '\'', '@notidentifier', '%notidentifier', 'hm🤔', From 2d8cfafbb05cb13415e3669c87d7d9409f946aed Mon Sep 17 00:00:00 2001 From: Guy Bedford Date: Wed, 28 Apr 2021 21:40:33 +0200 Subject: [PATCH 5/5] consistent string handling --- lexer.js | 5 ++--- lib/lexer.wasm | Bin 24093 -> 24081 bytes lib/lexer.wat | 9 +-------- src/lexer.c | 5 ++--- 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/lexer.js b/lexer.js index f134392..abfcc5f 100755 --- a/lexer.js +++ b/lexer.js @@ -332,10 +332,9 @@ function tryParseObjectDefineOrKeys (keys) { pos++; ch = commentWhitespace(); if (ch !== 39/*'*/ && ch !== 34/*"*/) break; - const exportPos = pos + 1; + const exportPos = pos; stringLiteral(ch); - expt = source.slice(exportPos, pos); - pos++; + expt = source.slice(exportPos, ++pos); ch = commentWhitespace(); if (ch !== 44/*,*/) break; pos++; diff --git a/lib/lexer.wasm b/lib/lexer.wasm index 5da6822dbf53dfc33e9146e030be986e4dc264cf..4244f7eb1dd96209a9cb703095cabc7ac685d656 100755 GIT binary patch delta 36 ucmV+<0NekayaAEC0kDJt0mZY00p$z