From b7715083a3c599f5caa17b87f0563ed9173f28f0 Mon Sep 17 00:00:00 2001 From: yogeshwaran-c Date: Tue, 24 Feb 2026 02:31:08 +0530 Subject: [PATCH] fix(compiler): parse named HTML entities containing digits The lexer's isNamedEntityEnd function stopped scanning entity names when encountering a digit character, causing 24 valid HTML named entities with digits in their names (e.g. &sup1;, &frac12;, &blk34;) to be treated as plain text instead of decoded to their corresponding Unicode characters. Fixes #51323 --- packages/compiler/src/ml_parser/lexer.ts | 7 ++++++- .../test/ml_parser/html_parser_spec.ts | 11 ++++++++++ .../compiler/test/ml_parser/lexer_spec.ts | 21 +++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/packages/compiler/src/ml_parser/lexer.ts b/packages/compiler/src/ml_parser/lexer.ts index 3c8b04ab61b4..013b2eb1505f 100644 --- a/packages/compiler/src/ml_parser/lexer.ts +++ b/packages/compiler/src/ml_parser/lexer.ts @@ -1431,7 +1431,12 @@ function isDigitEntityEnd(code: number): boolean { } function isNamedEntityEnd(code: number): boolean { - return code === chars.$SEMICOLON || code === chars.$EOF || !chars.isAsciiLetter(code); + // Named entities may contain digits (e.g. &sup1;, &frac12;, &blk34;). + return ( + code === chars.$SEMICOLON || + code === chars.$EOF || + !(chars.isAsciiLetter(code) || chars.isDigit(code)) + ); } function isExpansionCaseStart(peek: number): boolean { diff --git a/packages/compiler/test/ml_parser/html_parser_spec.ts b/packages/compiler/test/ml_parser/html_parser_spec.ts index 47b956aea40e..2ea1422e793d 100644 --- a/packages/compiler/test/ml_parser/html_parser_spec.ts +++ b/packages/compiler/test/ml_parser/html_parser_spec.ts @@ -68,6 +68,17 @@ describe('HtmlParser', () => { ]); }); + it('should parse named HTML entities containing digits', () => { + expect(humanizeDom(parser.parse('
<div>&sup1;</div>
', 'TestComp'))).toEqual([ + [html.Element, 'div', 0], + [html.Text, '\u00B9', 1, [''], ['\u00B9', '&sup1;'], ['']], + ]); + expect(humanizeDom(parser.parse('
<div>&frac12;</div>
', 'TestComp'))).toEqual([ + [html.Element, 'div', 0], + [html.Text, '\u00BD', 1, [''], ['\u00BD', '&frac12;'], ['']], + ]); + }); + it('should normalize line endings within CDATA', () => { const parsed = parser.parse('', 'TestComp'); expect(humanizeDom(parsed)).toEqual([ diff --git a/packages/compiler/test/ml_parser/lexer_spec.ts b/packages/compiler/test/ml_parser/lexer_spec.ts index 9d7bbd1c5c74..e3ebac0e8270 100644 --- a/packages/compiler/test/ml_parser/lexer_spec.ts +++ b/packages/compiler/test/ml_parser/lexer_spec.ts @@ -2094,6 +2094,27 @@ describe('HtmlLexer', () => { ]); }); + it('should parse named entities containing digits', () => { + expect(tokenizeAndHumanizeParts('&sup1;')).toEqual([ + [TokenType.TEXT, ''], + [TokenType.ENCODED_ENTITY, '\u00B9', '&sup1;'], + [TokenType.TEXT, ''], + [TokenType.EOF], + ]); + expect(tokenizeAndHumanizeParts('&frac12;')).toEqual([ + [TokenType.TEXT, ''], + [TokenType.ENCODED_ENTITY, '\u00BD', '&frac12;'], + [TokenType.TEXT, ''], + [TokenType.EOF], + ]); + expect(tokenizeAndHumanizeParts('&blk34;')).toEqual([ + [TokenType.TEXT, ''], + [TokenType.ENCODED_ENTITY, '\u2593', '&blk34;'], + [TokenType.TEXT, ''], + [TokenType.EOF], + ]); + }); + it('should parse hexadecimal entities', () => { expect(tokenizeAndHumanizeParts('&#x41;&#X41;')).toEqual([ [TokenType.TEXT, ''],