diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py
index 10127e58897a58..966614ae41fcf9 100644
--- a/Lib/_pyrepl/commands.py
+++ b/Lib/_pyrepl/commands.py
@@ -312,9 +312,8 @@ class left(MotionCommand):
     def do(self) -> None:
         r = self.reader
         for _ in range(r.get_arg()):
-            p = r.pos - 1
-            if p >= 0:
-                r.pos = p
+            if r.pos > 0:
+                r.pos = r.prev_grapheme_boundary()
             else:
                 self.reader.error("start of buffer")
 
@@ -324,9 +323,8 @@ def do(self) -> None:
         r = self.reader
         b = r.buffer
         for _ in range(r.get_arg()):
-            p = r.pos + 1
-            if p <= len(b):
-                r.pos = p
+            if r.pos < len(b):
+                r.pos = r.next_grapheme_boundary()
             else:
                 self.reader.error("end of buffer")
 
@@ -409,8 +407,9 @@ def do(self) -> None:
         b = r.buffer
         for i in range(r.get_arg()):
             if r.pos > 0:
-                r.pos -= 1
-                del b[r.pos]
+                prev = r.prev_grapheme_boundary()
+                del b[prev:r.pos]
+                r.pos = prev
                 r.dirty = True
             else:
                 self.reader.error("can't backspace at start")
@@ -433,7 +432,8 @@ def do(self) -> None:
 
         for i in range(r.get_arg()):
             if r.pos != len(b):
-                del b[r.pos]
+                next_pos = r.next_grapheme_boundary()
+                del b[r.pos:next_pos]
                 r.dirty = True
             else:
                 self.reader.error("end of buffer")
diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py
index 9ab92f64d1ef63..b28b4ef659f64b 100644
--- a/Lib/_pyrepl/reader.py
+++ b/Lib/_pyrepl/reader.py
@@ -22,6 +22,7 @@
 from __future__ import annotations
 
 import sys
+import unicodedata
 import _colorize
 
 from contextlib import contextmanager
@@ -458,6 +459,50 @@ def eol(self, p: int | None = None) -> int:
             p += 1
         return p
 
+    def prev_grapheme_boundary(self, pos: int | None = None) -> int:
+        """Return the position at the start of the grapheme cluster
+        preceding pos (or self.pos).
+
+        For plain ASCII this is just pos - 1. For combining characters
+        (e.g. 'e' + U+0301 COMBINING ACUTE ACCENT) it skips the whole
+        cluster so that one Backspace/Left acts on the visual character.
+
+        Clusters are only searched for within the current line. When
+        nothing precedes pos on its line, the single preceding buffer
+        item (the line break) is stepped over instead; at position 0
+        the result is 0.
+        """
+        if pos is None:
+            pos = self.pos
+        bol = self.bol(pos)
+        if pos <= bol:
+            # Returning pos here would leave Left/Backspace stuck at bol.
+            return max(pos - 1, 0)
+        line = "".join(self.buffer[bol:pos])
+        # Find the last grapheme cluster in the line up to pos
+        *_, last = unicodedata.iter_graphemes(line)  # type: ignore[attr-defined]
+        return bol + last.start  # type: ignore[no-any-return]
+
+    def next_grapheme_boundary(self, pos: int | None = None) -> int:
+        """Return the position just past the grapheme cluster starting
+        at pos (or self.pos).
+
+        For plain ASCII this is just pos + 1. For combining characters
+        it skips the whole cluster.
+
+        Clusters are only searched for within the current line. When
+        nothing follows pos on its line, the single buffer item at pos
+        (the line break) is stepped over instead; at the end of the
+        buffer the result is len(self.buffer).
+        """
+        if pos is None:
+            pos = self.pos
+        eol = self.eol(pos)
+        if pos >= eol:
+            # Returning pos here would leave Right/Delete stuck at eol.
+            return min(pos + 1, len(self.buffer))
+        tail = "".join(self.buffer[pos:eol])
+        first = next(unicodedata.iter_graphemes(tail))  # type: ignore[attr-defined]
+        return pos + first.end  # type: ignore[no-any-return]
+
     def max_column(self, y: int) -> int:
         """Return the last x-offset for line y"""
         return self.screeninfo[y][0] + sum(self.screeninfo[y][1])
diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py
index 23b8fa6b9c7625..e9d0ea5d42a8d8 100644
--- a/Lib/_pyrepl/readline.py
+++ b/Lib/_pyrepl/readline.py
@@ -334,8 +334,14 @@ def do(self) -> None:
                         if pi is not None and pi < indent:
                             repeat = indent - pi
                             break
-            r.pos -= repeat
-            del b[r.pos : r.pos + repeat]
+            if repeat == 1:
+                # Use grapheme-aware deletion for non-dedent case
+                prev = r.prev_grapheme_boundary()
+                del b[prev:r.pos]
+                r.pos = prev
+            else:
+                r.pos -= repeat
+                del b[r.pos : r.pos + repeat]
             r.dirty = True
         else:
             self.reader.error("can't backspace at start")
diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py
index b1b6ae16a1e592..ca13416a286bae 100644
--- a/Lib/test/test_pyrepl/test_reader.py
+++ b/Lib/test/test_pyrepl/test_reader.py
@@ -558,3 +558,98 @@ def test_control_characters(self):
         reader, _ = handle_all_events(events)
         self.assert_screen_equal(reader, 'flag = "🏳️\\u200d🌈"', clean=True)
         self.assert_screen_equal(reader, 'flag {o}={z} {s}"🏳️\\u200d🌈"{z}'.format(**colors))
+
+    # -- grapheme cluster (combining character) tests --
+
+    def test_backspace_combining_character(self):
+        # 'e' + combining acute accent U+0301 = one visual char
+        events = itertools.chain(
+            code_to_events("e\u0301"),
+            [Event(evt="key", data="backspace", raw=bytearray(b"\x7f"))],
+        )
+        reader, _ = handle_all_events(events)
+        self.assertEqual(reader.buffer, [])
+        self.assertEqual(reader.pos, 0)
+
+    def test_backspace_combining_in_middle(self):
+        # "ae\u0301z" → backspace should remove "e\u0301", leaving "az"
+        events = itertools.chain(
+            code_to_events("ae\u0301z"),
+            [
+                Event(evt="key", data="left", raw=bytearray(b"\x1bOD")),
+                Event(evt="key", data="backspace", raw=bytearray(b"\x7f")),
+            ],
+        )
+        reader, _ = handle_all_events(events)
+        self.assertEqual(reader.buffer, ["a", "z"])
+        self.assertEqual(reader.pos, 1)
+
+    def test_delete_combining_character(self):
+        # Cursor at start, delete should remove entire "e\u0301"
+        events = itertools.chain(
+            code_to_events("e\u0301"),
+            [
+                Event(evt="key", data="home", raw=bytearray(b"\x1b[H")),
+                Event(evt="key", data="delete", raw=bytearray(b"\x7f")),
+            ],
+        )
+        reader, _ = handle_all_events(events)
+        self.assertEqual(reader.buffer, [])
+        self.assertEqual(reader.pos, 0)
+
+    def test_left_skips_combining_character(self):
+        # After typing "e\u0301", left should move past both codepoints
+        events = itertools.chain(
+            code_to_events("ae\u0301"),
+            [Event(evt="key", data="left", raw=bytearray(b"\x1bOD"))],
+        )
+        reader, _ = handle_all_events(events)
+        # Should land before 'e', not between 'e' and combining accent
+        self.assertEqual(reader.pos, 1)
+
+    def test_right_skips_combining_character(self):
+        # Move to start, then right should skip "e\u0301" as one unit
+        events = itertools.chain(
+            code_to_events("e\u0301z"),
+            [
+                Event(evt="key", data="home", raw=bytearray(b"\x1b[H")),
+                Event(evt="key", data="right", raw=bytearray(b"\x1bOC")),
+            ],
+        )
+        reader, _ = handle_all_events(events)
+        # Should be past both 'e' and combining accent, before 'z'
+        self.assertEqual(reader.pos, 2)
+
+    def test_backspace_plain_ascii(self):
+        # Regression: plain ASCII should still work as before
+        events = itertools.chain(
+            code_to_events("abc"),
+            [Event(evt="key", data="backspace", raw=bytearray(b"\x7f"))],
+        )
+        reader, _ = handle_all_events(events)
+        self.assertEqual(reader.buffer, ["a", "b"])
+        self.assertEqual(reader.pos, 2)
+
+    def test_left_right_plain_ascii(self):
+        # Regression: plain ASCII left/right still move one char at a time
+        events = itertools.chain(
+            code_to_events("abc"),
+            [
+                Event(evt="key", data="left", raw=bytearray(b"\x1bOD")),
+                Event(evt="key", data="left", raw=bytearray(b"\x1bOD")),
+                Event(evt="key", data="right", raw=bytearray(b"\x1bOC")),
+            ],
+        )
+        reader, _ = handle_all_events(events)
+        self.assertEqual(reader.pos, 2)
+
+    def test_grapheme_boundary_crosses_line_breaks(self):
+        # Regression: at a line edge the helpers must step over the "\n";
+        # returning pos unchanged leaves left/right/backspace/delete stuck.
+        reader, _ = handle_all_events(code_to_events("a"))
+        reader.buffer = list("a\nb")
+        self.assertEqual(reader.prev_grapheme_boundary(2), 1)
+        self.assertEqual(reader.next_grapheme_boundary(1), 2)
+        # The two ends of the buffer are fixed points.
+        self.assertEqual(reader.prev_grapheme_boundary(0), 0)
+        self.assertEqual(reader.next_grapheme_boundary(3), 3)