From 8dbd96d4e978ab7dba06f5c281c0e0e411b0efca Mon Sep 17 00:00:00 2001 From: Ma Lin Date: Tue, 19 Apr 2022 12:04:44 +0800 Subject: [PATCH 1/3] add pathlib test-cases --- Lib/test/test_pathlib.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 66e44479239cfc..8676e8884d6ccc 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1388,6 +1388,7 @@ class _BasePathTest(object): # | |-- dirD # | | `-- fileD # | `-- fileC + # | `-- novel.txt # |-- dirE # No permissions # |-- fileA # |-- linkA -> fileA @@ -1412,6 +1413,8 @@ def cleanup(): f.write(b"this is file B\n") with open(join('dirC', 'fileC'), 'wb') as f: f.write(b"this is file C\n") + with open(join('dirC', 'novel.txt'), 'w', encoding='ascii') as f: + f.write("this is a novel\n") with open(join('dirC', 'dirD', 'fileD'), 'wb') as f: f.write(b"this is file D\n") os.chmod(join('dirE'), 0) @@ -1679,6 +1682,9 @@ def _check(glob, expected): p = P(BASE, "dirC") _check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) _check(p.rglob("*/*"), ["dirC/dirD/fileD"]) + # gh-91616, a re module regression + _check(p.rglob("*.txt"), ["dirC/novel.txt"]) + _check(p.rglob("*.*"), ["dirC/novel.txt"]) @os_helper.skip_unless_symlink def test_rglob_symlink_loop(self): @@ -1689,7 +1695,7 @@ def test_rglob_symlink_loop(self): expect = {'brokenLink', 'dirA', 'dirA/linkC', 'dirB', 'dirB/fileB', 'dirB/linkD', - 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', 'dirC/fileC', + 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', 'dirC/fileC', 'dirC/novel.txt', 'dirE', 'fileA', 'linkA', From 06f0a3c909a74ef1c7c2aae771bb0feaf2d7e548 Mon Sep 17 00:00:00 2001 From: Ma Lin Date: Sun, 17 Apr 2022 11:46:22 +0800 Subject: [PATCH 2/3] re module, fix .fullmatch() mismatch when using Atomic Grouping / Possessive Quantifiers These jumps should use DO_JUMP0() instead of DO_JUMP(): - JUMP_POSS_REPEAT_1 - JUMP_POSS_REPEAT_2 - JUMP_ATOMIC_GROUP --- Lib/test/test_re.py | 44 
+++++++++++++++++++ ...2-04-17-12-27-25.gh-issue-91616.gSQg69.rst | 2 + Modules/_sre/sre_lib.h | 14 +++--- 3 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-04-17-12-27-25.gh-issue-91616.gSQg69.rst diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 959582e2f12575..0d53f1c58e1e36 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2173,6 +2173,10 @@ def test_fullmatch_possessive_quantifiers(self): self.assertIsNone(re.fullmatch(r'a*+', 'ab')) self.assertIsNone(re.fullmatch(r'a?+', 'ab')) self.assertIsNone(re.fullmatch(r'a{1,3}+', 'ab')) + self.assertTrue(re.fullmatch(r'a++b', 'ab')) + self.assertTrue(re.fullmatch(r'a*+b', 'ab')) + self.assertTrue(re.fullmatch(r'a?+b', 'ab')) + self.assertTrue(re.fullmatch(r'a{1,3}+b', 'ab')) self.assertTrue(re.fullmatch(r'(?:ab)++', 'ab')) self.assertTrue(re.fullmatch(r'(?:ab)*+', 'ab')) @@ -2182,6 +2186,10 @@ def test_fullmatch_possessive_quantifiers(self): self.assertIsNone(re.fullmatch(r'(?:ab)*+', 'abc')) self.assertIsNone(re.fullmatch(r'(?:ab)?+', 'abc')) self.assertIsNone(re.fullmatch(r'(?:ab){1,3}+', 'abc')) + self.assertTrue(re.fullmatch(r'(?:ab)++c', 'abc')) + self.assertTrue(re.fullmatch(r'(?:ab)*+c', 'abc')) + self.assertTrue(re.fullmatch(r'(?:ab)?+c', 'abc')) + self.assertTrue(re.fullmatch(r'(?:ab){1,3}+c', 'abc')) def test_findall_possessive_quantifiers(self): self.assertEqual(re.findall(r'a++', 'aab'), ['aa']) @@ -2217,6 +2225,10 @@ def test_fullmatch_atomic_grouping(self): self.assertIsNone(re.fullmatch(r'(?>a*)', 'ab')) self.assertIsNone(re.fullmatch(r'(?>a?)', 'ab')) self.assertIsNone(re.fullmatch(r'(?>a{1,3})', 'ab')) + self.assertTrue(re.fullmatch(r'(?>a+)b', 'ab')) + self.assertTrue(re.fullmatch(r'(?>a*)b', 'ab')) + self.assertTrue(re.fullmatch(r'(?>a?)b', 'ab')) + self.assertTrue(re.fullmatch(r'(?>a{1,3})b', 'ab')) self.assertTrue(re.fullmatch(r'(?>(?:ab)+)', 'ab')) self.assertTrue(re.fullmatch(r'(?>(?:ab)*)', 'ab')) @@ -2226,6 +2238,10 @@ def 
test_fullmatch_atomic_grouping(self): self.assertIsNone(re.fullmatch(r'(?>(?:ab)*)', 'abc')) self.assertIsNone(re.fullmatch(r'(?>(?:ab)?)', 'abc')) self.assertIsNone(re.fullmatch(r'(?>(?:ab){1,3})', 'abc')) + self.assertTrue(re.fullmatch(r'(?>(?:ab)+)c', 'abc')) + self.assertTrue(re.fullmatch(r'(?>(?:ab)*)c', 'abc')) + self.assertTrue(re.fullmatch(r'(?>(?:ab)?)c', 'abc')) + self.assertTrue(re.fullmatch(r'(?>(?:ab){1,3})c', 'abc')) def test_findall_atomic_grouping(self): self.assertEqual(re.findall(r'(?>a+)', 'aab'), ['aa']) @@ -2238,6 +2254,34 @@ def test_findall_atomic_grouping(self): self.assertEqual(re.findall(r'(?>(?:ab)?)', 'ababc'), ['ab', 'ab', '', '']) self.assertEqual(re.findall(r'(?>(?:ab){1,3})', 'ababc'), ['abab']) + def test_bug_gh91616(self): + # These 3 jumps should use DO_JUMP0() instead of DO_JUMP() + + # JUMP_POSS_REPEAT_1 + self.assertTrue(re.fullmatch(r'(a*?b){1}+c', "abc")) + self.assertTrue( re.fullmatch(r'(.b|a){1}+c', 'abc')) + self.assertIsNone(re.fullmatch(r'(a|.b){1}+c', 'abc')) + self.assertTrue(re.fullmatch(r'(?:(ab)*+){1}+c', 'abc')) + self.assertTrue(re.fullmatch(r'(?:(ab)?+){1}+c', 'abc')) + + # JUMP_POSS_REPEAT_2 + self.assertTrue(re.fullmatch(r'(a*?b)*+c', "abc")) + self.assertTrue( re.fullmatch(r'(.b|a)*+c', 'abc')) + self.assertIsNone(re.fullmatch(r'(a|.b)*+c', 'abc')) + self.assertTrue(re.fullmatch(r'(?:(ab)*+)*+c', 'abc')) + self.assertTrue(re.fullmatch(r'(?:(ab)?+)*+c', 'abc')) + + # JUMP_ATOMIC_GROUP + self.assertTrue(re.fullmatch(r'(?>a*?b)c', "abc")) + self.assertTrue( re.fullmatch(r'(?>.b|a)c', 'abc')) + self.assertIsNone(re.fullmatch(r'(?>a|.b)c', 'abc')) + self.assertTrue(re.fullmatch(r'(?>(ab)*+)c', 'abc')) + self.assertTrue(re.fullmatch(r'(?>(ab)?+)c', 'abc')) + + # test-cases provided by gh-91616 + self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) + self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt")) + def get_debug_out(pat): with captured_stdout() as out: diff --git
a/Misc/NEWS.d/next/Library/2022-04-17-12-27-25.gh-issue-91616.gSQg69.rst b/Misc/NEWS.d/next/Library/2022-04-17-12-27-25.gh-issue-91616.gSQg69.rst new file mode 100644 index 00000000000000..8f147237aed6bd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-17-12-27-25.gh-issue-91616.gSQg69.rst @@ -0,0 +1,2 @@ +:mod:`re` module, fix :meth:`~re.Pattern.fullmatch` mismatch when using Atomic +Grouping or Possessive Quantifiers. diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index 3472e65b87ae6f..ea4c5d81dc66ac 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1254,8 +1254,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) /* Check for minimum required matches. */ while (ctx->count < (Py_ssize_t)pattern[1]) { /* not enough matches */ - DO_JUMP(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, - &pattern[3]); + DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, + &pattern[3]); if (ret) { RETURN_ON_ERROR(ret); ctx->count++; @@ -1301,8 +1301,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) /* We have not reached the maximin matches, so try to match once more. */ - DO_JUMP(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, - &pattern[3]); + DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, + &pattern[3]); /* Check to see if the last attempted match succeeded. */ @@ -1343,15 +1343,15 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); /* Set the global Input pointer to this context's Input - pointer */ + pointer */ state->ptr = ptr; /* Evaluate the Atomic Group in a new context, terminating when the end of the group, represented by a SUCCESS op code, is reached. */ /* Group Pattern begins at an offset of 1 code. 
*/ - DO_JUMP(JUMP_ATOMIC_GROUP, jump_atomic_group, - &pattern[1]); + DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, + &pattern[1]); /* Test Exit Condition */ RETURN_ON_ERROR(ret); From 9d0133fb37aa5657043ee0e406379bab9f9bea8d Mon Sep 17 00:00:00 2001 From: Ma Lin Date: Tue, 19 Apr 2022 19:32:20 +0800 Subject: [PATCH 3/3] address comments --- Lib/test/test_pathlib.py | 7 ++++--- Lib/test/test_re.py | 26 +------------------------- 2 files changed, 5 insertions(+), 28 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 8676e8884d6ccc..b8b08bf0ce1bb5 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1413,8 +1413,8 @@ def cleanup(): f.write(b"this is file B\n") with open(join('dirC', 'fileC'), 'wb') as f: f.write(b"this is file C\n") - with open(join('dirC', 'novel.txt'), 'w', encoding='ascii') as f: - f.write("this is a novel\n") + with open(join('dirC', 'novel.txt'), 'wb') as f: + f.write(b"this is a novel\n") with open(join('dirC', 'dirD', 'fileD'), 'wb') as f: f.write(b"this is file D\n") os.chmod(join('dirE'), 0) @@ -1695,7 +1695,8 @@ def test_rglob_symlink_loop(self): expect = {'brokenLink', 'dirA', 'dirA/linkC', 'dirB', 'dirB/fileB', 'dirB/linkD', - 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', 'dirC/fileC', 'dirC/novel.txt', + 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', + 'dirC/fileC', 'dirC/novel.txt', 'dirE', 'fileA', 'linkA', diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 0d53f1c58e1e36..17d6b8f274574e 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2255,31 +2255,7 @@ def test_findall_atomic_grouping(self): self.assertEqual(re.findall(r'(?>(?:ab){1,3})', 'ababc'), ['abab']) def test_bug_gh91616(self): - # These 3 jumps should use DO_JUMP0() instead of DO_JUMP() - - # JUMP_POSS_REPEAT_1 - self.assertTrue(re.fullmatch(r'(a*?b){1}+c', "abc")) - self.assertTrue( re.fullmatch(r'(.b|a){1}+c', 'abc')) - self.assertIsNone(re.fullmatch(r'(a|.b){1}+c', 'abc')) - 
self.assertTrue(re.fullmatch(r'(?:(ab)*+){1}+c', 'abc')) - self.assertTrue(re.fullmatch(r'(?:(ab)?+){1}+c', 'abc')) - - # JUMP_POSS_REPEAT_2 - self.assertTrue(re.fullmatch(r'(a*?b)*+c', "abc")) - self.assertTrue( re.fullmatch(r'(.b|a)*+c', 'abc')) - self.assertIsNone(re.fullmatch(r'(a|.b)*+c', 'abc')) - self.assertTrue(re.fullmatch(r'(?:(ab)*+)*+c', 'abc')) - self.assertTrue(re.fullmatch(r'(?:(ab)?+)*+c', 'abc')) - - # JUMP_ATOMIC_GROUP - self.assertTrue(re.fullmatch(r'(?>a*?b)c', "abc")) - self.assertTrue( re.fullmatch(r'(?>.b|a)c', 'abc')) - self.assertIsNone(re.fullmatch(r'(?>a|.b)c', 'abc')) - self.assertTrue(re.fullmatch(r'(?>(ab)*+)c', 'abc')) - self.assertTrue(re.fullmatch(r'(?>(ab)?+)c', 'abc')) - - # test-cases provided by gh-91616 - self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) + self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt"))