From 64d684016b160a52650416d5276d24302127f8e6 Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Sun, 18 Jan 2026 22:34:19 +0900
Subject: [PATCH 1/2] Update Lib/re and test_re to CPython 3.14.2

---
 Lib/re/__init__.py   |  2 +-
 Lib/re/_compiler.py  | 40 ++++++++++++++++++++----------
 Lib/re/_constants.py |  4 ++-
 Lib/re/_parser.py    | 19 ++------------
 Lib/test/test_re.py  | 59 ++++++++++++++++++++++----------------------
 5 files changed, 63 insertions(+), 61 deletions(-)

diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py
index 7e8abbf6ffe..af2808a77da 100644
--- a/Lib/re/__init__.py
+++ b/Lib/re/__init__.py
@@ -61,7 +61,7 @@
 resulting RE will match the second character.
     \number  Matches the contents of the group of the same number.
     \A       Matches only at the start of the string.
-    \Z       Matches only at the end of the string.
+    \z       Matches only at the end of the string.
     \b       Matches the empty string, but only at the start or end of a word.
     \B       Matches the empty string, but not at the start or end of a word.
     \d       Matches any decimal digit; equivalent to the set [0-9] in
diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py
index 1b1aaa7714b..20dd561d1c1 100644
--- a/Lib/re/_compiler.py
+++ b/Lib/re/_compiler.py
@@ -28,6 +28,8 @@
     POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
 }
 
+_CHARSET_ALL = [(NEGATE, None)]
+
 def _combine_flags(flags, add_flags, del_flags,
                    TYPE_FLAGS=_parser.TYPE_FLAGS):
     if add_flags & TYPE_FLAGS:
@@ -84,17 +86,22 @@ def _compile(code, pattern, flags):
                     code[skip] = _len(code) - skip
         elif op is IN:
             charset, hascased = _optimize_charset(av, iscased, tolower, fixes)
-            if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
-                emit(IN_LOC_IGNORE)
-            elif not hascased:
-                emit(IN)
-            elif not fixes:  # ascii
-                emit(IN_IGNORE)
+            if not charset:
+                emit(FAILURE)
+            elif charset == _CHARSET_ALL:
+                emit(ANY_ALL)
             else:
-                emit(IN_UNI_IGNORE)
-            skip = _len(code); emit(0)
-            _compile_charset(charset, flags, code)
-            code[skip] = _len(code) - skip
+                if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
+                    emit(IN_LOC_IGNORE)
+                elif not hascased:
+                    emit(IN)
+                elif not fixes:  # ascii
+                    emit(IN_IGNORE)
+                else:
+                    emit(IN_UNI_IGNORE)
+                skip = _len(code); emit(0)
+                _compile_charset(charset, flags, code)
+                code[skip] = _len(code) - skip
         elif op is ANY:
             if flags & SRE_FLAG_DOTALL:
                 emit(ANY_ALL)
@@ -277,6 +284,10 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
                             charmap[i] = 1
                 elif op is NEGATE:
                     out.append((op, av))
+                elif op is CATEGORY and tail and (CATEGORY, CH_NEGATE[av]) in tail:
+                    # Optimize [\s\S] etc.
+                    out = [] if out else _CHARSET_ALL
+                    return out, False
                 else:
                     tail.append((op, av))
             except IndexError:
@@ -524,13 +535,18 @@ def _compile_info(code, pattern, flags):
     # look for a literal prefix
     prefix = []
     prefix_skip = 0
-    charset = [] # not used
+    charset = None # not used
     if not (flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE):
         # look for literal prefix
         prefix, prefix_skip, got_all = _get_literal_prefix(pattern, flags)
         # if no prefix, look for charset prefix
         if not prefix:
             charset = _get_charset_prefix(pattern, flags)
+            if charset:
+                charset, hascased = _optimize_charset(charset)
+                assert not hascased
+                if charset == _CHARSET_ALL:
+                    charset = None
 ##     if prefix:
 ##         print("*** PREFIX", prefix, prefix_skip)
 ##     if charset:
@@ -565,8 +581,6 @@ def _compile_info(code, pattern, flags):
         # generate overlap table
         code.extend(_generate_overlap_table(prefix))
     elif charset:
-        charset, hascased = _optimize_charset(charset)
-        assert not hascased
         _compile_charset(charset, flags, code)
     code[skip] = len(code) - skip
 
diff --git a/Lib/re/_constants.py b/Lib/re/_constants.py
index 9c3c294ba44..d6f32302d37 100644
--- a/Lib/re/_constants.py
+++ b/Lib/re/_constants.py
@@ -15,7 +15,7 @@
 
 MAGIC = 20230612
 
-from _sre import MAXREPEAT, MAXGROUPS
+from _sre import MAXREPEAT, MAXGROUPS  # noqa: F401
 
 # SRE standard exception (access as sre.error)
 # should this really be here?
@@ -206,6 +206,8 @@ def _makecodes(*names):
     CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
 }
 
+CH_NEGATE = dict(zip(CHCODES[::2] + CHCODES[1::2], CHCODES[1::2] + CHCODES[::2]))
+
 # flags
 SRE_FLAG_IGNORECASE = 2 # case insensitive
 SRE_FLAG_LOCALE = 4 # honour system locale
diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py
index f3c779340fe..35ab7ede2a7 100644
--- a/Lib/re/_parser.py
+++ b/Lib/re/_parser.py
@@ -49,7 +49,8 @@
     r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
     r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
     r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
-    r"\Z": (AT, AT_END_STRING), # end of string
+    r"\z": (AT, AT_END_STRING), # end of string
+    r"\Z": (AT, AT_END_STRING), # end of string (obsolete)
 }
 
 FLAGS = {
@@ -807,14 +808,6 @@ def _parse(source, state, verbose, nested, first=False):
                             state.grouprefpos[condgroup] = (
                                 source.tell() - len(condname) - 1
                             )
-                        if not (condname.isdecimal() and condname.isascii()):
-                            import warnings
-                            warnings.warn(
-                                "bad character in group name %s at position %d" %
-                                (repr(condname) if source.istext else ascii(condname),
-                                 source.tell() - len(condname) - 1),
-                                DeprecationWarning, stacklevel=nested + 6
-                            )
                     state.checklookbehindgroup(condgroup, source)
                     item_yes = _parse(source, state, verbose, nested + 1)
                     if source.match("|"):
@@ -1038,14 +1031,6 @@ def addgroup(index, pos):
                     if index >= MAXGROUPS:
                         raise s.error("invalid group reference %d" % index,
                                       len(name) + 1)
-                    if not (name.isdecimal() and name.isascii()):
-                        import warnings
-                        warnings.warn(
-                            "bad character in group name %s at position %d" %
-                            (repr(name) if s.istext else ascii(name),
-                             s.tell() - len(name) - 1),
-                            DeprecationWarning, stacklevel=5
-                        )
                 addgroup(index, len(name) + 1)
             elif c == "0":
                 if s.next in OCTDIGITS:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 04c8ee71a99..8b935dc7f12 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,12 +1,11 @@
 from test.support import (gc_collect, bigmemtest, _2G,
                           cpython_only, captured_stdout,
-                          check_disallow_instantiation, is_emscripten, is_wasi,
+                          check_disallow_instantiation, linked_to_musl,
                           warnings_helper, SHORT_TIMEOUT, Stopwatch, requires_resource)
 import locale
 import re
 import string
 import sys
-import time
 import unittest
 import warnings
 from re import Scanner
@@ -14,7 +13,7 @@
 
 # some platforms lack working multiprocessing
 try:
-    import _multiprocessing
+    import _multiprocessing  # noqa: F401
 except ImportError:
     multiprocessing = None
 else:
@@ -621,6 +620,7 @@ def test_re_fullmatch(self):
         self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4))
         self.assertIsNone(re.fullmatch(r"a+", "ab"))
         self.assertIsNone(re.fullmatch(r"abc$", "abc\n"))
+        self.assertIsNone(re.fullmatch(r"abc\z", "abc\n"))
         self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n"))
         self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n"))
         self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4))
@@ -806,6 +806,8 @@ def test_special_escapes(self):
         self.assertEqual(re.search(r"\B(b.)\B",
                                    "abc bcd bc abxd", re.ASCII).group(1), "bx")
         self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\z$", "abc", re.M).group(0), "abc")
+        self.assertIsNone(re.search(r"^\Aabc\z$", "\nabc\n", re.M))
         self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
         self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
         self.assertEqual(re.search(br"\b(b.)\b",
@@ -817,6 +819,8 @@ def test_special_escapes(self):
         self.assertEqual(re.search(br"\B(b.)\B",
                                    b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
         self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
+        self.assertEqual(re.search(br"^\Aabc\z$", b"abc", re.M).group(0), b"abc")
+        self.assertIsNone(re.search(br"^\Aabc\z$", b"\nabc\n", re.M))
         self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
         self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
         self.assertEqual(re.search(r"\d\D\w\W\s\S",
@@ -840,7 +844,7 @@ def test_other_escapes(self):
         self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
         self.assertIsNone(re.match(r"[\^a]+", 'b'))
         re.purge()  # for warnings
-        for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
+        for c in 'ceghijklmopqyCEFGHIJKLMNOPQRTVXY':
             with self.subTest(c):
                 self.assertRaises(re.PatternError, re.compile, '\\%c' % c)
         for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
@@ -888,6 +892,8 @@ def test_named_unicode_escapes(self):
         self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
         self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
 
+    # TODO: RUSTPYTHON; re.search(r"\B", "") now returns a match in CPython 3.14
+    @unittest.expectedFailure
     def test_word_boundaries(self):
         # See http://bugs.python.org/issue10713
         self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1), "abc")
@@ -983,18 +989,15 @@ def test_word_boundaries(self):
         self.assertIsNone(re.fullmatch(br".+\B", b"abc", re.LOCALE))
         self.assertIsNone(re.fullmatch(r".+\B", "ьюя"))
         self.assertTrue(re.fullmatch(r".+\B", "ьюя", re.ASCII))
-        # However, an empty string contains no word boundaries, and also no
-        # non-boundaries.
+        # However, an empty string contains no word boundaries.
         self.assertIsNone(re.search(r"\b", ""))
         self.assertIsNone(re.search(r"\b", "", re.ASCII))
         self.assertIsNone(re.search(br"\b", b""))
         self.assertIsNone(re.search(br"\b", b"", re.LOCALE))
-        # This one is questionable and different from the perlre behaviour,
-        # but describes current behavior.
-        self.assertIsNone(re.search(r"\B", ""))
-        self.assertIsNone(re.search(r"\B", "", re.ASCII))
-        self.assertIsNone(re.search(br"\B", b""))
-        self.assertIsNone(re.search(br"\B", b"", re.LOCALE))
+        self.assertTrue(re.search(r"\B", ""))
+        self.assertTrue(re.search(r"\B", "", re.ASCII))
+        self.assertTrue(re.search(br"\B", b""))
+        self.assertTrue(re.search(br"\B", b"", re.LOCALE))
         # A single word-character string has two boundaries, but no
         # non-boundary gaps.
         self.assertEqual(len(re.findall(r"\b", "a")), 2)
@@ -1423,7 +1426,7 @@ def test_pickling(self):
             newpat = pickle.loads(pickled)
             self.assertEqual(newpat, oldpat)
         # current pickle expects the _compile() reconstructor in re module
-        from re import _compile
+        from re import _compile  # noqa: F401
 
     @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_copying(self):
@@ -1755,7 +1758,7 @@ def test_bug_6561(self):
         for x in not_decimal_digits:
             self.assertIsNone(re.match(r'^\d$', x))
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON a = array.array(typecode)\n ValueError: bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d)
+    @unittest.expectedFailure # TODO: RUSTPYTHON; a = array.array(typecode)\n ValueError: bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d)
     @warnings_helper.ignore_warnings(category=DeprecationWarning)  # gh-80480 array('u')
     def test_empty_array(self):
         # SF buf 1647541
@@ -2185,10 +2188,9 @@ def test_bug_20998(self):
         self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
 
     @unittest.expectedFailure # TODO: RUSTPYTHON; self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))\n AssertionError: None is not true
-    @unittest.skipIf(
-        is_emscripten or is_wasi,
-        "musl libc issue on Emscripten/WASI, bpo-46390"
-    )
+    @unittest.skipIf(linked_to_musl(), "musl libc issue, bpo-46390")
+    @unittest.skipIf(sys.platform.startswith("sunos"),
+                     "test doesn't work on Solaris, gh-91214")
     def test_locale_caching(self):
         # Issue #22410
         oldlocale = locale.setlocale(locale.LC_CTYPE)
@@ -2225,10 +2227,9 @@ def check_en_US_utf8(self):
         self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
         self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
 
-    @unittest.skipIf(
-        is_emscripten or is_wasi,
-        "musl libc issue on Emscripten/WASI, bpo-46390"
-    )
+    @unittest.skipIf(linked_to_musl(), "musl libc issue, bpo-46390")
+    @unittest.skipIf(sys.platform.startswith("sunos"),
+                     "test doesn't work on Solaris, gh-91214")
     def test_locale_compiled(self):
         oldlocale = locale.setlocale(locale.LC_CTYPE)
         self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
@@ -2632,8 +2633,8 @@ def test_findall_atomic_grouping(self):
 
     @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_bug_gh91616(self):
-        self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer
-        self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt"))
+        self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\z', "a.txt")) # reproducer
+        self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\z', "a.txt"))
 
     def test_bug_gh100061(self):
         # gh-100061
@@ -2655,7 +2656,7 @@ def test_bug_gh100061(self):
         self.assertEqual(re.match("(?>(?:ab?c){1,3})", "aca").span(), (0, 2))
         self.assertEqual(re.match("(?:ab?c){1,3}+", "aca").span(), (0, 2))
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; self.assertEqual(re.match('((x)|y|z){3}+', 'xyz').groups(), ('z', 'x'))\n AssertionError: Tuples differ: ('x', 'x') != ('z', 'x')
+    @unittest.expectedFailure # TODO: RUSTPYTHON; self.assertEqual(re.match('((x)|y|z){3}+', 'xyz').groups(), ('z', 'x'))\n AssertionError: Tuples differ: ('x', 'x') != ('z', 'x')
     def test_bug_gh101955(self):
         # Possessive quantifier with nested alternative with capture groups
         self.assertEqual(re.match('((x)|y|z)*+', 'xyz').groups(), ('z', 'x'))
@@ -2893,11 +2894,11 @@ def test_long_pattern(self):
         pattern = 'Very %spattern' % ('long ' * 1000)
         r = repr(re.compile(pattern))
         self.assertLess(len(r), 300)
-        self.assertEqual(r[:30], "re.compile('Very long long lon")
+        self.assertStartsWith(r, "re.compile('Very long long lon")
         r = repr(re.compile(pattern, re.I))
         self.assertLess(len(r), 300)
-        self.assertEqual(r[:30], "re.compile('Very long long lon")
-        self.assertEqual(r[-16:], ", re.IGNORECASE)")
+        self.assertStartsWith(r, "re.compile('Very long long lon")
+        self.assertEndsWith(r, ", re.IGNORECASE)")
 
     def test_flags_repr(self):
         self.assertEqual(repr(re.I), "re.IGNORECASE")
@@ -2977,7 +2978,7 @@ def test_deprecated_modules(self):
                 self.assertEqual(mod.__name__, name)
                 self.assertEqual(mod.__package__, '')
                 for attr in deprecated[name]:
-                    self.assertTrue(hasattr(mod, attr))
+                    self.assertHasAttr(mod, attr)
                 del sys.modules[name]
 
     @cpython_only

From fbf1f60cb008bb8860993e8eb1466133d0a8dc97 Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Tue, 20 Jan 2026 14:14:04 +0900
Subject: [PATCH 2/2] Unmark test_filter_module expectedFailure (\z now supported)

---
 Lib/test/test_warnings/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Lib/test/test_warnings/__init__.py b/Lib/test/test_warnings/__init__.py
index abdf7b32df2..87632821a8e 100644
--- a/Lib/test/test_warnings/__init__.py
+++ b/Lib/test/test_warnings/__init__.py
@@ -241,7 +241,6 @@ def test_once(self):
                                     42)
             self.assertEqual(len(w), 0)
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON re.PatternError: bad escape \z at position 15
     def test_filter_module(self):
         MS_WINDOWS = (sys.platform == 'win32')
         with self.module.catch_warnings(record=True) as w: