From 0609eb29586bb79c1c69ff69d495bda27d0a0d17 Mon Sep 17 00:00:00 2001
From: Pierre Le Marre <dev@wismill.eu>
Date: Wed, 26 Jul 2023 06:34:00 +0200
Subject: [PATCH 01/10] unicodedata: Fix Tangut Ideograph names

---
 Lib/test/test_unicodedata.py     |  2 +-
 Modules/unicodedata.c            | 51 +++++++++++++++++++++++++++++---
 Tools/unicode/makeunicodedata.py | 20 ++++++++++---
 3 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 3dc0790ca15b41..7b49a5aa6c4210 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -71,7 +71,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
 
     # Update this if the database changes. Make sure to do a full rebuild
     # (e.g. 'make distclean && make') to get the correct checksum.
-    expectedchecksum = '26ff0d31c14194b4606a5b3a81ac36df3a14e331'
+    expectedchecksum = '95cc75e49b140c61b884c16d0a9fbbb0b93a7fa9'
 
     @requires_resource('cpu')
     def test_function_checksum(self):
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index c1e22f3868931f..7359a2740615e0 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1025,7 +1025,7 @@ static const char * const hangul_syllables[][3] = {
 
 /* These ranges need to match makeunicodedata.py:cjk_ranges. */
 static int
-is_unified_ideograph(Py_UCS4 code)
+is_cjk_unified_ideograph(Py_UCS4 code)
 {
     return
         (0x3400 <= code && code <= 0x4DBF)   || /* CJK Ideograph Extension A */
@@ -1039,6 +1039,15 @@ is_unified_ideograph(Py_UCS4 code)
         (0x31350 <= code && code <= 0x323AF);   /* CJK Ideograph Extension H */
 }
 
+/* These ranges need to match makeunicodedata.py:tangut_ranges. */
+static int
+is_tangut_ideograph(Py_UCS4 code)
+{
+    return
+        (0x17000 <= code && code <= 0x187F7) || /* Tangut */
+        (0x18D00 <= code && code <= 0x18D08);   /* Tangut Supplement */
+}
+
 /* macros used to determine if the given code point is in the PUA range that
  * we are using to store aliases and named sequences */
 #define IS_ALIAS(cp) ((cp >= aliases_start) && (cp < aliases_end))
@@ -1098,7 +1107,7 @@ _getucname(PyObject *self,
         return 1;
     }
 
-    if (is_unified_ideograph(code)) {
+    if (is_cjk_unified_ideograph(code)) {
         if (buflen < 28)
             /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
             return 0;
@@ -1106,6 +1115,14 @@ _getucname(PyObject *self,
         return 1;
     }
 
+    if (is_tangut_ideograph(code)) {
+        if (buflen < 23)
+            /* Worst case: TANGUT IDEOGRAPH-18D08 */
+            return 0;
+        sprintf(buffer, "TANGUT IDEOGRAPH-%X", code);
+        return 1;
+    }
+
     /* get offset into phrasebook */
     offset = phrasebook_offset1[(code>>phrasebook_shift)];
     offset = phrasebook_offset2[(offset<<phrasebook_shift) +
@@ -1236,7 +1253,7 @@ _getcode(PyObject* self,
         return 0;
     }
 
-    /* Check for unified ideographs. */
+    /* Check for CJK unified ideographs. */
     if (strncmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
         /* Four or five hexdigits must follow. */
         v = 0;
@@ -1254,12 +1271,38 @@ _getcode(PyObject* self,
                 return 0;
             name++;
         }
-        if (!is_unified_ideograph(v))
+        if (!is_cjk_unified_ideograph(v))
+            return 0;
+        *code = v;
+        return 1;
+    }
+
+
+    /* Check for Tangut ideographs. */
+    if (strncmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
+        /* Five hexdigits must follow. */
+        v = 0;
+        name += 17;
+        namelen -= 17;
+        if (namelen != 5)
+            return 0;
+        while (namelen--) {
+            v *= 16;
+            if (*name >= '0' && *name <= '9')
+                v += *name - '0';
+            else if (*name >= 'A' && *name <= 'F')
+                v += *name - 'A' + 10;
+            else
+                return 0;
+            name++;
+        }
+        if (!is_tangut_ideograph(v))
             return 0;
         *code = v;
         return 1;
     }
 
+
     /* the following is the same as python's dictionary lookup, with
        only minor changes.  see the makeunicodedata script for more
        details */
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 034642db06e48b..6c69ba2b946709 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -99,7 +99,7 @@
 CASED_MASK = 0x2000
 EXTENDED_CASE_MASK = 0x4000
 
-# these ranges need to match unicodedata.c:is_unified_ideograph
+# these ranges need to match unicodedata.c:is_cjk_unified_ideograph
 cjk_ranges = [
     ('3400', '4DBF'),
     ('4E00', '9FFF'),
@@ -112,6 +112,12 @@
     ('31350', '323AF'),
 ]
 
+# these ranges need to match unicodedata.c:is_tangut_ideograph
+tangut_ranges = [
+    ('17000', '187F7'),
+    ('18D00', '18D08')
+]
+
 
 def maketables(trace=0):
 
@@ -123,7 +129,7 @@ def maketables(trace=0):
 
     for version in old_versions:
         print("--- Reading", UNICODE_DATA % ("-"+version), "...")
-        old_unicode = UnicodeData(version, cjk_check=False)
+        old_unicode = UnicodeData(version, ideograph_check=False)
         print(len(list(filter(None, old_unicode.table))), "characters")
         merge_old_version(version, unicode, old_unicode)
 
@@ -1020,7 +1026,7 @@ def from_row(row: List[str]) -> UcdRecord:
 class UnicodeData:
     # table: List[Optional[UcdRecord]]  # index is codepoint; None means unassigned
 
-    def __init__(self, version, cjk_check=True):
+    def __init__(self, version, ideograph_check=True):
         self.changed = []
         table = [None] * 0x110000
         for s in UcdFile(UNICODE_DATA, version):
@@ -1028,6 +1034,7 @@ def __init__(self, version, cjk_check=True):
             table[char] = from_row(s)
 
         cjk_ranges_found = []
+        tangut_ranges_found = []
 
         # expand first-last ranges
         field = None
@@ -1044,12 +1051,17 @@ def __init__(self, version, cjk_check=True):
                     if s.name.startswith("<CJK Ideograph"):
                         cjk_ranges_found.append((field[0],
                                                  s.codepoint))
+                    elif s.name.startswith("<Tangut Ideograph"):
+                        tangut_ranges_found.append((field[0],
+                                                    s.codepoint))
                     s.name = ""
                     field = None
             elif field:
                 table[i] = from_row(('%X' % i,) + field[1:])
-        if cjk_check and cjk_ranges != cjk_ranges_found:
+        if ideograph_check and cjk_ranges != cjk_ranges_found:
             raise ValueError("CJK ranges deviate: have %r" % cjk_ranges_found)
+        if ideograph_check and tangut_ranges != tangut_ranges_found:
+            raise ValueError("Tangut ranges deviate: have %r" % tangut_ranges_found)
 
         # public attributes
         self.filename = UNICODE_DATA % ''

From 3821e1bdb5019824ff0810cf46888ca968fe1300 Mon Sep 17 00:00:00 2001
From: Pierre Le Marre <dev@wismill.eu>
Date: Wed, 26 Jul 2023 06:34:01 +0200
Subject: [PATCH 02/10] Add NEWS entry for the Tangut ideograph names fix

---
 .../next/Library/2023-02-05-20-02-30.gh-issue-80667.7LmzeA.rst   | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Misc/NEWS.d/next/Library/2023-02-05-20-02-30.gh-issue-80667.7LmzeA.rst

diff --git a/Misc/NEWS.d/next/Library/2023-02-05-20-02-30.gh-issue-80667.7LmzeA.rst b/Misc/NEWS.d/next/Library/2023-02-05-20-02-30.gh-issue-80667.7LmzeA.rst
new file mode 100644
index 00000000000000..bbdcb4ffa0998b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-02-05-20-02-30.gh-issue-80667.7LmzeA.rst
@@ -0,0 +1 @@
+unicodedata: Fix missing Tangut Ideographs names.

From b2c4e9276786cf9a230a605193a0fed7b3eaa9d4 Mon Sep 17 00:00:00 2001
From: Pierre Le Marre <dev@wismill.eu>
Date: Wed, 26 Jul 2023 06:34:01 +0200
Subject: [PATCH 03/10] Add test checking all names against DerivedName.txt

---
 Lib/test/test_unicodedata.py | 45 ++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 7b49a5aa6c4210..9a38ca8d89f488 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -97,6 +97,50 @@ def test_function_checksum(self):
         result = h.hexdigest()
         self.assertEqual(result, self.expectedchecksum)
 
+    @requires_resource('network')
+    def test_name(self):
+        TESTBASEURL = "https://www.unicode.org/Public"
+        TESTDATAFILE = "extracted/DerivedName.txt"
+        TESTDATAURL = f"{TESTBASEURL}/{unicodedata.unidata_version}/ucd/{TESTDATAFILE}"
+
+        # Hit the exception early
+        try:
+            testdata = open_urlresource(TESTDATAURL, encoding="utf-8")
+        except PermissionError:
+            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
+                          f"into the test data directory")
+        except (OSError, HTTPException) as exc:
+            self.skipTest(f"Failed to download {TESTDATAURL}: {exc}")
+
+        with testdata:
+            self.run_name_tests(testdata)
+
+    def run_name_tests(self, testdata):
+        names_ref = {}
+
+        def parse_cp(s):
+            return int(s, 16)
+
+        # Parse data
+        for line in testdata:
+            line = line.strip()
+            if not line or line.startswith("#"):
+                continue
+            raw_cp, name = line.split("; ")
+            # Check for a range
+            if ".." in raw_cp:
+                cp1, cp2 = map(parse_cp, raw_cp.split(".."))
+                # remove ‘*’ at the end
+                name = name[:-1]
+                for cp in range(cp1, cp2 + 1):
+                    names_ref[cp] = f"{name}{cp:0>4X}"
+            else:
+                cp = parse_cp(raw_cp)
+                names_ref[cp] = name
+
+        for cp in range(0, sys.maxunicode + 1):
+            self.assertEqual(self.db.name(chr(cp), None), names_ref.get(cp))
+
     @requires_resource('cpu')
     def test_name_inverse_lookup(self):
         for i in range(sys.maxunicode + 1):
@@ -104,6 +148,7 @@ def test_name_inverse_lookup(self):
             if looked_name := self.db.name(char, None):
                 self.assertEqual(self.db.lookup(looked_name), char)
 
+
     def test_digit(self):
         self.assertEqual(self.db.digit('A', None), None)
         self.assertEqual(self.db.digit('9'), 9)

From 2ae016c6927b11668fd327dd0b993c6e1a665c49 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Fri, 13 Feb 2026 16:44:29 +0200
Subject: [PATCH 04/10] Update Tangut ranges and fix the DerivedName.txt test.

---
 Lib/test/test_unicodedata.py     | 86 +++++++++++++++-----------------
 Modules/unicodedata.c            |  6 +--
 Tools/unicode/makeunicodedata.py |  4 +-
 3 files changed, 46 insertions(+), 50 deletions(-)

diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 99d3d7970ef199..89f006165f2f07 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -128,50 +128,6 @@ def test_function_checksum(self):
         result = h.hexdigest()
         self.assertEqual(result, self.expectedchecksum)
 
-    @requires_resource('network')
-    def test_name(self):
-        TESTBASEURL = "https://www.unicode.org/Public"
-        TESTDATAFILE = "extracted/DerivedName.txt"
-        TESTDATAURL = f"{TESTBASEURL}/{unicodedata.unidata_version}/ucd/{TESTDATAFILE}"
-
-        # Hit the exception early
-        try:
-            testdata = open_urlresource(TESTDATAURL, encoding="utf-8")
-        except PermissionError:
-            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
-                          f"into the test data directory")
-        except (OSError, HTTPException) as exc:
-            self.skipTest(f"Failed to download {TESTDATAURL}: {exc}")
-
-        with testdata:
-            self.run_name_tests(testdata)
-
-    def run_name_tests(self, testdata):
-        names_ref = {}
-
-        def parse_cp(s):
-            return int(s, 16)
-
-        # Parse data
-        for line in testdata:
-            line = line.strip()
-            if not line or line.startswith("#"):
-                continue
-            raw_cp, name = line.split("; ")
-            # Check for a range
-            if ".." in raw_cp:
-                cp1, cp2 = map(parse_cp, raw_cp.split(".."))
-                # remove ‘*’ at the end
-                name = name[:-1]
-                for cp in range(cp1, cp2 + 1):
-                    names_ref[cp] = f"{name}{cp:0>4X}"
-            else:
-                cp = parse_cp(raw_cp)
-                names_ref[cp] = name
-
-        for cp in range(0, sys.maxunicode + 1):
-            self.assertEqual(self.db.name(chr(cp), None), names_ref.get(cp))
-
     @requires_resource('cpu')
     def test_name_inverse_lookup(self):
         for char in iterallchars():
@@ -658,7 +614,47 @@ class UnicodeFunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):
     # (e.g. 'make distclean && make') to get the correct checksum.
     expectedchecksum = ('83cc43a2fbb779185832b4c049217d80b05bf349'
                         if quicktest else
-                        '65670ae03a324c5f9e826a4de3e25bae4d73c9b7')
+                        '180bdc91143d8aa2eb9dd6726e66d37606205942')
+
+    @requires_resource('network')
+    def test_name(self):
+        TESTDATAFILE = "DerivedName.txt"
+        testdata = download_test_data_file(TESTDATAFILE)
+
+        with testdata:
+            self.run_name_tests(testdata)
+
+    def run_name_tests(self, testdata):
+        names_ref = {}
+
+        def parse_cp(s):
+            return int(s, 16)
+
+        # Parse data
+        for line in testdata:
+            line = line.strip()
+            if not line or line.startswith("#"):
+                continue
+            raw_cp, name = line.split("; ")
+            # Check for a range
+            if ".." in raw_cp:
+                cp1, cp2 = map(parse_cp, raw_cp.split(".."))
+                # remove ‘*’ at the end
+                assert name[-1] == '*', (raw_cp, name)
+                name = name[:-1]
+                for cp in range(cp1, cp2 + 1):
+                    names_ref[cp] = f"{name}{cp:04X}"
+            elif name[-1] == '*':
+                cp = parse_cp(raw_cp)
+                name = name[:-1]
+                names_ref[cp] = f"{name}{cp:04X}"
+            else:
+                assert '*' not in name, (raw_cp, name)
+                cp = parse_cp(raw_cp)
+                names_ref[cp] = name
+
+        for cp in range(0, sys.maxunicode + 1):
+            self.assertEqual(self.db.name(chr(cp), None), names_ref.get(cp))
 
     def test_isxidstart(self):
         self.assertTrue(self.db.isxidstart('S'))
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index b8ab06b8fb1906..5eec24b594b102 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1075,8 +1075,8 @@ static int
 is_tangut_ideograph(Py_UCS4 code)
 {
     return
-        (0x17000 <= code && code <= 0x187F7) || /* Tangut */
-        (0x18D00 <= code && code <= 0x18D08);   /* Tangut Supplement */
+        (0x17000 <= code && code <= 0x187FF) || /* Tangut */
+        (0x18D00 <= code && code <= 0x18D1E);   /* Tangut Supplement */
 }
 
 /* macros used to determine if the given code point is in the PUA range that
@@ -1500,7 +1500,7 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
     /* Check for Tangut ideographs. */
     if (strncmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
         /* Five hexdigits must follow. */
-        v = 0;
+        unsigned int v = 0;
         name += 17;
         namelen -= 17;
         if (namelen != 5)
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 7eb6b879f68ab6..2627eebb2ee24c 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -126,8 +126,8 @@
 
 # these ranges need to match unicodedata.c:is_tangut_ideograph
 tangut_ranges = [
-    ('17000', '187F7'),
-    ('18D00', '18D08')
+    ('17000', '187FF'),
+    ('18D00', '18D1E')
 ]
 
 

From b69fdfc483e45cf8b72804547a939a5f57a43e8d Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Fri, 13 Feb 2026 17:07:44 +0200
Subject: [PATCH 05/10] Add lookup tests and fix case-insensitivity for Tangut
 ideographs.

---
 Lib/test/test_ucn.py  | 24 ++++++++++++++++++++++++
 Modules/unicodedata.c | 11 ++++++-----
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py
index 0c641a455c0747..5f4f1b8e52ccef 100644
--- a/Lib/test/test_ucn.py
+++ b/Lib/test/test_ucn.py
@@ -111,6 +111,30 @@ def test_cjk_unified_ideographs(self):
         self.checkletter("cjK UniFIeD idEogRAph-2aBcD", "\U0002abcd")
         self.checkletter("CJk uNIfiEd IDeOGraPH-2AbCd", "\U0002abcd")
 
+    def test_tangut_ideographs(self):
+        self.checkletter("TANGUT IDEOGRAPH-17000", "\U00017000")
+        self.checkletter("TANGUT IDEOGRAPH-187FF", "\U000187ff")
+        self.checkletter("TANGUT IDEOGRAPH-18D00", "\U00018D00")
+        self.checkletter("TANGUT IDEOGRAPH-18D1E", "\U00018d1e")
+        self.checkletter("tangut ideograph-18d1e", "\U00018d1e")
+
+    def test_egyptian_hieroglyphs(self):
+        self.checkletter("EGYPTIAN HIEROGLYPH-13460", "\U00013460")
+        self.checkletter("EGYPTIAN HIEROGLYPH-143FA", "\U000143fa")
+        self.checkletter("egyptian hieroglyph-143fa", "\U000143fa")
+
+    def test_khitan_small_script_characters(self):
+        self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18B00", "\U00018b00")
+        self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18CD5", "\U00018cd5")
+        self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18CFF", "\U00018cff")
+        self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18CFF", "\U00018cff")
+        self.checkletter("khitan small script character-18cff", "\U00018cff")
+
+    def test_nushu_characters(self):
+        self.checkletter("NUSHU CHARACTER-1B170", "\U0001b170")
+        self.checkletter("NUSHU CHARACTER-1B2FB", "\U0001b2fb")
+        self.checkletter("nushu character-1b2fb", "\U0001b2fb")
+
     def test_bmp_characters(self):
         for code in range(0x10000):
             char = chr(code)
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 5eec24b594b102..8c378ba576ba23 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1498,7 +1498,7 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
     }
 
     /* Check for Tangut ideographs. */
-    if (strncmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
+    if (PyOS_strnicmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
         /* Five hexdigits must follow. */
         unsigned int v = 0;
         name += 17;
@@ -1507,10 +1507,11 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
             return 0;
         while (namelen--) {
             v *= 16;
-            if (*name >= '0' && *name <= '9')
-                v += *name - '0';
-            else if (*name >= 'A' && *name <= 'F')
-                v += *name - 'A' + 10;
+            Py_UCS1 c = Py_TOUPPER(*name);
+            if (c >= '0' && c <= '9')
+                v += c - '0';
+            else if (c >= 'A' && c <= 'F')
+                v += c - 'A' + 10;
             else
                 return 0;
             name++;

From accd1500f59b7485fbdd2cf0327c6d2971685710 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Fri, 13 Feb 2026 17:57:36 +0200
Subject: [PATCH 06/10] Add tests that work without network and with UCD 3.2.0.

---
 Lib/test/test_unicodedata.py | 55 +++++++++++++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 89f006165f2f07..9769528c7b9ac9 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -128,6 +128,59 @@ def test_function_checksum(self):
         result = h.hexdigest()
         self.assertEqual(result, self.expectedchecksum)
 
+    def test_name(self):
+        name = self.db.name
+        self.assertRaises(ValueError, name, '\0')
+        self.assertRaises(ValueError, name, '\n')
+        self.assertRaises(ValueError, name, '\x1F')
+        self.assertRaises(ValueError, name, '\x7F')
+        self.assertRaises(ValueError, name, '\x9F')
+        self.assertRaises(ValueError, name, '\uFFFE')
+        self.assertRaises(ValueError, name, '\uFFFF')
+        self.assertRaises(ValueError, name, '\U0010FFFF')
+        self.assertEqual(name('\U0010FFFF', 42), 42)
+
+        self.assertEqual(name(' '), 'SPACE')
+        self.assertEqual(name('1'), 'DIGIT ONE')
+        self.assertEqual(name('A'), 'LATIN CAPITAL LETTER A')
+        self.assertEqual(name('\xA0'), 'NO-BREAK SPACE')
+        self.assertEqual(name('\u0221', None), None if self.old else
+                         'LATIN SMALL LETTER D WITH CURL')
+        self.assertEqual(name('\u3400'), 'CJK UNIFIED IDEOGRAPH-3400')
+        self.assertEqual(name('\u9FA5'), 'CJK UNIFIED IDEOGRAPH-9FA5')
+        self.assertEqual(name('\uAC00'), 'HANGUL SYLLABLE GA')
+        self.assertEqual(name('\uD7A3'), 'HANGUL SYLLABLE HIH')
+        self.assertEqual(name('\uF900'), 'CJK COMPATIBILITY IDEOGRAPH-F900')
+        self.assertEqual(name('\uFA6A'), 'CJK COMPATIBILITY IDEOGRAPH-FA6A')
+        self.assertEqual(name('\uFBF9'),
+                         'ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH HAMZA '
+                         'ABOVE WITH ALEF MAKSURA ISOLATED FORM')
+        self.assertEqual(name('\U00013460', None), None if self.old else
+                         'EGYPTIAN HIEROGLYPH-13460')
+        self.assertEqual(name('\U000143FA', None), None if self.old else
+                         'EGYPTIAN HIEROGLYPH-143FA')
+        self.assertEqual(name('\U00017000', None), None if self.old else
+                         'TANGUT IDEOGRAPH-17000')
+        self.assertEqual(name('\U00018B00', None), None if self.old else
+                         'KHITAN SMALL SCRIPT CHARACTER-18B00')
+        self.assertEqual(name('\U00018CD5', None), None if self.old else
+                         'KHITAN SMALL SCRIPT CHARACTER-18CD5')
+        self.assertEqual(name('\U00018CFF', None), None if self.old else
+                         'KHITAN SMALL SCRIPT CHARACTER-18CFF')
+        self.assertEqual(name('\U00018D1E', None), None if self.old else
+                         'TANGUT IDEOGRAPH-18D1E')
+        self.assertEqual(name('\U0001B170', None), None if self.old else
+                         'NUSHU CHARACTER-1B170')
+        self.assertEqual(name('\U0001B2FB', None), None if self.old else
+                         'NUSHU CHARACTER-1B2FB')
+        self.assertEqual(name('\U0001FBA8', None), None if self.old else
+                         'BOX DRAWINGS LIGHT DIAGONAL UPPER CENTRE TO '
+                         'MIDDLE LEFT AND MIDDLE RIGHT TO LOWER CENTRE')
+        self.assertEqual(name('\U0002A6D6'), 'CJK UNIFIED IDEOGRAPH-2A6D6')
+        self.assertEqual(name('\U0002FA1D'), 'CJK COMPATIBILITY IDEOGRAPH-2FA1D')
+        self.assertEqual(name('\U00033479', None), None if self.old else
+                         'CJK UNIFIED IDEOGRAPH-33479')
+
     @requires_resource('cpu')
     def test_name_inverse_lookup(self):
         for char in iterallchars():
@@ -617,7 +670,7 @@ class UnicodeFunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):
                         '180bdc91143d8aa2eb9dd6726e66d37606205942')
 
     @requires_resource('network')
-    def test_name(self):
+    def test_all_names(self):
         TESTDATAFILE = "DerivedName.txt"
         testdata = download_test_data_file(TESTDATAFILE)
 

From c09400c4f005fed5512df0094fb17b053d2a2369 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Fri, 13 Feb 2026 18:37:40 +0200
Subject: [PATCH 07/10] Factor hex code point parsing out into parse_hex_code().

---
 Modules/unicodedata.c | 66 +++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 8c378ba576ba23..c2402e15bb4610 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1445,6 +1445,35 @@ _check_alias_and_seq(Py_UCS4* code, int with_named_seq)
     return 1;
 }
 
+static Py_UCS4
+parse_hex_code(const char *name, int namelen)
+{
+    if (namelen < 4 || namelen > 6) {
+        return (Py_UCS4)-1;
+    }
+    if (*name == '0') {
+        return (Py_UCS4)-1;
+    }
+    int v = 0;
+    while (namelen--) {
+        v *= 16;
+        Py_UCS1 c = Py_TOUPPER(*name);
+        if (c >= '0' && c <= '9') {
+            v += c - '0';
+        }
+        else if (c >= 'A' && c <= 'F') {
+            v += c - 'A' + 10;
+        }
+        else {
+            return (Py_UCS4)-1;
+        }
+        name++;
+    }
+    if (v > 0x10ffff) {
+        return (Py_UCS4)-1;
+    }
+    return v;
+}
 
 static int
 _getcode(const char* name, int namelen, Py_UCS4* code)
@@ -1474,25 +1503,10 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
     /* Check for CJK unified ideographs. */
     if (PyOS_strnicmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
         /* Four or five hexdigits must follow. */
-        unsigned int v;
-        v = 0;
-        name += 22;
-        namelen -= 22;
-        if (namelen != 4 && namelen != 5)
+        Py_UCS4 v = parse_hex_code(name + 22, namelen - 22);
+        if (!is_cjk_unified_ideograph(v)) {
             return 0;
-        while (namelen--) {
-            v *= 16;
-            Py_UCS1 c = Py_TOUPPER(*name);
-            if (c >= '0' && c <= '9')
-                v += c - '0';
-            else if (c >= 'A' && c <= 'F')
-                v += c - 'A' + 10;
-            else
-                return 0;
-            name++;
         }
-        if (!is_cjk_unified_ideograph(v))
-            return 0;
         *code = v;
         return 1;
     }
@@ -1500,24 +1514,10 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
     /* Check for Tangut ideographs. */
     if (PyOS_strnicmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
         /* Five hexdigits must follow. */
-        unsigned int v = 0;
-        name += 17;
-        namelen -= 17;
-        if (namelen != 5)
+        Py_UCS4 v = parse_hex_code(name + 17, namelen - 17);
+        if (!is_tangut_ideograph(v)) {
             return 0;
-        while (namelen--) {
-            v *= 16;
-            Py_UCS1 c = Py_TOUPPER(*name);
-            if (c >= '0' && c <= '9')
-                v += c - '0';
-            else if (c >= 'A' && c <= 'F')
-                v += c - 'A' + 10;
-            else
-                return 0;
-            name++;
         }
-        if (!is_tangut_ideograph(v))
-            return 0;
         *code = v;
         return 1;
     }

From 629531f0314cc7a5664460ccf4f765b9bf67ee57 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 14 Feb 2026 14:49:44 +0200
Subject: [PATCH 08/10] Generalize code for Hangul syllables and CJK and Tangut
 ideographs.

---
 Modules/unicodedata.c            | 87 ++++++++++++--------------------
 Modules/unicodename_db.h         | 28 ++++++++++
 Tools/unicode/makeunicodedata.py | 61 +++++++++++-----------
 3 files changed, 91 insertions(+), 85 deletions(-)

diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index c2402e15bb4610..1ed9760874b2a6 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1052,31 +1052,18 @@ static const char * const hangul_syllables[][3] = {
     { 0,    0,     "H"  }
 };
 
-/* These ranges need to match makeunicodedata.py:cjk_ranges. */
 static int
-is_cjk_unified_ideograph(Py_UCS4 code)
-{
-    return
-        (0x3400 <= code && code <= 0x4DBF)   || /* CJK Ideograph Extension A */
-        (0x4E00 <= code && code <= 0x9FFF)   || /* CJK Ideograph */
-        (0x20000 <= code && code <= 0x2A6DF) || /* CJK Ideograph Extension B */
-        (0x2A700 <= code && code <= 0x2B73F) || /* CJK Ideograph Extension C */
-        (0x2B740 <= code && code <= 0x2B81D) || /* CJK Ideograph Extension D */
-        (0x2B820 <= code && code <= 0x2CEAD) || /* CJK Ideograph Extension E */
-        (0x2CEB0 <= code && code <= 0x2EBE0) || /* CJK Ideograph Extension F */
-        (0x2EBF0 <= code && code <= 0x2EE5D) || /* CJK Ideograph Extension I */
-        (0x30000 <= code && code <= 0x3134A) || /* CJK Ideograph Extension G */
-        (0x31350 <= code && code <= 0x323AF) || /* CJK Ideograph Extension H */
-        (0x323B0 <= code && code <= 0x33479);   /* CJK Ideograph Extension J */
-}
-
-/* These ranges need to match makeunicodedata.py:tangut_ranges. */
-static int
-is_tangut_ideograph(Py_UCS4 code)
+find_prefix_id(Py_UCS4 code)
 {
-    return
-        (0x17000 <= code && code <= 0x187FF) || /* Tangut */
-        (0x18D00 <= code && code <= 0x18D1E);   /* Tangut Supplement */
+    for (int i = 0; i < (int)Py_ARRAY_LENGTH(derived_name_ranges); i++) {
+        if (code < derived_name_ranges[i].first) {
+            return -1;
+        }
+        if (code <= derived_name_ranges[i].last) {
+            return derived_name_ranges[i].prefixid;
+        }
+    }
+    return -1;
 }
 
 /* macros used to determine if the given code point is in the PUA range that
@@ -1354,7 +1341,9 @@ _getucname(PyObject *self,
         }
     }
 
-    if (SBase <= code && code < SBase+SCount) {
+    int prefixid = find_prefix_id(code);
+    if (prefixid == 0) {
+        assert(SBase <= code && code < SBase+SCount);
         /* Hangul syllable. */
         int SIndex = code - SBase;
         int L = SIndex / NCount;
@@ -1376,19 +1365,11 @@ _getucname(PyObject *self,
         return 1;
     }
 
-    if (is_cjk_unified_ideograph(code)) {
-        if (buflen < 28)
-            /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
-            return 0;
-        sprintf(buffer, "CJK UNIFIED IDEOGRAPH-%X", code);
-        return 1;
-    }
-
-    if (is_tangut_ideograph(code)) {
-        if (buflen < 23)
-            /* Worst case: TANGUT IDEOGRAPH-18D08 */
+    if (prefixid > 0) {
+        const char *prefix = derived_name_prefixes[prefixid];
+        if (snprintf(buffer, buflen, "%s%04X", prefix, code) >= buflen) {
             return 0;
-        sprintf(buffer, "TANGUT IDEOGRAPH-%X", code);
+        }
         return 1;
     }
 
@@ -1482,8 +1463,19 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
      * Named aliases are not resolved, they are returned as a code point in the
      * PUA */
 
-    /* Check for hangul syllables. */
-    if (PyOS_strnicmp(name, "HANGUL SYLLABLE ", 16) == 0) {
+    int i = 0;
+    size_t prefixlen;
+    for (; i < (int)Py_ARRAY_LENGTH(derived_name_prefixes); i++) {
+        const char *prefix = derived_name_prefixes[i];
+        prefixlen = strlen(derived_name_prefixes[i]);
+        if (PyOS_strnicmp(name, prefix, prefixlen) == 0) {
+            break;
+        }
+    }
+
+    if (i == 0) {
+        /* Hangul syllables. */
+        assert(PyOS_strnicmp(name, "HANGUL SYLLABLE ", 16) == 0);
         int len, L = -1, V = -1, T = -1;
         const char *pos = name + 16;
         find_syllable(pos, &len, &L, LCount, 0);
@@ -1500,22 +1492,9 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
         return 0;
     }
 
-    /* Check for CJK unified ideographs. */
-    if (PyOS_strnicmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
-        /* Four or five hexdigits must follow. */
-        Py_UCS4 v = parse_hex_code(name + 22, namelen - 22);
-        if (!is_cjk_unified_ideograph(v)) {
-            return 0;
-        }
-        *code = v;
-        return 1;
-    }
-
-    /* Check for Tangut ideographs. */
-    if (PyOS_strnicmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
-        /* Five hexdigits must follow. */
-        Py_UCS4 v = parse_hex_code(name + 17, namelen - 17);
-        if (!is_tangut_ideograph(v)) {
+    if (i < (int)Py_ARRAY_LENGTH(derived_name_prefixes)) {
+        Py_UCS4 v = parse_hex_code(name + prefixlen, namelen - prefixlen);
+        if (find_prefix_id(v) != i) {
             return 0;
         }
         *code = v;
diff --git a/Modules/unicodename_db.h b/Modules/unicodename_db.h
index d67e968e7a01ae..d9d062a2345974 100644
--- a/Modules/unicodename_db.h
+++ b/Modules/unicodename_db.h
@@ -19684,3 +19684,31 @@ static const named_sequence named_sequences[] = {
     {2, {0x02E5, 0x02E9}},
     {2, {0x02E9, 0x02E5}},
 };
+
+typedef struct {
+    Py_UCS4 first;
+    Py_UCS4 last;
+    int prefixid;
+} derived_name_range;
+
+static const derived_name_range derived_name_ranges[] = {
+    {0x3400, 0x4DBF, 1},
+    {0x4E00, 0x9FFF, 1},
+    {0xAC00, 0xD7A3, 0},
+    {0x17000, 0x187FF, 2},
+    {0x18D00, 0x18D1E, 2},
+    {0x20000, 0x2A6DF, 1},
+    {0x2A700, 0x2B73F, 1},
+    {0x2B740, 0x2B81D, 1},
+    {0x2B820, 0x2CEAD, 1},
+    {0x2CEB0, 0x2EBE0, 1},
+    {0x2EBF0, 0x2EE5D, 1},
+    {0x30000, 0x3134A, 1},
+    {0x31350, 0x323AF, 1},
+    {0x323B0, 0x33479, 1},
+};
+static const char * const derived_name_prefixes[] = {
+    "HANGUL SYLLABLE ",
+    "CJK UNIFIED IDEOGRAPH-",
+    "TANGUT IDEOGRAPH-",
+};
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 2627eebb2ee24c..432dc3a68bf5ed 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -109,25 +109,13 @@
 CASED_MASK = 0x2000
 EXTENDED_CASE_MASK = 0x4000
 
-# these ranges need to match unicodedata.c:is_cjk_unified_ideograph
-cjk_ranges = [
-    ('3400', '4DBF'),    # CJK Ideograph Extension A CJK
-    ('4E00', '9FFF'),    # CJK Ideograph
-    ('20000', '2A6DF'),  # CJK Ideograph Extension B
-    ('2A700', '2B73F'),  # CJK Ideograph Extension C
-    ('2B740', '2B81D'),  # CJK Ideograph Extension D
-    ('2B820', '2CEAD'),  # CJK Ideograph Extension E
-    ('2CEB0', '2EBE0'),  # CJK Ideograph Extension F
-    ('2EBF0', '2EE5D'),  # CJK Ideograph Extension I
-    ('30000', '3134A'),  # CJK Ideograph Extension G
-    ('31350', '323AF'),  # CJK Ideograph Extension H
-    ('323B0', '33479'),  # CJK Ideograph Extension J
-]
-
-# these ranges need to match unicodedata.c:is_tangut_ideograph
-tangut_ranges = [
-    ('17000', '187FF'),
-    ('18D00', '18D1E')
+# Maps the range names in UnicodeData.txt to the prefixes of the
+# derived names specified by rules NR1 (Hangul) and NR2 (ideographs).
+# Hangul must always be at index 0, since it uses a special format.
+derived_name_range_names = [
+    ("Hangul Syllable", "HANGUL SYLLABLE "),
+    ("CJK Ideograph", "CJK UNIFIED IDEOGRAPH-"),
+    ("Tangut Ideograph", "TANGUT IDEOGRAPH-"),
 ]
 
 
@@ -737,6 +725,23 @@ def makeunicodename(unicode, trace):
             fprint('    {%d, {%s}},' % (len(sequence), seq_str))
         fprint('};')
 
+        fprint(dedent("""
+            typedef struct {
+                Py_UCS4 first;
+                Py_UCS4 last;
+                int prefixid;
+            } derived_name_range;
+            """))
+
+        fprint('static const derived_name_range derived_name_ranges[] = {')
+        for name_range in unicode.derived_name_ranges:
+            fprint('    {0x%s, 0x%s, %d},' % name_range)
+        fprint('};')
+
+        fprint('static const char * const derived_name_prefixes[] = {')
+        for _, prefix in derived_name_range_names:
+            fprint('    "%s",' % prefix)
+        fprint('};')
 
 def merge_old_version(version, new, old):
     # Changes to exclusion file not implemented yet
@@ -959,8 +964,7 @@ def __init__(self, version, ideograph_check=True):
             char = int(s[0], 16)
             table[char] = from_row(s)
 
-        cjk_ranges_found = []
-        tangut_ranges_found = []
+        self.derived_name_ranges = []
 
         # expand first-last ranges
         field = None
@@ -974,20 +978,15 @@ def __init__(self, version, ideograph_check=True):
                     s.name = ""
                     field = dataclasses.astuple(s)[:15]
                 elif s.name[-5:] == "Last>":
-                    if s.name.startswith("<CJK Ideograph"):
-                        cjk_ranges_found.append((field[0],
-                                                 s.codepoint))
-                    elif s.name.startswith("<Tangut Ideograph"):
-                        tangut_ranges_found.append((field[0],
-                                                    s.codepoint))
+                    for j, (rangename, _) in enumerate(derived_name_range_names):
+                        if s.name.startswith("<" + rangename):
+                            self.derived_name_ranges.append(
+                                (field[0], s.codepoint, j))
+                            break
                     s.name = ""
                     field = None
             elif field:
                 table[i] = from_row(('%X' % i,) + field[1:])
-        if ideograph_check and cjk_ranges != cjk_ranges_found:
-            raise ValueError("CJK ranges deviate: have %r" % cjk_ranges_found)
-        if ideograph_check and tangut_ranges != tangut_ranges_found:
-            raise ValueError("Tangut ranges deviate: have %r" % tangut_ranges_found)
 
         # public attributes
         self.filename = UNICODE_DATA % ''

From a270a295fa21a4d7922439b88b93a10b209c28d5 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 14 Feb 2026 14:51:58 +0200
Subject: [PATCH 09/10] Update the NEWS file.

---
 .../next/Library/2023-02-05-20-02-30.gh-issue-80667.7LmzeA.rst  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Library/2023-02-05-20-02-30.gh-issue-80667.7LmzeA.rst b/Misc/NEWS.d/next/Library/2023-02-05-20-02-30.gh-issue-80667.7LmzeA.rst
index bbdcb4ffa0998b..f82f1eeb0589c6 100644
--- a/Misc/NEWS.d/next/Library/2023-02-05-20-02-30.gh-issue-80667.7LmzeA.rst
+++ b/Misc/NEWS.d/next/Library/2023-02-05-20-02-30.gh-issue-80667.7LmzeA.rst
@@ -1 +1 @@
-unicodedata: Fix missing Tangut Ideographs names.
+Add support for Tangut Ideograph names in :mod:`unicodedata`.

From 163e6c950dcba3df6d1ff8d93d76089bbb1c3f83 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 14 Feb 2026 15:08:36 +0200
Subject: [PATCH 10/10] Add tests for invalid names.

---
 Lib/test/test_unicodedata.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 9769528c7b9ac9..d100dae1110b7f 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -205,6 +205,17 @@ def test_lookup_nonexistant(self):
             "HANDBUG",
             "MODIFIER LETTER CYRILLIC SMALL QUESTION MARK",
             "???",
+            "CJK UNIFIED IDEOGRAPH-03400",
+            "CJK UNIFIED IDEOGRAPH-020000",
+            "CJK UNIFIED IDEOGRAPH-33FF",
+            "CJK UNIFIED IDEOGRAPH-F900",
+            "CJK UNIFIED IDEOGRAPH-13460",
+            "CJK UNIFIED IDEOGRAPH-17000",
+            "CJK UNIFIED IDEOGRAPH-18B00",
+            "CJK UNIFIED IDEOGRAPH-1B170",
+            "CJK COMPATIBILITY IDEOGRAPH-3400",
+            "TANGUT IDEOGRAPH-3400",
+            "HANGUL SYLLABLE AC00",
         ]:
             self.assertRaises(KeyError, self.db.lookup, nonexistent)