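Fix code and tests: extend the Tangut ideograph ranges (to 187FF and 18D1E) in unicodedata.c and makeunicodedata.py, declare `v` locally in the Tangut branch of _getcode, move test_name/run_name_tests to UnicodeFunctionsTest (now using download_test_data_file and handling single-code-point names that end in '*'), and update the expected checksum.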

serhiy-storchaka · serhiy-storchaka · commit 2ae016c6927b · 2026-02-13T16:49:18.000+02:00
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
@@ -128,50 +128,6 @@ def test_function_checksum(self):
         result = h.hexdigest()
         self.assertEqual(result, self.expectedchecksum)
 
-    @requires_resource('network')
-    def test_name(self):
-        TESTBASEURL = "https://www.unicode.org/Public"
-        TESTDATAFILE = "extracted/DerivedName.txt"
-        TESTDATAURL = f"{TESTBASEURL}/{unicodedata.unidata_version}/ucd/{TESTDATAFILE}"
-
-        # Hit the exception early
-        try:
-            testdata = open_urlresource(TESTDATAURL, encoding="utf-8")
-        except PermissionError:
-            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
-                          f"into the test data directory")
-        except (OSError, HTTPException) as exc:
-            self.skipTest(f"Failed to download {TESTDATAURL}: {exc}")
-
-        with testdata:
-            self.run_name_tests(testdata)
-
-    def run_name_tests(self, testdata):
-        names_ref = {}
-
-        def parse_cp(s):
-            return int(s, 16)
-
-        # Parse data
-        for line in testdata:
-            line = line.strip()
-            if not line or line.startswith("#"):
-                continue
-            raw_cp, name = line.split("; ")
-            # Check for a range
-            if ".." in raw_cp:
-                cp1, cp2 = map(parse_cp, raw_cp.split(".."))
-                # remove ‘*’ at the end
-                name = name[:-1]
-                for cp in range(cp1, cp2 + 1):
-                    names_ref[cp] = f"{name}{cp:0>4X}"
-            else:
-                cp = parse_cp(raw_cp)
-                names_ref[cp] = name
-
-        for cp in range(0, sys.maxunicode + 1):
-            self.assertEqual(self.db.name(chr(cp), None), names_ref.get(cp))
-
     @requires_resource('cpu')
     def test_name_inverse_lookup(self):
         for char in iterallchars():
@@ -658,7 +614,47 @@ class UnicodeFunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):
     # (e.g. 'make distclean && make') to get the correct checksum.
     expectedchecksum = ('83cc43a2fbb779185832b4c049217d80b05bf349'
                         if quicktest else
-                        '65670ae03a324c5f9e826a4de3e25bae4d73c9b7')
+                        '180bdc91143d8aa2eb9dd6726e66d37606205942')
+
+    @requires_resource('network')
+    def test_name(self):
+        TESTDATAFILE = "DerivedName.txt"
+        testdata = download_test_data_file(TESTDATAFILE)
+
+        with testdata:
+            self.run_name_tests(testdata)
+
+    def run_name_tests(self, testdata):
+        names_ref = {}
+
+        def parse_cp(s):
+            return int(s, 16)
+
+        # Parse data
+        for line in testdata:
+            line = line.strip()
+            if not line or line.startswith("#"):
+                continue
+            raw_cp, name = line.split("; ")
+            # Check for a range
+            if ".." in raw_cp:
+                cp1, cp2 = map(parse_cp, raw_cp.split(".."))
+                # remove ‘*’ at the end
+                assert name[-1] == '*', (raw_cp, name)
+                name = name[:-1]
+                for cp in range(cp1, cp2 + 1):
+                    names_ref[cp] = f"{name}{cp:04X}"
+            elif name[-1] == '*':
+                cp = parse_cp(raw_cp)
+                name = name[:-1]
+                names_ref[cp] = f"{name}{cp:04X}"
+            else:
+                assert '*' not in name, (raw_cp, name)
+                cp = parse_cp(raw_cp)
+                names_ref[cp] = name
+
+        for cp in range(0, sys.maxunicode + 1):
+            self.assertEqual(self.db.name(chr(cp), None), names_ref.get(cp))
 
     def test_isxidstart(self):
         self.assertTrue(self.db.isxidstart('S'))
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
@@ -1075,8 +1075,8 @@ static int
 is_tangut_ideograph(Py_UCS4 code)
 {
     return
-        (0x17000 <= code && code <= 0x187F7) || /* Tangut */
-        (0x18D00 <= code && code <= 0x18D08);   /* Tangut Supplement */
+        (0x17000 <= code && code <= 0x187FF) || /* Tangut */
+        (0x18D00 <= code && code <= 0x18D1E);   /* Tangut Supplement */
 }
 
 /* macros used to determine if the given code point is in the PUA range that
@@ -1500,7 +1500,7 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
     /* Check for Tangut ideographs. */
     if (strncmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
         /* Five hexdigits must follow. */
-        v = 0;
+        unsigned int v = 0;
         name += 17;
         namelen -= 17;
         if (namelen != 5)
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
@@ -126,8 +126,8 @@
 
 # these ranges need to match unicodedata.c:is_tangut_ideograph
 tangut_ranges = [
-    ('17000', '187F7'),
-    ('18D00', '18D08')
+    ('17000', '187FF'),
+    ('18D00', '18D1E')
 ]
 
 

Original file line number	Diff line number	Diff line change
`@@ -126,8 +126,8 @@`
`126`	`126`
`127`	`127`	`# these ranges need to match unicodedata.c:is_tangut_ideograph`
`128`	`128`	`tangut_ranges = [`
`129`		`- ('17000', '187F7'),`
`130`		`- ('18D00', '18D08')`
	`129`	`+ ('17000', '187FF'),`
	`130`	`+ ('18D00', '18D1E')`
`131`	`131`	`]`
`132`	`132`
`133`	`133`