Skip to content

Commit 2ae016c

Browse files
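Fix code and tests: widen the Tangut ideograph ranges (0x17000–0x187FF and 0x18D00–0x18D1E) in unicodedata.c and makeunicodedata.py, move test_name into UnicodeFunctionsTest, and update the expected checksum.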
1 parent 2e1560c commit 2ae016c

3 files changed

Lines changed: 46 additions & 50 deletions

File tree

Lib/test/test_unicodedata.py

Lines changed: 41 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -128,50 +128,6 @@ def test_function_checksum(self):
128128
result = h.hexdigest()
129129
self.assertEqual(result, self.expectedchecksum)
130130

131-
@requires_resource('network')
132-
def test_name(self):
133-
TESTBASEURL = "https://www.unicode.org/Public"
134-
TESTDATAFILE = "extracted/DerivedName.txt"
135-
TESTDATAURL = f"{TESTBASEURL}/{unicodedata.unidata_version}/ucd/{TESTDATAFILE}"
136-
137-
# Hit the exception early
138-
try:
139-
testdata = open_urlresource(TESTDATAURL, encoding="utf-8")
140-
except PermissionError:
141-
self.skipTest(f"Permission error when downloading {TESTDATAURL} "
142-
f"into the test data directory")
143-
except (OSError, HTTPException) as exc:
144-
self.skipTest(f"Failed to download {TESTDATAURL}: {exc}")
145-
146-
with testdata:
147-
self.run_name_tests(testdata)
148-
149-
def run_name_tests(self, testdata):
150-
names_ref = {}
151-
152-
def parse_cp(s):
153-
return int(s, 16)
154-
155-
# Parse data
156-
for line in testdata:
157-
line = line.strip()
158-
if not line or line.startswith("#"):
159-
continue
160-
raw_cp, name = line.split("; ")
161-
# Check for a range
162-
if ".." in raw_cp:
163-
cp1, cp2 = map(parse_cp, raw_cp.split(".."))
164-
# remove ‘*’ at the end
165-
name = name[:-1]
166-
for cp in range(cp1, cp2 + 1):
167-
names_ref[cp] = f"{name}{cp:0>4X}"
168-
else:
169-
cp = parse_cp(raw_cp)
170-
names_ref[cp] = name
171-
172-
for cp in range(0, sys.maxunicode + 1):
173-
self.assertEqual(self.db.name(chr(cp), None), names_ref.get(cp))
174-
175131
@requires_resource('cpu')
176132
def test_name_inverse_lookup(self):
177133
for char in iterallchars():
@@ -658,7 +614,47 @@ class UnicodeFunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):
658614
# (e.g. 'make distclean && make') to get the correct checksum.
659615
expectedchecksum = ('83cc43a2fbb779185832b4c049217d80b05bf349'
660616
if quicktest else
661-
'65670ae03a324c5f9e826a4de3e25bae4d73c9b7')
617+
'180bdc91143d8aa2eb9dd6726e66d37606205942')
618+
619+
@requires_resource('network')
620+
def test_name(self):
621+
TESTDATAFILE = "DerivedName.txt"
622+
testdata = download_test_data_file(TESTDATAFILE)
623+
624+
with testdata:
625+
self.run_name_tests(testdata)
626+
627+
def run_name_tests(self, testdata):
628+
names_ref = {}
629+
630+
def parse_cp(s):
631+
return int(s, 16)
632+
633+
# Parse data
634+
for line in testdata:
635+
line = line.strip()
636+
if not line or line.startswith("#"):
637+
continue
638+
raw_cp, name = line.split("; ")
639+
# Check for a range
640+
if ".." in raw_cp:
641+
cp1, cp2 = map(parse_cp, raw_cp.split(".."))
642+
# remove ‘*’ at the end
643+
assert name[-1] == '*', (raw_cp, name)
644+
name = name[:-1]
645+
for cp in range(cp1, cp2 + 1):
646+
names_ref[cp] = f"{name}{cp:04X}"
647+
elif name[-1] == '*':
648+
cp = parse_cp(raw_cp)
649+
name = name[:-1]
650+
names_ref[cp] = f"{name}{cp:04X}"
651+
else:
652+
assert '*' not in name, (raw_cp, name)
653+
cp = parse_cp(raw_cp)
654+
names_ref[cp] = name
655+
656+
for cp in range(0, sys.maxunicode + 1):
657+
self.assertEqual(self.db.name(chr(cp), None), names_ref.get(cp))
662658

663659
def test_isxidstart(self):
664660
self.assertTrue(self.db.isxidstart('S'))

Modules/unicodedata.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1075,8 +1075,8 @@ static int
10751075
is_tangut_ideograph(Py_UCS4 code)
10761076
{
10771077
return
1078-
(0x17000 <= code && code <= 0x187F7) || /* Tangut */
1079-
(0x18D00 <= code && code <= 0x18D08); /* Tangut Supplement */
1078+
(0x17000 <= code && code <= 0x187FF) || /* Tangut */
1079+
(0x18D00 <= code && code <= 0x18D1E); /* Tangut Supplement */
10801080
}
10811081

10821082
/* macros used to determine if the given code point is in the PUA range that
@@ -1500,7 +1500,7 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
15001500
/* Check for Tangut ideographs. */
15011501
if (strncmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
15021502
/* Five hexdigits must follow. */
1503-
v = 0;
1503+
unsigned int v = 0;
15041504
name += 17;
15051505
namelen -= 17;
15061506
if (namelen != 5)

Tools/unicode/makeunicodedata.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -126,8 +126,8 @@
126126

127127
# these ranges need to match unicodedata.c:is_tangut_ideograph
128128
tangut_ranges = [
129-
('17000', '187F7'),
130-
('18D00', '18D08')
129+
('17000', '187FF'),
130+
('18D00', '18D1E')
131131
]
132132

133133

0 commit comments

Comments
 (0)