Skip to content

Commit 777e4e9

Browse files
committed
Drop dead splitbins Unicode generator code
All Unicode table lookups in this generator now emit packtab-based helpers, so the old splitbins compressor is no longer used. Validated by regenerating Unicode data, rebuilding python.exe and unicodedata.so, and running test_unicodedata and test_tools.
1 parent ff11ea5 commit 777e4e9

1 file changed

Lines changed: 0 additions & 62 deletions

File tree

Tools/unicode/makeunicodedata.py

Lines changed: 0 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -1336,67 +1336,5 @@ def getsize(data):
13361336
return 4
13371337

13381338

1339-
def splitbins(t, trace=0):
1340-
"""t, trace=0 -> (t1, t2, shift). Split a table to save space.
1341-
1342-
t is a sequence of ints. This function can be useful to save space if
1343-
many of the ints are the same. t1 and t2 are lists of ints, and shift
1344-
is an int, chosen to minimize the combined size of t1 and t2 (in C
1345-
code), and where for each i in range(len(t)),
1346-
t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
1347-
where mask is a bitmask isolating the last "shift" bits.
1348-
1349-
If optional arg trace is non-zero (default zero), progress info
1350-
is printed to sys.stderr. The higher the value, the more info
1351-
you'll get.
1352-
"""
1353-
1354-
if trace:
1355-
def dump(t1, t2, shift, bytes):
1356-
print("%d+%d bins at shift %d; %d bytes" % (
1357-
len(t1), len(t2), shift, bytes), file=sys.stderr)
1358-
print("Size of original table:", len(t)*getsize(t), "bytes",
1359-
file=sys.stderr)
1360-
n = len(t)-1 # last valid index
1361-
maxshift = 0 # the most we can shift n and still have something left
1362-
if n > 0:
1363-
while n >> 1:
1364-
n >>= 1
1365-
maxshift += 1
1366-
del n
1367-
bytes = sys.maxsize # smallest total size so far
1368-
t = tuple(t) # so slices can be dict keys
1369-
for shift in range(maxshift + 1):
1370-
t1 = []
1371-
t2 = []
1372-
size = 2**shift
1373-
bincache = {}
1374-
for i in range(0, len(t), size):
1375-
bin = t[i:i+size]
1376-
index = bincache.get(bin)
1377-
if index is None:
1378-
index = len(t2)
1379-
bincache[bin] = index
1380-
t2.extend(bin)
1381-
t1.append(index >> shift)
1382-
# determine memory size
1383-
b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
1384-
if trace > 1:
1385-
dump(t1, t2, shift, b)
1386-
if b < bytes:
1387-
best = t1, t2, shift
1388-
bytes = b
1389-
t1, t2, shift = best
1390-
if trace:
1391-
print("Best:", end=' ', file=sys.stderr)
1392-
dump(t1, t2, shift, bytes)
1393-
if __debug__:
1394-
# exhaustively verify that the decomposition is correct
1395-
mask = ~((~0) << shift) # i.e., low-bit mask of shift bits
1396-
for i in range(len(t)):
1397-
assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
1398-
return best
1399-
1400-
14011339
if __name__ == "__main__":
14021340
maketables(1)

0 commit comments

Comments
 (0)