@@ -1336,67 +1336,5 @@ def getsize(data):
13361336 return 4
13371337
13381338
def splitbins(t, trace=0):
    """Split table t into a two-level lookup: returns (t1, t2, shift).

    t is a sequence of ints.  Splitting pays off when many fixed-size runs
    of t are identical: each distinct run is stored once in t2, and t1
    records, per run, where that run lives in t2.  Every candidate shift is
    tried, and the one with the smallest combined size of t1 and t2 (as
    measured by getsize) is kept.  For every i in range(len(t)):

        t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]

    where mask == (1 << shift) - 1 selects the low "shift" bits of i.

    If trace is non-zero, progress information is written to sys.stderr;
    a value greater than 1 also reports every candidate shift.
    """

    def report(index_table, data_table, bits, nbytes):
        # One-line size summary for a candidate decomposition.
        print("%d+%d bins at shift %d; %d bytes" % (
            len(index_table), len(data_table), bits, nbytes),
            file=sys.stderr)

    if trace:
        print("Size of original table:", len(t) * getsize(t), "bytes",
              file=sys.stderr)

    # Largest shift that still leaves a non-zero value when applied to the
    # last valid index, i.e. floor(log2(len(t) - 1)); 0 for tiny tables.
    max_shift = max((len(t) - 1).bit_length() - 1, 0)

    t = tuple(t)  # tuple slices are hashable, so they can be dict keys
    total = len(t)
    smallest = sys.maxsize  # smallest total size found so far
    for shift in range(max_shift + 1):
        width = 1 << shift
        index_table = []
        data_table = []
        seen = {}  # run contents -> offset of that run inside data_table
        for start in range(0, total, width):
            chunk = t[start:start + width]
            if chunk not in seen:
                seen[chunk] = len(data_table)
                data_table.extend(chunk)
            index_table.append(seen[chunk] >> shift)

        # Memory cost of this candidate.
        cost = (len(index_table) * getsize(index_table)
                + len(data_table) * getsize(data_table))
        if trace > 1:
            report(index_table, data_table, shift, cost)
        if cost < smallest:
            best = index_table, data_table, shift
            smallest = cost

    t1, t2, shift = best
    if trace:
        print("Best:", end=' ', file=sys.stderr)
        report(t1, t2, shift, smallest)

    if __debug__:
        # Exhaustively verify that the chosen decomposition reproduces t.
        mask = (1 << shift) - 1  # low-bit mask of "shift" bits
        for i, value in enumerate(t):
            assert value == t2[(t1[i >> shift] << shift) + (i & mask)]
    return best
1399-
1400-
# Script entry point: call maketables only when this file is executed
# directly, not when it is imported.
# NOTE(review): the meaning of the argument 1 is not visible in this chunk --
# presumably a trace/verbosity level; confirm against maketables.
if __name__ == "__main__":
    maketables(1)
0 commit comments