Skip to content

Commit 243300a

Browse files
initial
1 parent 3dd3675 commit 243300a

1 file changed

Lines changed: 80 additions & 10 deletions

File tree

Tools/i18n/msgfmt.py

Lines changed: 80 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from email.parser import HeaderParser
3535
import codecs
3636

37-
__version__ = "1.2"
37+
__version__ = "1.3"
3838

3939

4040
MESSAGES = {}
@@ -60,21 +60,57 @@ def add(ctxt, id, str, fuzzy):
6060
def generate():
6161
"Return the generated output."
6262
global MESSAGES
63+
64+
def hash_insert_entry(string, i):
    """Record entry number *i* in the hash table under the hash of *string*.

    Uses open addressing with double hashing: the first slot tried is
    ``hash % hash_tab_size`` and every further probe advances by
    ``1 + hash % (hash_tab_size - 2)``, wrapping around the table.
    The stored value is ``i + 1`` because 0 marks an empty slot.
    """
    digest = hashpjw(string)
    slot = digest % hash_tab_size
    step = digest % (hash_tab_size - 2) + 1
    # Walk the probe sequence until a free (zero) slot turns up.
    while hash_table[slot]:
        slot = (slot + step) % hash_tab_size
    hash_table[slot] = i + 1
72+
73+
# From [gettext.git]/gettext-tools/src/write-mo.c:
74+
# Each string has an associate hashing value V, computed by a fixed
75+
# function. To locate the string we use open addressing with double
76+
# hashing. The first index will be V % M, where M is the size of the
77+
# hashing table. If no entry is found, iterating with a second,
78+
# independent hashing function takes place. This second value will
79+
# be 1 + V % (M - 2).
80+
# The approximate number of probes will be
81+
#
82+
# for unsuccessful search: (1 - N / M) ^ -1
83+
# for successful search: - (N / M) ^ -1 * ln (1 - N / M)
84+
#
85+
# where N is the number of keys.
86+
#
87+
# If we now choose M to be the next prime bigger than 4 / 3 * N,
88+
# we get the values
89+
# 4 and 1.85 resp.
90+
# Because unsuccessful searches are unlikely this is a good value.
91+
# Formulas: [Knuth, The Art of Computer Programming, Volume 3,
92+
# 766 Sorting and Searching, 1973, Addison Wesley]
93+
hash_tab_size = next_prime((len(sorted(MESSAGES.keys())) * 4) // 3)
94+
if hash_tab_size <= 2:
95+
hash_tab_size = 3
96+
hash_table = array.array("I", [0] * hash_tab_size)
97+
6398
# the keys are sorted in the .mo file
6499
keys = sorted(MESSAGES.keys())
65100
offsets = []
66101
ids = strs = b''
67-
for id in keys:
102+
for i, id in enumerate(keys):
68103
# For each string, we need size and file offset. Each string is NUL
69104
# terminated; the NUL does not count into the size.
105+
hash_insert_entry(id, i)
70106
offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
71107
ids += id + b'\0'
72108
strs += MESSAGES[id] + b'\0'
73109
output = ''
74-
# The header is 7 32-bit unsigned integers. We don't use hash tables, so
110+
# The header is 7 32-bit unsigned integers. The hash table is written
75111
# right after the index tables, so the keys start right after the hash table.
76112
# translated string.
77-
keystart = 7*4+16*len(keys)
113+
keystart = 7*4+16*len(keys)+hash_tab_size*4
78114
# and the values start after the keys
79115
valuestart = keystart + len(ids)
80116
koffsets = []
@@ -86,13 +122,15 @@ def generate():
86122
voffsets += [l2, o2+valuestart]
87123
offsets = koffsets + voffsets
88124
output = struct.pack("Iiiiiii",
89-
0x950412de, # Magic
90-
0, # Version
91-
len(keys), # # of entries
92-
7*4, # start of key index
93-
7*4+len(keys)*8, # start of value index
94-
0, 0) # size and offset of hash table
125+
0x950412de, # Magic
126+
0, # Version
127+
len(keys), # # of entries
128+
7*4, # start of key index
129+
7*4+len(keys)*8, # start of value index
130+
hash_tab_size, # size of hash table
131+
7 * 4 + 2 * (len(keys) * 8)) # offset of hash table
95132
output += array.array("i", offsets).tobytes()
133+
output += hash_table.tobytes()
96134
output += ids
97135
output += strs
98136
return output
@@ -252,5 +290,37 @@ def main():
252290
make(filename, outfile)
253291

254292

293+
# Utilities for writing hash table
294+
295+
def hashpjw(str_param):
    """Return the 32-bit PJW hash of *str_param*.

    *str_param* must yield integers when iterated (e.g. a ``bytes``
    object).  Hashing stops at the first zero item, like a C string.

    The running value is masked to 32 bits on every step.  Python
    integers never overflow, so without the mask a carry out of bit 31
    would survive and keep growing, giving a different hash than
    unsigned 32-bit arithmetic and therefore a different slot in the
    hash table.
    """
    hval = 0
    for s in str_param:
        if not s:
            # A NUL terminates the hashed part of the string.
            break
        # Shift in the next byte, emulating unsigned 32-bit wrap-around.
        hval = ((hval << 4) + s) & 0xFFFFFFFF
        # Fold the top nibble back into the low bits and clear it.
        g = hval & (0xF << 28)
        if g != 0:
            hval ^= g >> 24
            hval ^= g
    return hval
307+
308+
309+
def next_prime(start):
    """Return the first odd number at or above *start* that passes is_prime().

    *start* is first made odd with ``start | 1``; candidates are then
    tried in steps of two.

    The primality test is trial division by odd divisors from 3 upwards.
    For candidates up to 9 only divisibility by 3 is checked, so 1 is
    accepted and 3 is rejected; callers are expected to handle very
    small results themselves.
    """
    def is_prime(num):
        # Try odd divisors while divisor**2 is still below num.
        divisor = 3
        while divisor * divisor < num and num % divisor:
            divisor += 2
        return num % divisor != 0

    n = start | 1
    while True:
        if is_prime(n):
            return n
        n += 2
324+
255325
if __name__ == '__main__':
256326
main()

0 commit comments

Comments
 (0)