python · StanFromIreland · Mar 25, 2025 · Mar 27, 2025 · Mar 27, 2025 · Mar 27, 2025
diff --git a/Lib/test/test_tools/msgfmt_data/fuzzy.mo b/Lib/test/test_tools/msgfmt_data/fuzzy.mo
diff --git a/Lib/test/test_tools/msgfmt_data/general.mo b/Lib/test/test_tools/msgfmt_data/general.mo
diff --git a/Lib/test/test_tools/test_msgfmt.py b/Lib/test/test_tools/test_msgfmt.py
@@ -40,6 +40,9 @@ def test_compilation(self):
 
                     self.assertDictEqual(actual._catalog, expected._catalog)
 
+    def test_hash_table(self):
+        pass
+
     def test_translations(self):
         with open(data_dir / 'general.mo', 'rb') as f:
             t = GNUTranslations(f)
@@ -126,7 +129,7 @@ def test_version(self):
         for option in ('--version', '-V'):
             res = assert_python_ok(msgfmt, option)
             out = res.out.decode('utf-8').strip()
-            self.assertEqual('msgfmt.py 1.2', out)
+            self.assertEqual('msgfmt.py 1.3', out)
 
     def test_invalid_option(self):
         res = assert_python_failure(msgfmt, '--invalid-option')

diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-03-25-18-00-00.gh-issue-131725.qwfh321.rst b/Misc/NEWS.d/next/Tools-Demos/2025-03-25-18-00-00.gh-issue-131725.qwfh321.rst
@@ -0,0 +1 @@
+:program:`msgfmt` now generates GNU hash tables.
diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py
@@ -34,7 +34,7 @@
 from email.parser import HeaderParser
 import codecs
 
-__version__ = "1.2"
+__version__ = "1.3"
 
 
 MESSAGES = {}
@@ -60,21 +60,57 @@ def add(ctxt, id, str, fuzzy):
 def generate():
     "Return the generated output."
     global MESSAGES
+
+    def hash_insert_entry(string, i):
+        hash_val = hashpjw(string)
+        hash_cursor = hash_val % hash_tab_size
+        inc = 1 + (hash_val % (hash_tab_size - 2))
+        while hash_table[hash_cursor] != 0:
+            hash_cursor += inc
+            hash_cursor %= hash_tab_size
+        hash_table[hash_cursor] = i + 1
+
+    # From [gettext.git]/gettext-tools/src/write-mo.c:
+    #  Each string has an associate hashing value V, computed by a fixed
+    #  function.  To locate the string we use open addressing with double
+    #      hashing.  The first index will be V % M, where M is the size of the
+    #  hashing table.  If no entry is found, iterating with a second,
+    #  independent hashing function takes place.  This second value will
+    #  be 1 + V % (M - 2).
+    #  The approximate number of probes will be
+    #
+    #    for unsuccessful search:  (1 - N / M) ^ -1
+    #    for successful search:    - (N / M) ^ -1 * ln (1 - N / M)
+    #
+    #  where N is the number of keys.
+    #
+    #  If we now choose M to be the next prime bigger than 4 / 3 * N,
+    #  we get the values
+    #                      4   and   1.85  resp.
+    #  Because unsuccessful searches are unlikely this is a good value.
+    #  Formulas: [Knuth, The Art of Computer Programming, Volume 3,
+    #                 766 Sorting and Searching, 1973, Addison Wesley]
+    hash_tab_size = next_prime((len(sorted(MESSAGES.keys())) * 4) // 3)
+    if hash_tab_size <= 2:
+        hash_tab_size = 3
+    hash_table = array.array("I", [0] * hash_tab_size)
+
     # the keys are sorted in the .mo file
     keys = sorted(MESSAGES.keys())
     offsets = []
     ids = strs = b''
-    for id in keys:
+    for i, id in enumerate(keys):
         # For each string, we need size and file offset.  Each string is NUL
         # terminated; the NUL does not count into the size.
+        hash_insert_entry(id, i)
         offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
         ids += id + b'\0'
         strs += MESSAGES[id] + b'\0'
     output = ''
-    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
+    ## FIX ME The header is 7 32-bit unsigned integers.  We use hash tables, so
     # the keys start right after the index tables.
     # translated string.
-    keystart = 7*4+16*len(keys)
+    keystart = 7*4+16*len(keys)+hash_tab_size*4
     # and the values start after the keys
     valuestart = keystart + len(ids)
     koffsets = []
@@ -86,13 +122,15 @@ def generate():
         voffsets += [l2, o2+valuestart]
     offsets = koffsets + voffsets
     output = struct.pack("Iiiiiii",
-                         0x950412de,       # Magic
-                         0,                 # Version
-                         len(keys),         # # of entries
-                         7*4,               # start of key index
-                         7*4+len(keys)*8,   # start of value index
-                         0, 0)              # size and offset of hash table
+                         0x950412de,             # Magic
+                         0,                            # Version
+                         len(keys),                    # # of entries
+                         7*4,                          # start of key index
+                         7*4+len(keys)*8,              # start of value index
+                         hash_tab_size,                # size of hash table
+                         7 * 4 + 2 * (len(keys) * 8))  # offset of hash table
     output += array.array("i", offsets).tobytes()
+    output += hash_table.tobytes()
     output += ids
     output += strs
     return output
@@ -253,5 +291,37 @@ def main():
         make(filename, outfile)
 
 
+# Utilities for writing hash table
+
+def hashpjw(str_param):
+    hval = 0
+    for s in str_param:
+        if not s:
+            break
+        hval <<= 4
+        hval += s
+        g = hval & 0xF << 28
+        if g != 0:
+            hval ^= g >> 24
+            hval ^= g
+    return hval
+
+
+def next_prime(start):
+    def is_prime(num):
+        divn = 3
+        sq = divn * divn
+        while sq < num and num % divn != 0:
+            divn += 1
+            sq += 4 * divn
+            divn += 1
+
+        return num % divn != 0
+
+    candidate = start | 1
+    while not is_prime(candidate):
+        candidate += 2
+    return candidate
+
 if __name__ == '__main__':
     main()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		:program:`msgfmt` now generates GNU hash tables.