gh-131725: Generate GNU hash table in `msgfmt.py` by StanFromIreland · Pull Request #131727 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-131725: Generate GNU hash table in msgfmt.py #131727

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
8000
Prev Previous commit
Next Next commit
Add test
  • Loading branch information
StanFromIreland committed Mar 30, 2025
commit ba324cbcf45ba9691b7e7cf5529ecaceb22ee9b4
25 changes: 23 additions & 2 deletions Lib/test/test_tools/test_msgfmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from test.support.os_helper import temp_cwd
from test.support.script_helper import assert_python_failure, assert_python_ok
from test.test_tools import skip_if_missing, toolsdir
from test.test_tools import imports_under_tool, skip_if_missing, toolsdir


skip_if_missing('i18n')
Expand All @@ -18,6 +18,9 @@
script_dir = Path(toolsdir) / 'i18n'
msgfmt = script_dir / 'msgfmt.py'

with imports_under_tool("i18n"):
from msgfmt import _hashpjw


def compile_messages(po_file, mo_file):
assert_python_ok(msgfmt, '-o', mo_file, po_file)
Expand All @@ -42,7 +45,25 @@ def test_compilation(self):
self.assertDictEqual(actual._catalog, expected._catalog)

def test_hash_table(self):
pass
# Check _hashpjw generates correct hash values
self.assertEqual(_hashpjw(b"stan"), 502398)
self.assertEqual(_hashpjw(b"foo"), 27999)

# Check hash table is generated correctly for general.po
with temp_cwd():
tmp_mo_file = "messages.mo"
compile_messages(data_dir / "general.po", tmp_mo_file)
with open(tmp_mo_file, "rb") as f:
mo_data = f.read()

header = struct.unpack("=7I", mo_data[:28])
hash_table_size, hash_table_offset = header[5:7]

hash_tab = struct.unpack(f"={hash_table_size}I",
mo_data[hash_table_offset : hash_table_offset + (hash_table_size * 4)])
Comment on lines +62 to +66
Copy link
Member Author
@StanFromIreland StanFromIreland Mar 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could just hardcode these sizes since we test them below. Is that preferred?


self.assertEqual(hash_tab, (1, 3, 0, 8, 9, 7, 2, 0, 4, 5, 0, 6, 0))


def test_binary_header(self):
with temp_cwd():
Expand Down
8 changes: 4 additions & 4 deletions Tools/i18n/msgfmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def generate():
global MESSAGES

def hash_insert_entry(string, i):
hash_val = hashpjw(string)
hash_val = _hashpjw(string)
hash_cursor = hash_val % hash_tab_size
inc = 1 + (hash_val % (hash_tab_size - 2))
while hash_table[hash_cursor]:
Expand Down Expand Up @@ -90,7 +90,7 @@ def hash_insert_entry(string, i):
# Because unsuccessful searches are unlikely this is a good value.
# Formulas: [Knuth, The Art of Computer Programming, Volume 3,
# Sorting and Searching, 1973, Addison Wesley]
hash_tab_size = next_prime((len(MESSAGES) * 4) // 3)
hash_tab_size = _next_prime((len(MESSAGES) * 4) // 3)
if hash_tab_size <= 2:
hash_tab_size = 3
hash_table = array.array("I", [0] * hash_tab_size)
Expand Down Expand Up @@ -294,7 +294,7 @@ def main():

# Peter J. Weinberger hash function
# See: https://www.drdobbs.com/database/hashing-rehashed/184409859
def hashpjw(strs):
def _hashpjw(strs):
hval = 0
for s in strs:
if not s:
Expand All @@ -308,7 +308,7 @@ def hashpjw(strs):
return hval


def next_prime(start):
def _next_prime(start):
def is_prime(num):
divn = 3
sq = divn * divn
Expand Down
Loading
0