-
-
Notifications
You must be signed in to change notification settings - Fork 34.5k
gh-79516: allow msgfmt.py to compile multiple input po files #10875
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 40 commits
73b5ac8
5fb1575
b1968e9
0bc4ad3
a9e67b4
6c59d6c
1ce22c0
863bd97
4390ede
008ea27
8744743
93a6eb7
ba26b80
150cea1
d9afabb
9004387
7556f79
80947d1
12acb83
1ecc1f3
e59ba68
4ffd20a
7505f2b
2c27120
1f4e5ac
4170796
46c08c5
24d89a6
17b4e05
bfc8a44
106dd40
9cb9395
08bc8d7
9d992cd
31fd434
916aec7
51fcf09
d51ad50
677f720
b4ea80a
3120add
d642923
9d91f12
4d83cb7
421272b
09b97d9
12cae51
3760851
d45039c
dde5ef1
bb6e0c5
797990a
c95af16
905ec70
b7a7c48
1b3b73e
1db2c66
3a6e1ef
743bdc5
50e7145
caa8955
a4f1769
11f6e69
0ed3107
d1e0a26
213afcb
4d54e50
ab97edd
9fd1d57
7d09934
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| file1_fr_crlf.po eol=crlf | ||
| file2_fr_lf.po eol=lf |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| # Example of French translations, crlf end of lines | ||
| # | ||
| msgid "" | ||
| msgstr "" | ||
| "Project-Id-Version: PACKAGE VERSION\n" | ||
| "Report-Msgid-Bugs-To: \n" | ||
| "POT-Creation-Date: 2018-11-30 23:46+0100\n" | ||
| "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" | ||
| "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" | ||
| "Language-Team: French\n" | ||
| "Language: fr\n" | ||
| "MIME-Version: 1.0\n" | ||
| "Content-Type: text/plain; charset=UTF-8\n" | ||
| "Content-Transfer-Encoding: 8bit\n" | ||
| "Plural-Forms: nplurals=2; plural=(n > 1);\n" | ||
|
|
||
| #: file1.py:6 | ||
| msgid "Hello!" | ||
| msgstr "Bonjour !" | ||
|
|
||
| #: file1.py:7 | ||
| #, python-brace-format | ||
| msgid "{n} horse" | ||
| msgid_plural "{n} horses" | ||
| msgstr[0] "{n} cheval" | ||
| msgstr[1] "{n} chevaux" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| # Example of French translations, lf end of lines | ||
| # | ||
| msgid "" | ||
| msgstr "" | ||
| "Project-Id-Version: PACKAGE VERSION\n" | ||
| "Report-Msgid-Bugs-To: \n" | ||
| "POT-Creation-Date: 2018-11-30 23:57+0100\n" | ||
| "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" | ||
| "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" | ||
| "Language-Team: French\n" | ||
| "Language: fr\n" | ||
| "MIME-Version: 1.0\n" | ||
| "Content-Type: text/plain; charset=UTF-8\n" | ||
| "Content-Transfer-Encoding: 8bit\n" | ||
| "Plural-Forms: nplurals=2; plural=(n > 1);\n" | ||
|
|
||
| #: file2.py:6 | ||
| msgid "It's over." | ||
| msgstr "C'est terminé." | ||
|
|
||
| #: file2.py:7 | ||
| msgid "Bye..." | ||
| msgstr "Au revoir ..." |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,8 @@ | ||
| """Tests for the Tools/i18n/msgfmt.py tool.""" | ||
|
|
||
| import filecmp | ||
| import os | ||
| import shutil | ||
| import sys | ||
| import unittest | ||
| from gettext import GNUTranslations | ||
|
|
@@ -17,8 +20,8 @@ | |
| msgfmt = script_dir / 'msgfmt.py' | ||
|
|
||
|
|
||
| def compile_messages(po_file, mo_file): | ||
| assert_python_ok(msgfmt, '-o', mo_file, po_file) | ||
| def compile_messages(mo_file, *po_files): | ||
| assert_python_ok(msgfmt, '-o', mo_file, *po_files) | ||
|
|
||
|
|
||
| class CompilationTest(unittest.TestCase): | ||
|
|
@@ -33,7 +36,7 @@ def test_compilation(self): | |
| expected = GNUTranslations(f) | ||
|
|
||
| tmp_mo_file = mo_file.name | ||
| compile_messages(po_file, tmp_mo_file) | ||
| compile_messages(tmp_mo_file, po_file) | ||
| with open(tmp_mo_file, 'rb') as f: | ||
| actual = GNUTranslations(f) | ||
|
|
||
|
|
@@ -91,6 +94,7 @@ def test_generic_syntax_error(self): | |
| err = res.err.decode('utf-8') | ||
| self.assertIn('Syntax error', err) | ||
|
|
||
|
|
||
|
merwok marked this conversation as resolved.
|
||
| class CLITest(unittest.TestCase): | ||
|
|
||
| def test_help(self): | ||
|
|
@@ -121,10 +125,60 @@ def test_nonexistent_file(self): | |
| assert_python_failure(msgfmt, 'nonexistent.po') | ||
|
|
||
|
|
||
| class MultiInputTest(unittest.TestCase): | ||
| """Tests for the issue https://github.com/python/cpython/issues/79516 | ||
| msgfmt.py accepts multiple input files | ||
|
s-ball marked this conversation as resolved.
Outdated
|
||
| """ | ||
|
|
||
| def test_no_outputfile(self): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test looks redundant. |
||
| """Test script without -o option - 1 single file""" | ||
| with temp_cwd(None): | ||
| shutil.copy(data_dir / 'file2_fr_lf.po', '.') | ||
| assert_python_ok(msgfmt, 'file2_fr_lf.po') | ||
| self.assertTrue( | ||
| filecmp.cmp(data_dir / 'file2_fr_lf.mo', 'file2_fr_lf.mo'), | ||
| 'Wrong compiled file2_fr_lf.mo') | ||
|
|
||
| def test_both_with_outputfile(self): | ||
| """Test script with -o option and 2 input files | ||
|
|
||
| The current behaviour is to merge entries having distinct ids | ||
| and keep the last one if the same id occurs in multiple files. | ||
|
|
||
| Here the first file has Windows endings (crlf) while the second has | ||
| Unix endings (lf) | ||
| """ | ||
| with temp_cwd(None): | ||
| assert_python_ok(msgfmt, '-o', 'file12.mo', | ||
| data_dir / 'file1_fr_crlf.po', | ||
| data_dir / 'file2_fr_lf.po') | ||
| self.assertTrue( | ||
| filecmp.cmp(data_dir / 'file12_fr.mo', 'file12.mo'), | ||
| 'Wrong compiled file12.mo') | ||
|
s-ball marked this conversation as resolved.
Outdated
|
||
|
|
||
| def test_both_without_outputfile(self): | ||
| """Test script without -o option and 2 input files""" | ||
|
|
||
| with temp_cwd(None): | ||
| shutil.copy(data_dir /'file1_fr_crlf.po', '.') | ||
| shutil.copy(data_dir /'file2_fr_lf.po', '.') | ||
|
merwok marked this conversation as resolved.
Outdated
|
||
| assert_python_ok(msgfmt, 'file1_fr_crlf.po', 'file2_fr_lf.po') | ||
| self.assertTrue( | ||
| filecmp.cmp(data_dir / 'file1_fr_crlf.mo', 'file1_fr_crlf.mo'), | ||
| 'Wrong compiled file1_fr_crlf.mo') | ||
|
s-ball marked this conversation as resolved.
Outdated
|
||
| self.assertTrue( | ||
| filecmp.cmp(data_dir / 'file2_fr_lf.mo', 'file2_fr_lf.mo'), | ||
| 'Wrong compiled file2_fr_lf.mo') | ||
|
s-ball marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| def update_catalog_snapshots(): | ||
| for po_file in data_dir.glob('*.po'): | ||
| mo_file = po_file.with_suffix('.mo') | ||
| compile_messages(po_file, mo_file) | ||
| compile_messages(mo_file, po_file) | ||
| # special processing for file12_fr.mo which results from 2 input files | ||
| compile_messages(data_dir /'file12_fr.mo', | ||
|
merwok marked this conversation as resolved.
Outdated
|
||
| data_dir / 'file1_fr_crlf.po', | ||
| data_dir / 'file2_fr_lf.po') | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would use compile_messages, not assert_python_ok
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The problem is that compile_message only supports one single input po file. Do you suggest that I should change the behavior of
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah… would it be a big change?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not so big. It should be ready before end of today.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. It was enough to swap parameters and allow an arbitrary number for the second through a
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I assumed that compile_messages came from the tool being tested or the gettext module, but now I see it’s also a test helper – and it calls assert_python_ok! |
||
|
|
||
|
|
||
| if __name__ == '__main__': | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| :program:`msgfmt.py` is now able to merge more than one po file into a compiled mo | ||
| file. When an entry exists in more than one input file, the last file wins. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,15 +6,21 @@ | |
| This program converts a textual Uniforum-style message catalog (.po file) into | ||
| a binary GNU catalog (.mo file). This is essentially the same function as the | ||
| GNU msgfmt program, however, it is a simpler implementation. Currently it | ||
| does not handle plural forms but it does handle message contexts. | ||
| handles plural forms and message contexts, but does not generate a hash table. | ||
|
|
||
| Usage: msgfmt.py [OPTIONS] filename.po | ||
| Usage: msgfmt.py [OPTIONS] filename.po ... | ||
|
|
||
| Options: | ||
| -o file | ||
| --output-file=file | ||
| Specify the output file to write to. If omitted, output will go to a | ||
| file named filename.mo (based off the input file name). | ||
| file named filename.mo (based off the input file name(s)). | ||
| If more than one input file is given, and if an output file is passed | ||
| with -o option, then all the input files are merged. If keys are | ||
| repeated (common for "" key for the header) the one from the last file is used. | ||
|
s-ball marked this conversation as resolved.
Outdated
|
||
| If more than one input file is given, and no -o option is present, then | ||
| every input file is compiled into its corresponding mo file (same name | ||
|
s-ball marked this conversation as resolved.
Outdated
|
||
| with mo replacing po) | ||
|
|
||
| -h | ||
| --help | ||
|
|
@@ -47,29 +53,27 @@ def usage(code, msg=''): | |
| sys.exit(code) | ||
|
|
||
|
|
||
| def add(ctxt, id, str, fuzzy): | ||
| def add(ctxt, id, str, fuzzy, messages): | ||
|
merwok marked this conversation as resolved.
|
||
| "Add a non-fuzzy translation to the dictionary." | ||
| global MESSAGES | ||
| if not fuzzy and str: | ||
| if ctxt is None: | ||
| MESSAGES[id] = str | ||
| messages[id] = str | ||
| else: | ||
| MESSAGES[b"%b\x04%b" % (ctxt, id)] = str | ||
| messages[b"%b\x04%b" % (ctxt, id)] = str | ||
|
|
||
|
|
||
| def generate(): | ||
| def generate(messages): | ||
| "Return the generated output." | ||
| global MESSAGES | ||
| # the keys are sorted in the .mo file | ||
| keys = sorted(MESSAGES.keys()) | ||
| keys = sorted(messages.keys()) | ||
| offsets = [] | ||
| ids = strs = b'' | ||
| for id in keys: | ||
| # For each string, we need size and file offset. Each string is NUL | ||
| # terminated; the NUL does not count into the size. | ||
| offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id]))) | ||
| offsets.append((len(ids), len(id), len(strs), len(messages[id]))) | ||
| ids += id + b'\0' | ||
| strs += MESSAGES[id] + b'\0' | ||
| strs += messages[id] + b'\0' | ||
| output = '' | ||
| # The header is 7 32-bit unsigned integers. We don't use hash tables, so | ||
| # the keys start right after the index tables. | ||
|
|
@@ -98,18 +102,44 @@ def generate(): | |
| return output | ||
|
|
||
|
|
||
| def make(filename, outfile): | ||
| ID = 1 | ||
| STR = 2 | ||
| CTXT = 3 | ||
| def make(filenames, outfile): | ||
|
s-ball marked this conversation as resolved.
|
||
| """ Compiles one or more po file(s). | ||
|
|
||
| filenames is a string or an iterable of strings representing input file(s) | ||
|
s-ball marked this conversation as resolved.
Outdated
|
||
| outfile is a string for the name of the output file or None. | ||
|
|
||
| If it is not None, the output file receives a merge of the input files. | ||
| If it is None, then filenames must be a string and the name of the output | ||
| file is obtained by replacing the po extension with mo. | ||
| Both ways are for compatibility reasons with previous behaviour. | ||
| """ | ||
| messages = {} | ||
| if isinstance(filenames, str): | ||
| infile, outfile = get_names(filenames, outfile) | ||
| process(infile, messages) | ||
| elif outfile is None: | ||
| raise TypeError("outfile cannot be None with more than one infile") | ||
| else: | ||
| for filename in filenames: | ||
| infile, _ = get_names(filename, outfile) | ||
| process(infile, messages) | ||
| output = generate(messages) | ||
| writefile(outfile, output) | ||
|
|
||
| def get_names(filename, outfile): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is no longer used. |
||
| # Compute .mo name from .po name and arguments | ||
| if filename.endswith('.po'): | ||
| infile = filename | ||
| else: | ||
| infile = filename + '.po' | ||
| if outfile is None: | ||
| outfile = os.path.splitext(infile)[0] + '.mo' | ||
| return infile, outfile | ||
|
|
||
| def process(infile, messages): | ||
| ID = 1 | ||
| STR = 2 | ||
| CTXT = 3 | ||
|
|
||
| try: | ||
| with open(infile, 'rb') as f: | ||
|
|
@@ -140,7 +170,7 @@ def make(filename, outfile): | |
| lno += 1 | ||
| # If we get a comment line after a msgstr, this is a new entry | ||
| if l[0] == '#' and section == STR: | ||
| add(msgctxt, msgid, msgstr, fuzzy) | ||
| add(msgctxt, msgid, msgstr, fuzzy, messages) | ||
| section = msgctxt = None | ||
| fuzzy = 0 | ||
| # Record a fuzzy mark | ||
|
|
@@ -152,13 +182,13 @@ def make(filename, outfile): | |
| # Now we are in a msgid or msgctxt section, output previous section | ||
| if l.startswith('msgctxt'): | ||
| if section == STR: | ||
| add(msgctxt, msgid, msgstr, fuzzy) | ||
| add(msgctxt, msgid, msgstr, fuzzy, messages) | ||
| section = CTXT | ||
| l = l[7:] | ||
| msgctxt = b'' | ||
| elif l.startswith('msgid') and not l.startswith('msgid_plural'): | ||
| if section == STR: | ||
| add(msgctxt, msgid, msgstr, fuzzy) | ||
| add(msgctxt, msgid, msgstr, fuzzy, messages) | ||
| if not msgid: | ||
| # See whether there is an encoding declaration | ||
| p = HeaderParser() | ||
|
|
@@ -213,11 +243,9 @@ def make(filename, outfile): | |
| sys.exit(1) | ||
| # Add last entry | ||
| if section == STR: | ||
| add(msgctxt, msgid, msgstr, fuzzy) | ||
|
|
||
| # Compute output | ||
| output = generate() | ||
| add(msgctxt, msgid, msgstr, fuzzy, messages) | ||
|
|
||
| def writefile(outfile, output): | ||
| try: | ||
| with open(outfile,"wb") as f: | ||
| f.write(output) | ||
|
|
@@ -247,9 +275,11 @@ def main(): | |
| print('No input file given', file=sys.stderr) | ||
| print("Try `msgfmt --help' for more information.", file=sys.stderr) | ||
| return | ||
|
|
||
| for filename in args: | ||
| make(filename, outfile) | ||
| if outfile is None: | ||
| for filename in args: | ||
| make(filename, None) | ||
| else: | ||
| make(args, outfile) | ||
|
|
||
|
|
||
| if __name__ == '__main__': | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.