-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathextract_missing_translations.py
executable file
·136 lines (105 loc) · 4.64 KB
/
extract_missing_translations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
"""
Extract missing translations from a .po file.
This file is part of Ardupilot methodic configurator. https://github.com/ArduPilot/MethodicConfigurator
SPDX-FileCopyrightText: 2024 Amilcar do Carmo Lucas <[email protected]>
SPDX-License-Identifier: GPL-3.0-or-later
"""
import argparse
import gettext
import glob
import logging
import os
def parse_arguments() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Extract missing translations from a .po (GNU gettext) file.")
# Add argument for language code
parser.add_argument(
"--lang-code",
default="zh_CN",
type=str,
help="The language code for which to extract missing translations (e.g., 'zh_CN', 'pt'). Defaults to %(default)s",
)
# Add argument for output file
parser.add_argument(
"--output-file",
default="missing_translation",
type=str,
help="The base name of the file(s) where the missing translations will be written. "
"This file will contain lines in the format 'index:msgid'. Defaults to %(default)s",
)
# Add argument for maximum number of translations per output file
parser.add_argument(
"--max-translations",
default=80,
type=int,
help="The maximum number of missing translations to write to each output file. Defaults to %(default)s",
)
return parser.parse_args()
def extract_missing_translations(lang_code: str) -> list[tuple[int, str]]:
# Set up the translation catalog
po_file = os.path.join(
"ardupilot_methodic_configurator", "locale", lang_code, "LC_MESSAGES", "ardupilot_methodic_configurator.po"
)
language = gettext.translation(
"ardupilot_methodic_configurator",
localedir="ardupilot_methodic_configurator\\locale",
languages=[lang_code],
fallback=True,
)
# Read the .po file entries
with open(po_file, encoding="utf-8") as f:
lines = f.readlines()
missing_translations: list[tuple[int, str]] = []
# Iterate through lines to find untranslated msgid
msgid = ""
in_msgid = False
for i, f_line in enumerate(lines):
line = f_line.strip()
if line.startswith("msgid "):
msgid = ""
in_msgid = True
if in_msgid and not line.startswith("msgstr "):
line_split = line.split('"')
if len(line_split) > 1:
msgid += '"'.join(line_split[1:-1]) # Get the msgid string
else:
msg = f"On line {i}"
logging.error(msg)
continue
if in_msgid and line.startswith("msgstr "):
in_msgid = False
# escape \ characters in a string
# msgid_escaped = msgid.replace("\\", "\\\\")
msgid_escaped = msgid
# Check if the translation exists
if language.gettext(msgid_escaped) == msgid: # If translation is the same as msgid, it's missing
missing_translations.append((i - 1, msgid))
return missing_translations
def output_to_files(missing_translations: list[tuple[int, str]], output_file_base_name: str, max_translations: int) -> None:
# Remove any existing output files with the same base name
existing_files = glob.glob(f"{output_file_base_name}.txt")
existing_files += glob.glob(f"{output_file_base_name}_*.txt")
for existing_file in existing_files:
os.remove(existing_file)
# Determine the number of files needed
total_missing = len(missing_translations)
num_files = (total_missing // max_translations) + (1 if total_missing % max_translations else 0)
# Write untranslated msgids along with their indices to the output file(s)
for file_index in range(num_files):
start_index = file_index * max_translations
end_index = start_index + max_translations
# Set the name of the output file based on the index
current_output_file = output_file_base_name
current_output_file += f"_{file_index + 1}" if num_files > 1 else ""
current_output_file += ".txt"
# Write untranslated msgids along with their indices to the output file
with open(current_output_file, "w", encoding="utf-8") as f:
for index, item in missing_translations[start_index:end_index]:
f.write(f"{index}:{item}\n")
def main() -> None:
args = parse_arguments()
logging.basicConfig(level="INFO", format="%(asctime)s - %(levelname)s - %(message)s")
missing_translations = extract_missing_translations(args.lang_code)
output_to_files(missing_translations, args.output_file, args.max_translations)
if __name__ == "__main__":
main()