Lex2Po/Check_Translation.py
2024-08-26 14:26:22 +01:00

162 lines
6.4 KiB
Python

import os
import argparse
import re
import json # Import the json module
# Keys (with the module prefix removed) that are exempt from the
# missing-translation check: entries whose prefix-stripped key appears here
# are never reported, even when their text equals the English text.
do_not_need_translating = [
    "AM",
    "PM",
    "AM/PM",
    "nfs",
    "cifs",
]
def infer_prefix(entries, filename, IsGeneral=False):
    """Infer the ``<abc>_`` key prefix used by a module's lex entries.

    The first key (in iteration order) that contains an underscore decides
    the prefix: everything up to and including that key's first underscore.
    When no key contains an underscore, the prefix falls back to the first
    three characters of the file name's stem, lower-cased, plus ``"_"``.

    Args:
        entries: Mapping whose keys are lex entry keys.
        filename: Name or path of the lex file, either a ``str`` or any
            ``os.PathLike`` object such as ``pathlib.Path``. Used for the
            fallback prefix and in the diagnostic message.
        IsGeneral: When true, no prefix applies and ``""`` is returned.

    Returns:
        The prefix including its trailing underscore, or ``""`` when
        ``IsGeneral`` is true.
    """
    if IsGeneral:
        return ""  # No prefix for General items

    for key in entries:
        # Lazy match so only the text up to the FIRST underscore is taken
        # ("bac_FOO_BAR" -> "bac_").
        match = re.match(r".*?_", key)
        if match:
            prefix = match.group(0)
            print(f"Found prefix:{prefix} {filename}")
            return prefix

    # ``.stem`` exists only on Path objects, while file names coming from
    # ``os.listdir`` are plain strings; derive the stem in a way that works
    # for both.
    stem = os.path.splitext(os.path.basename(os.fspath(filename)))[0]
    return stem[:3].lower() + "_"
# Default locations of the translated and the English lex trees; each module
# lives in a sub-directory named after the module.
_DEFAULT_LOCALE_ROOT = (
    "/home/brianr//Documents/smeserver-manager-locale"
    "/root/usr/share/smanager/lib/SrvMngr/I18N/Modules"
)
_DEFAULT_EN_ROOT = (
    "/home/brianr//Documents/smeserver-manager"
    "/root/usr/share/smanager/lib/SrvMngr/I18N/Modules"
)


def _parse_lex_line(line):
    """Split one ``'key' => 'message',`` line into ``(key, message)``.

    Returns ``None`` when the line contains no ``=>`` and is therefore not
    an entry line.
    """
    if "=>" not in line:
        return None
    key, message = line.split("=>", 1)
    return key.strip().strip("'"), message.strip().strip(",").strip("'")


def _iter_lex_entries(path):
    """Yield ``(key, message)`` for every entry line of the lex file at *path*."""
    with open(path, "r", encoding="utf-8") as lex_f:
        for line in lex_f:
            entry = _parse_lex_line(line)
            if entry is not None:
                yield entry


def _language_code(lex_file):
    """Return the last two characters of *lex_file* before its first dot."""
    # NOTE(review): a region-qualified name such as "mod_pt-br.lex" would
    # yield "br" here -- confirm whether such files exist in the locale tree.
    return lex_file.split(".")[0][-2:]


def find_lex_files(modulename, *, locale_root=None, en_root=None):
    """Compare a module's translated lex files against its English lex file.

    For every ``.lex`` file (other than ``*_en.lex``) in the module's locale
    directory this:

    * records keys whose message is empty or identical to the English
      message as missing translations (keys whose prefix-stripped form is
      listed in ``do_not_need_translating`` are skipped);
    * records keys that do not exist in the English file, prints them, and
      then removes those entries from the locale file on disk.

    The missing translations, grouped by language code, are written to
    ``missing_translations_<modulename>.json`` in the current directory.

    Args:
        modulename: Name of the module sub-directory to check.
        locale_root: Directory holding the per-module translated lex
            directories. Defaults to the built-in smeserver-manager-locale
            checkout path.
        en_root: Directory holding the per-module English lex directories.
            Defaults to the built-in smeserver-manager checkout path.

    Returns:
        None. When the module has no ``*_en.lex`` file a message is printed
        and nothing is modified or written.
    """
    if locale_root is None:
        locale_root = _DEFAULT_LOCALE_ROOT
    if en_root is None:
        en_root = _DEFAULT_EN_ROOT
    base_locale_path = f"{locale_root}/{modulename}/"
    base_en_path = f"{en_root}/{modulename}/"

    # Translated files only; an English file may also sit in the locale
    # directory and must never be compared against itself or rewritten.
    locale_files = [
        name
        for name in os.listdir(base_locale_path)
        if name.endswith(".lex") and not name.endswith("_en.lex")
    ]
    en_file = next(
        (name for name in os.listdir(base_en_path) if name.endswith("_en.lex")),
        None,
    )
    if en_file is None:
        print(f"No _en.lex file found for module: {modulename}")
        return

    # English reference messages; a later duplicate key overrides an earlier one.
    translations = dict(_iter_lex_entries(os.path.join(base_en_path, en_file)))
    module_prefix = infer_prefix(translations, en_file)

    missing_translations = {}
    key_not_in_en = {}
    for lex_file in locale_files:
        print(f"File:{lex_file}")
        language_code = _language_code(lex_file)
        # Every language gets an entry, even when nothing is found for it.
        missing = missing_translations.setdefault(language_code, [])
        unknown = key_not_in_en.setdefault(language_code, [])
        for key, message in _iter_lex_entries(
            os.path.join(base_locale_path, lex_file)
        ):
            # The exemption list holds keys without the module prefix.
            if key[len(module_prefix):] in do_not_need_translating:
                continue
            if key not in translations:
                unknown.append(key)
            elif message == translations[key] or message == "":
                missing.append(key)

    for lang_code, keys in missing_translations.items():
        if keys:
            print(
                f"Missing translations for module '{modulename}', ({lang_code}) - #{len(keys)}:"
            )
    for lang_code, keys in key_not_in_en.items():
        if keys:
            print(
                f"Keys not found in english lex for module '{modulename} ({lang_code}) - #{len(keys)}'."
            )
            print(keys)

    # Remove entries whose key is unknown to the English lex file.
    for lex_file in locale_files:
        print(f"File:{lex_file}")
        lex_path = os.path.join(base_locale_path, lex_file)
        stale_keys = set(key_not_in_en.get(_language_code(lex_file), ()))
        with open(lex_path, "r", encoding="utf-8") as loc_f:
            all_lines = loc_f.readlines()

        kept_lines = []
        deleted = 0
        for line in all_lines:
            entry = _parse_lex_line(line)
            if entry is not None and entry[0] in stale_keys:
                deleted += 1
            else:
                # Lines that are not entries (comments, blank lines, ...)
                # are kept verbatim; only stale entries are dropped.
                kept_lines.append(line)

        # Leave the file untouched when there is nothing to remove, so a
        # no-op run never truncates and rewrites translation files.
        if deleted:
            with open(lex_path, "w", encoding="utf-8") as loc_f:
                loc_f.writelines(kept_lines)
        print(f"Updated {lex_file}, removed {deleted} keys.")

    output_filename = f"missing_translations_{modulename}.json"
    with open(output_filename, "w", encoding="utf-8") as json_file:
        json.dump(missing_translations, json_file, ensure_ascii=False, indent=4)
    print(f"Wrote missing translations to {output_filename}")
if __name__ == "__main__":
    # Command-line entry point: takes the module name as the single
    # positional argument and runs the translation check for that module.
    cli = argparse.ArgumentParser(
        description="Check for missing translations in lex files."
    )
    cli.add_argument(
        "modulename", help="The name of the module to check translations for."
    )
    find_lex_files(cli.parse_args().modulename)