# Lex2Po/Lex2Po.py
#
# 231 lines
# 8.2 KiB
# Python

import os
import polib
from pathlib import Path
import re
import shutil
# Define paths
# Both constants are hard-coded absolute paths on the author's machine and are
# passed to process_directory_with_en_mapping() by main().
# SOURCE_LOCALE_DIR: tree that is walked for translated *.lex files; the
# generated "pofiles" directories and the rewritten *_en.lex files are written
# beneath it.
SOURCE_LOCALE_DIR = Path("/home/brianr/Documents/smeserver-manager-locale/root/usr/share/smanager/lib/SrvMngr/I18N/Modules")
# SOURCE_MANAGER_DIR: tree the English base lexicons (<module>_en.lex and
# General/general_en.lex) are read from.
SOURCE_MANAGER_DIR = Path("/home/brianr/Documents/smeserver-manager/root/usr/share/smanager/lib/SrvMngr/I18N/Modules")
def extract_language_from_filename(filename):
    """Return the language code embedded in a lex/po file name.

    The code is a two-letter tag, optionally followed by a hyphen and a
    two-letter region, that sits between a ``_`` or ``-`` separator and a
    dot (``general_fr.po`` -> ``fr``, ``general_zh-CN.po`` -> ``zh_cn``).
    The result is lower-cased and uses ``_`` instead of ``-``.

    Returns ``None`` when *filename* contains no such tag.
    """
    found = re.search(r'[_-]([a-z]{2}(?:-[a-z]{2})?)\..*', filename, re.IGNORECASE)
    if found is None:
        return None
    language_tag = found.group(1)
    return language_tag.lower().replace("-", "_")
# One ``key => 'value'`` lexicon entry.  Compiled once at import time because
# parse_lex_file() is called for every lex file that gets converted.
_LEX_ENTRY_RE = re.compile(r"""
    (['"]?)          # Optional opening quote for the key
    (.*?)            # The key itself (including spaces and any characters)
    \1               # Optional closing quote matching the opening quote
    \s*=>\s*         # The delimiter
    '                # Opening quote for the value
    (.*?)            # The value (non-greedy to capture minimal text)
    '                # Closing quote for the value
    \s*(?:,\s*|\Z)   # Trailing comma, or end of file for the last entry
""", re.DOTALL | re.VERBOSE)


def parse_lex_file(lex_file):
    """Parse a ``'key' => 'value',`` lexicon file into a dict.

    Args:
        lex_file: Path of the lexicon file; read as UTF-8 (a leading BOM,
            if present, is ignored).

    Returns:
        A dict mapping each key to its value, in file order.  Keys and
        values are stripped of surrounding whitespace.  In values the
        escape ``\\'`` is turned back into ``'`` (the inverse of what
        save_lex_file() writes) and the two-character sequence ``\\n`` is
        turned into a real newline.

    Raises:
        OSError: if the file cannot be opened.

    NOTE: keys may be unquoted or quoted with ``'`` or ``"``.  Any
    non-whitespace text between two entries (for example a comment line)
    is not skipped; the pattern absorbs it into the following key.
    """
    # utf-8-sig reads plain UTF-8 unchanged but drops a BOM, which would
    # otherwise end up inside the first key.
    with open(lex_file, 'r', encoding='utf-8-sig') as file:
        content = file.read()

    entries = {}
    for match in _LEX_ENTRY_RE.finditer(content):
        key = match.group(2).strip()
        value = match.group(3).strip()
        # Undo the quote escaping first so a round trip through
        # save_lex_file() does not double the backslashes.
        entries[key] = value.replace("\\'", "'").replace("\\n", "\n")
    return entries
def save_lex_file(entries, lex_file):
    """Write *entries* to *lex_file* as ``'key' => 'value',`` lines.

    The file is opened for writing as UTF-8, so existing content is
    replaced.  Single quotes inside a value are backslash-escaped; keys
    are written unchanged.
    """
    def render(name, text):
        # Escape single quotes so the value stays inside its quotes.
        escaped_text = text.replace("'", "\\'")
        return f"'{name}' => '{escaped_text}',\n"

    with open(lex_file, 'w', encoding='utf-8') as out:
        out.writelines(render(name, text) for name, text in entries.items())
def infer_prefix(entries, filename, IsGeneral=False):
    """Work out the ``<abc>_`` prefix used by the keys of a lexicon.

    Args:
        entries: Mapping whose keys are inspected in order.
        filename: ``Path`` of the lex file; only its stem is used, and
            only for the fallback.
        IsGeneral: When true, no prefix applies and ``""`` is returned.

    Returns:
        The text up to and including the first underscore of the first
        key that has one.  If no key has an underscore, the first three
        characters of the file stem, lower-cased, plus ``"_"``.
    """
    if not IsGeneral:
        for candidate in entries:
            found = re.match(r"(.*?_)?.*", candidate)
            prefix = found.group(1) if found else None
            if prefix:
                print(f"Found prefix:{prefix} {filename}")
                return prefix
        # No key carries a prefix: derive one from the file name instead.
        stem_start = filename.stem[:3]
        return stem_start.lower() + "_"
    # No prefix for General items
    return ""
def extract_module_name(file_path):
    """Return the name of the directory holding *file_path*, capitalized.

    The name is the second-to-last component of the path; ``capitalize``
    upper-cases its first letter and lower-cases the rest.  Raises
    ``IndexError`` when the path has fewer than two components.
    """
    components = Path(file_path).parts
    return components[-2].capitalize()
def ensure_prefix(entries, prefix, file_path):
    """Return a copy of *entries* in which every key starts with *prefix*.

    Keys that already start with *prefix* are kept as they are; the
    others get *prefix* prepended and the change is printed together with
    the module name derived from *file_path*.  *entries* itself is not
    modified.
    """
    module_name = extract_module_name(file_path)
    prefixed = {}
    for original_key, text in entries.items():
        if original_key.startswith(prefix):
            prefixed[original_key] = text
            continue
        new_key = f"{prefix}{original_key}"
        print(f"Adding prefix: {original_key} -> {new_key} ({module_name})")
        prefixed[new_key] = text
    return prefixed
def convert_lex_to_po(lex_file, po_file, en_entries, general_en_entries, IsGeneral=False):
    """Convert one translated lex file into a gettext ``.po`` file.

    Each translated key becomes the ``msgctxt`` of a PO entry, the
    translated text its ``msgstr`` and the English text its ``msgid``.
    The English text is looked up in *en_entries* first and then in
    *general_en_entries*; if neither has it, a placeholder is used.

    Args:
        lex_file: Path of the translated lex file to read.
        po_file: ``Path`` of the ``.po`` file to write; its name also
            supplies the language code for the PO header.
        en_entries: English key -> text mapping for this module.  It is
            updated in place: every translated key with no English
            original is added with the placeholder text, so the caller
            can persist it and later conversions find it.
        general_en_entries: English key -> text mapping of the General
            lexicon, used as a second lookup.
        IsGeneral: True when converting the General module, whose keys
            carry no prefix.
    """
    translated_entries = parse_lex_file(lex_file)
    language_code = extract_language_from_filename(po_file.name)

    # Infer prefix from original en_entries and ensure all entries have this
    # prefix.  The prefixed English mapping is a separate dict used only for
    # lookups; the caller's en_entries keeps its own keys.
    prefix = infer_prefix(en_entries, Path(lex_file), IsGeneral)
    prefixed_en_entries = ensure_prefix(en_entries, prefix, lex_file)
    translated_entries = ensure_prefix(translated_entries, prefix, lex_file)

    # Built from adjacent literals so no continuation whitespace leaks into
    # the message and the sentences stay separated by a space.
    missing_original = (
        "Placeholder for missing original text - this means that "
        "there was a string in the translated lex file which did not "
        "appear in the english base file. "
        "Probably due to it no longer being required."
    )

    po = polib.POFile()
    po.metadata = {
        'Project-Id-Version': '1.0',
        'Language': language_code,
        'Content-Type': 'text/plain; charset=utf-8',
    }

    new_entries = []
    for msgctxt, msgstr in translated_entries.items():
        # Find the original text using the msgctxt (Msg ID)
        msgid = prefixed_en_entries.get(msgctxt, "")
        if not msgid:
            print(f"Warning: Could not find original text for Msg ID {msgctxt} ({language_code})")
            # See if in General
            msgid = general_en_entries.get(msgctxt, "")
            if msgid:
                print(f"Found {msgctxt} => {msgid} in general")
            else:
                msgid = missing_original
                new_entries.append((msgctxt, msgid))
        po.append(polib.POEntry(msgctxt=msgctxt, msgid=msgid, msgstr=msgstr))

    # Record the placeholders on the caller's mapping.  Writing them to the
    # local prefixed copy would discard them when this function returns.
    for msgctxt, msgid in new_entries:
        en_entries[msgctxt] = msgid

    po.save(po_file)
def process_directory_with_en_mapping(source_directory, master_directory):
    """Convert every translated lex file under *source_directory* to ``.po``.

    For each directory below *source_directory* (except ``pofiles``
    directories) the matching English lexicon is loaded from
    *master_directory*, every non-English ``*.lex`` file is converted into
    ``<dir>/pofiles/<name>.po``, and, when English entries exist, they are
    written to ``<dir>/<module>_en.lex`` and ``<dir>/pofiles/<module>_en.po``.

    Args:
        source_directory: ``Path`` of the tree holding the translated lex
            files; output is written beneath it.
        master_directory: ``Path`` of the tree holding the English base
            lexicons, including ``General/general_en.lex``.

    Raises:
        SystemExit: if the General English lexicon contains no entries.
        OSError: if the General English lexicon cannot be read.
    """
    # First pick up the general ones that may come in any of the files
    print("loading General directory")
    # Load it up and use it to check for ones without prefix
    general_en_file = master_directory / "General/general_en.lex"
    general_en_entries = parse_lex_file(general_en_file)
    if not general_en_entries:
        # Stop with a message and a non-zero status instead of a silent exit.
        raise SystemExit(f"No entries found in {general_en_file}; aborting.")
    print(f"Found {len(general_en_entries)} entries in General lex file")

    for subdir, _, files in os.walk(source_directory):
        subdir_path = Path(subdir)
        print(subdir_path)

        # Skip if subdir_path is exactly the source_directory or contains 'pofiles'
        if subdir_path == source_directory or 'pofiles' in subdir_path.parts:
            continue

        # Extract the subservient directory name and ensure it's capitalized
        try:
            subservient_dir_name = subdir_path.relative_to(source_directory).parts[0].capitalize()
        except IndexError:
            print(f"Skipping directory {subdir_path} as it has no subservient directory.")
            continue

        # Apply it to find the corresponding en directory
        corresponding_en_dir = master_directory / subservient_dir_name
        # Tag the General directory as it has no prefixes
        IsGeneral = subservient_dir_name == "General"

        # Find the corresponding `*_en.lex` file
        en_entries = {}
        en_file = corresponding_en_dir / f"{subservient_dir_name.lower()}_en.lex"
        if en_file.is_file():
            en_entries = parse_lex_file(en_file)
        else:
            print(f"Warning: No *_en.lex file found in {corresponding_en_dir}")

        pofiles_dir = subdir_path / "pofiles"
        pofiles_dir.mkdir(parents=True, exist_ok=True)

        for file in files:
            if file.endswith('.lex') and not file.endswith('_en.lex'):
                lex_file = subdir_path / file
                po_file = pofiles_dir / lex_file.with_suffix('.po').name
                print(f"Converting {lex_file} to {po_file}")
                convert_lex_to_po(lex_file, po_file, en_entries, general_en_entries, IsGeneral)

        # Save the updated en_entries to the en.lex file in the locale directory
        if en_entries:
            locale_en_file = subdir_path / f"{subservient_dir_name.lower()}_en.lex"
            if en_file.exists():
                # NOTE(review): this copy is overwritten by save_lex_file()
                # just below, so it does not preserve a backup; a real backup
                # would need a different target name.
                shutil.copy(en_file, locale_en_file)
            save_lex_file(en_entries, locale_en_file)
            print(f"Updated and saved {locale_en_file}")

            # Create and save the po file in the pofiles directory
            updated_po_file = pofiles_dir / f"{subservient_dir_name.lower()}_en.po"
            create_po_from_en_entries(en_entries, updated_po_file)
def create_po_from_en_entries(entries, po_file):
    """Write the English *entries* to *po_file* as an untranslated catalog.

    Every key becomes the ``msgctxt`` and every value the ``msgid`` of a
    PO entry whose ``msgstr`` is empty.  The header declares language
    ``en``.  The path of the written file is printed afterwards.
    """
    catalog = polib.POFile()
    catalog.metadata = {
        'Project-Id-Version': '1.0',
        'Language': 'en',
        'Content-Type': 'text/plain; charset=utf-8',
    }
    for context, source_text in entries.items():
        catalog.append(polib.POEntry(msgctxt=context, msgid=source_text, msgstr=""))
    catalog.save(po_file)
    print(f"Created {po_file}")
def main():
    """Run the lex-to-po conversion for the configured directories.

    Converts all .lex files under SOURCE_LOCALE_DIR into their respective
    pofiles directories, using the English lexicons under
    SOURCE_MANAGER_DIR as the en-mapping.
    """
    print("Processing locale directory...")
    process_directory_with_en_mapping(
        SOURCE_LOCALE_DIR,
        SOURCE_MANAGER_DIR,
    )
    print("Conversion complete.")


# Only convert when executed as a script, not when imported.
if __name__ == "__main__":
    main()