Initial upload of python program file
This commit is contained in:
parent
27f13abc7f
commit
c6a4e9bc3f
218
trylex2po.py
Normal file
218
trylex2po.py
Normal file
@ -0,0 +1,218 @@
|
||||
import os
|
||||
import polib
|
||||
from pathlib import Path
|
||||
import re
|
||||
import shutil
|
||||
|
||||
# Directory trees the conversion works on.
# SOURCE_LOCALE_DIR is walked for translated ``*.lex`` files and receives the
# generated ``pofiles`` directories; SOURCE_MANAGER_DIR is where the English
# master lexicons (``<module>_en.lex`` and ``General/general_en.lex``) are read.
SOURCE_LOCALE_DIR = Path(
    "/home/brianr/Documents/smeserver-manager-locale/root/usr/share/smanager/lib/SrvMngr/I18N/Modules"
)
SOURCE_MANAGER_DIR = Path(
    "/home/brianr/Documents/smeserver-manager/root/usr/share/smanager/lib/SrvMngr/I18N/Modules"
)
|
||||
|
||||
def extract_language_from_filename(filename):
    """Return the language code embedded in a lexicon or PO file name.

    The code is a two-letter language, optionally followed by ``-`` and a
    two-letter region, sitting between a ``_``/``-`` separator and the file
    extension, e.g. ``useraccounts_fr.lex`` -> ``fr`` and
    ``useraccounts_zh-CN.po`` -> ``zh_cn``.

    Args:
        filename: The file name (a string) to inspect.

    Returns:
        The lower-cased code with ``-`` replaced by ``_``, or ``None`` when
        no code is found.
    """
    # Both extensions are accepted because the name handed in may be that of
    # the generated ``.po`` file rather than the source ``.lex`` file; matching
    # only ``.lex`` silently produced ``None`` for every PO name.
    match = re.search(
        r'[_-]([a-z]{2}(?:-[a-z]{2})?)\.(?:lex|po)', filename, re.IGNORECASE
    )
    if match is None:
        return None
    return match.group(1).replace("-", "_").lower()
|
||||
|
||||
def parse_lex_file(lex_file):
    """Parse a ``.lex`` lexicon file into a ``{key: text}`` dictionary.

    Entries have the form ``key => 'value',`` where the key may be bare or
    wrapped in single or double quotes and the value is single-quoted and may
    span several lines.  When a key occurs more than once the last value wins.

    Args:
        lex_file: Path of the UTF-8 encoded lexicon file to read.

    Returns:
        dict mapping each stripped key to its stripped value, with ``\\'``
        un-escaped to ``'`` and the two-character sequence ``\\n`` turned into
        a real newline.
    """
    entry_pattern = re.compile(r"""
        (['"]?)                 # Optional opening quote for the key
        ([^'" \t]+?)            # The key itself (no quotes, spaces or tabs)
        \1                      # Closing quote matching the opening quote
        \s*=>\s*                # The delimiter
        '                       # Opening quote for the value
        ((?:\\.|[^\\])*?)       # The value; a backslash always binds the next
                                # character, so an escaped quote never ends it
        '                       # Closing quote for the value
        \s*(?:,|\Z)             # Trailing comma (optional on the final entry)
    """, re.DOTALL | re.VERBOSE)

    with open(lex_file, 'r', encoding='utf-8') as file:
        content = file.read()

    entries = {}
    for match in entry_pattern.finditer(content):
        # The key may have picked up a leading newline when it is unquoted,
        # hence the strip().
        key = match.group(2).strip()
        value = match.group(3).strip()
        # Undo the lexicon's quote escaping so callers see the real text.
        value = value.replace("\\'", "'")
        entries[key] = value.replace("\\n", "\n")
    return entries
|
||||
|
||||
def save_lex_file(entries, lex_file):
    """Write *entries* to *lex_file* as ``'key' => 'value',`` lines.

    Args:
        entries: Mapping of lexicon key to text.
        lex_file: Destination path; the file is overwritten, UTF-8 encoded.

    Single quotes in a value are backslash-escaped.  A quote that is already
    escaped is left alone so that text carrying ``\\'`` is not turned into
    ``\\\\'``, which would terminate the quoted string early.
    """
    def _escape_quote(match):
        token = match.group(0)
        # Backslash pairs are passed through untouched; only a bare quote
        # needs escaping.
        return "\\'" if token == "'" else token

    with open(lex_file, 'w', encoding='utf-8') as file:
        for key, value in entries.items():
            escaped_value = re.sub(r"\\.|'", _escape_quote, value, flags=re.DOTALL)
            # NOTE(review): keys are written verbatim; a key containing a
            # single quote would produce a malformed line.
            file.write(f"'{key}' => '{escaped_value}',\n")
|
||||
|
||||
def infer_prefix(entries, filename):
    """Infer the ``<abc>_`` key prefix used by a module's lexicon.

    The prefix of a key is everything up to and including its first
    underscore.  The prefix shared by the most keys is returned, so a single
    stray key such as ``FIRST_NAME`` cannot hijack the result; ties go to the
    prefix seen first.  When no key contains an underscore the first three
    characters of the file stem, lower-cased, plus ``_`` are used instead.

    Args:
        entries: Mapping whose keys are lexicon message ids.
        filename: Path (``str`` or ``Path``) of the lexicon file, used for the
            fallback and for the log line.

    Returns:
        The inferred prefix, including its trailing underscore.
    """
    from collections import Counter

    prefix_pattern = re.compile(r"(.*?_)?.*")
    prefix_counts = Counter()
    for key in entries:
        match = prefix_pattern.match(key)
        candidate = match.group(1) if match else None
        if candidate:
            prefix_counts[candidate] += 1

    if prefix_counts:
        # most_common() keeps first-seen order among equal counts.
        prefix = prefix_counts.most_common(1)[0][0]
        print(f"Found prefix:{prefix} {filename}")
        return prefix

    # If no prefix is found, fall back to the first 3 characters of the filename.
    return Path(filename).stem[:3].lower() + "_"
|
||||
|
||||
def extract_module_name(file_path):
    """Return the capitalized name of the directory that contains *file_path*.

    Args:
        file_path: Path (``str`` or ``Path``) of a file inside a module
            directory.

    Returns:
        The parent directory's name with its first letter upper-cased and the
        rest lower-cased, or ``""`` when the path has no parent directory
        component (a bare file name no longer raises ``IndexError``).
    """
    return Path(file_path).parent.name.capitalize()
|
||||
|
||||
def ensure_prefix(entries, prefix, file_path):
    """Return a copy of *entries* in which every key starts with *prefix*.

    Keys that already carry the prefix are copied unchanged; the others get
    the prefix prepended and the rename is logged.  If prepending would
    collide with a key that already exists in *entries*, the existing
    (explicitly prefixed) entry is kept and the unprefixed one is skipped, so
    the outcome does not depend on the order of the entries.

    Args:
        entries: Mapping of message id to text.
        prefix: Prefix every key must start with.
        file_path: Path of the lexicon file; only used to name the module in
            the log output.

    Returns:
        A new dict; *entries* itself is not modified.
    """
    updated_entries = {}
    module_name = extract_module_name(file_path)
    for key, value in entries.items():
        if key.startswith(prefix):
            updated_entries[key] = value
            continue

        updated_key = f"{prefix}{key}"
        if updated_key in entries:
            # Never let a synthesized key clobber one the file really defines.
            print(f"Skipping prefix: {key} -> {updated_key} already exists ({module_name})")
            continue

        print(f"Adding prefix: {key} -> {updated_key} ({module_name})")
        updated_entries[updated_key] = value
    return updated_entries
|
||||
|
||||
def convert_lex_to_po(lex_file, po_file, en_entries, general_en_entries):
    """Convert one translated ``.lex`` file into a ``.po`` file.

    Every translated entry becomes a PO entry whose ``msgctxt`` is the
    (prefixed) lexicon key, whose ``msgid`` is the English text and whose
    ``msgstr`` is the translation.  The English text is looked up in
    *en_entries* first and then in *general_en_entries*; when neither has it
    a placeholder is used.

    Args:
        lex_file: Path of the translated lexicon to read.
        po_file: Path of the PO file to write.
        en_entries: English ``{key: text}`` mapping of the module.  It is
            updated IN PLACE: keys are normalized to carry the module prefix
            and a placeholder is added for every key that had no English
            text, so the caller can save the updated mapping afterwards.
        general_en_entries: English mapping of the General lexicon, whose
            keys carry no module prefix.
    """
    translated_entries = parse_lex_file(lex_file)
    # The language is taken from the source lexicon name; the PO name is only
    # derived from it.
    language_code = extract_language_from_filename(Path(lex_file).name)

    # Infer prefix from original en_entries and ensure all entries have this prefix
    prefix = infer_prefix(en_entries, Path(lex_file))
    prefixed_en_entries = ensure_prefix(en_entries, prefix, lex_file)
    # Write the normalized keys back into the caller's dict; rebinding the
    # local name would leave the caller with the stale mapping.
    en_entries.clear()
    en_entries.update(prefixed_en_entries)
    translated_entries = ensure_prefix(translated_entries, prefix, lex_file)

    po = polib.POFile()
    po.metadata = {
        'Project-Id-Version': '1.0',
        'Language': language_code or '',
        'Content-Type': 'text/plain; charset=utf-8',
    }

    for msgctxt, msgstr in translated_entries.items():
        msgid = en_entries.get(msgctxt, "")  # Find the original text using msgctxt (Msg ID)
        if not msgid:
            print(f"Warning: Could not find original text for Msg ID '{msgctxt}'")
            # See if in General
            msgid = general_en_entries.get(msgctxt, "")
            if not msgid and prefix and msgctxt.startswith(prefix):
                # General keys have no prefix, so retry with ours removed.
                msgid = general_en_entries.get(msgctxt[len(prefix):], "")
            if not msgid:
                msgid = "Placeholder for missing original text"
                # Record the new entry so it ends up in the English lexicon.
                en_entries[msgctxt] = msgid
            else:
                print(f"Found {msgctxt} => {msgid} in general")

        po.append(polib.POEntry(msgctxt=msgctxt, msgid=msgid, msgstr=msgstr))

    po.save(po_file)
|
||||
|
||||
def process_directory_with_en_mapping(source_directory, master_directory):
    """Convert every translated lexicon under *source_directory* to PO files.

    For each module directory below *source_directory* (the ``General``
    module and any ``pofiles`` directory are skipped) the English lexicon
    ``<module>_en.lex`` is loaded from the matching directory below
    *master_directory*, every non-English ``.lex`` file is converted into
    ``pofiles/<name>.po``, and, when English entries exist, they are saved
    as ``<module>_en.lex`` next to the translations and as
    ``pofiles/<module>_en.po``.

    Args:
        source_directory: Root of the translated lexicon tree.
        master_directory: Root of the English master lexicon tree; must
            contain ``General/general_en.lex``.

    Raises:
        SystemExit: When the General lexicon yields no entries.
    """
    source_directory = Path(source_directory)
    master_directory = Path(master_directory)

    # First pick up the general ones that may come in any of the files
    print("loading General directory")
    # Load it up and use it to check for ones without prefix
    general_en_file = master_directory / "General/general_en.lex"
    general_en_entries = parse_lex_file(general_en_file)
    if not general_en_entries:
        # Exit with a message and a non-zero status instead of the silent,
        # site-provided quit().
        raise SystemExit(f"No entries found in {general_en_file}; nothing to convert.")
    print(f"Found {len(general_en_entries)} entries in General lex file")

    for subdir, _, files in os.walk(source_directory):
        subdir_path = Path(subdir)
        print(subdir_path)

        # Skip if subdir_path is exactly the source_directory or contains 'pofiles'
        if subdir_path == source_directory or 'pofiles' in subdir_path.parts:
            continue

        # Extract the subservient directory name and ensure it's capitalized
        try:
            subservient_dir_name = subdir_path.relative_to(source_directory).parts[0].capitalize()
        except IndexError:
            print(f"Skipping directory {subdir_path} as it has no subservient directory.")
            continue

        # Skip the General directory as it has no prefixes
        if subservient_dir_name == "General":
            continue

        # Apply it to find the corresponding en directory
        corresponding_en_dir = master_directory / subservient_dir_name
        module_stem = subservient_dir_name.lower()

        # Find the corresponding `*_en.lex` file
        en_entries = {}
        en_file = corresponding_en_dir / f"{module_stem}_en.lex"
        if en_file.is_file():
            en_entries = parse_lex_file(en_file)
        else:
            print(f"Warning: No *_en.lex file found in {corresponding_en_dir}")

        pofiles_dir = subdir_path / "pofiles"
        pofiles_dir.mkdir(parents=True, exist_ok=True)

        for file in files:
            if not file.endswith('.lex') or file.endswith('_en.lex'):
                continue
            lex_file = subdir_path / file
            po_file = pofiles_dir / lex_file.with_suffix('.po').name
            print(f"Converting {lex_file} to {po_file}")
            convert_lex_to_po(lex_file, po_file, en_entries, general_en_entries)

        # Save the updated en_entries to the en.lex file in the locale directory
        if en_entries:
            locale_en_file = subdir_path / f"{module_stem}_en.lex"
            if en_file.exists():
                # NOTE(review): this copy is overwritten by save_lex_file()
                # just below, so it does not act as a lasting backup.
                shutil.copy(en_file, locale_en_file)

            save_lex_file(en_entries, locale_en_file)
            print(f"Updated and saved {locale_en_file}")

            # Create and save the po file in the pofiles directory
            updated_po_file = pofiles_dir / f"{module_stem}_en.po"
            create_po_from_en_entries(en_entries, updated_po_file)
|
||||
|
||||
def create_po_from_en_entries(entries, po_file):
    """Write an English PO file with one untranslated entry per item.

    Args:
        entries: Mapping of lexicon key to English text; the key becomes the
            entry's ``msgctxt`` and the text its ``msgid``.
        po_file: Destination path of the PO file.
    """
    catalog = polib.POFile()
    catalog.metadata = {
        'Project-Id-Version': '1.0',
        'Language': 'en',
        'Content-Type': 'text/plain; charset=utf-8',
    }

    for context, english_text in entries.items():
        # msgstr stays empty: this file holds source text only.
        catalog.append(
            polib.POEntry(msgctxt=context, msgid=english_text, msgstr="")
        )

    catalog.save(po_file)
    print(f"Created {po_file}")
|
||||
|
||||
def main():
    """Run the lex-to-po conversion over the configured directories."""
    print("Processing locale directory...")
    # Convert all .lex files under the locale tree into their pofiles
    # directories, using the manager tree for the English originals.
    process_directory_with_en_mapping(
        source_directory=SOURCE_LOCALE_DIR,
        master_directory=SOURCE_MANAGER_DIR,
    )
    print("Conversion complete.")
|
||||
|
||||
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user