Initial upload of python program file
This commit is contained in:
parent
27f13abc7f
commit
c6a4e9bc3f
218
trylex2po.py
Normal file
218
trylex2po.py
Normal file
@ -0,0 +1,218 @@
|
|||||||
|
import os
|
||||||
|
import polib
|
||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
# Define paths
# Root of the translated lexicons: one sub-directory per module, each holding
# that module's <module>_<lang>.lex files.
SOURCE_LOCALE_DIR = Path(
    "/home/brianr/Documents/smeserver-manager-locale/root/usr/share/smanager/lib/SrvMngr/I18N/Modules"
)

# Root of the master tree that holds the English <module>_en.lex files.
SOURCE_MANAGER_DIR = Path(
    "/home/brianr/Documents/smeserver-manager/root/usr/share/smanager/lib/SrvMngr/I18N/Modules"
)
|
||||||
|
|
||||||
|
def extract_language_from_filename(filename):
    """
    Return the language code embedded in a lexicon or PO file name.

    The code is the two-letter language, optionally followed by ``-`` and a
    two-letter region (e.g. ``pt-br``), that sits between a ``_``/``-``
    separator and the file extension. It is returned lower-cased with ``-``
    replaced by ``_``.

    Args:
        filename: File name (``str`` or path-like), e.g. ``backup_fr.lex``
            or ``backup_pt-br.po``.

    Returns:
        The normalised code (``"fr"``, ``"pt_br"``), or ``None`` when the name
        carries no recognisable language suffix.
    """
    # ``.po`` is accepted (only at the very end of the name) because callers
    # may hand in the name of a generated PO file; matching ``.lex`` alone
    # made every such lookup return None.
    match = re.search(
        r'[_-]([a-z]{2}(?:-[a-z]{2})?)\.(?:lex|po$)',
        str(filename),
        re.IGNORECASE,
    )
    if match:
        return match.group(1).replace("-", "_").lower()
    return None
|
||||||
|
|
||||||
|
# One lexicon entry: <key> => '<value>' followed by a comma (or end of input).
# Compiled once at import time instead of on every call.
_LEX_ENTRY_RE = re.compile(
    r"""
    (['"]?)           # Optional opening quote for the key
    ([^'" \t]+?)      # The key itself (excluding quotes, spaces and tabs)
    \1                # Closing quote matching the opening quote, if any
    \s*=>\s*          # The delimiter
    '                 # Opening quote for the value
    (.*?)             # The value (non-greedy to capture minimal text)
    '                 # Closing quote for the value
    \s*(?:,\s*|\Z)    # Trailing comma, or end of input after the last entry
    """,
    re.DOTALL | re.VERBOSE,
)


def parse_lex_file(lex_file):
    """
    Read a ``.lex`` lexicon file into a dict of ``key -> value``.

    Entries have the form ``'key' => 'value',``; the quotes around the key
    are optional and may be single or double. Keys and values are stripped
    of surrounding whitespace. In values, the escaped quote ``\\'`` becomes
    ``'`` and the two-character sequence ``\\n`` becomes a real newline.
    When a key occurs more than once the last occurrence wins.

    Args:
        lex_file: Path of the UTF-8 encoded lexicon file.

    Returns:
        Dict of the parsed entries, in file order.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    with open(lex_file, 'r', encoding='utf-8') as file:
        content = file.read()

    entries = {}
    for match in _LEX_ENTRY_RE.finditer(content):
        key = match.group(2).strip()
        value = match.group(3).strip()
        # Undo the lexicon escaping. Leaving \' in place would make
        # save_lex_file escape the quote a second time on write-back.
        entries[key] = value.replace("\\'", "'").replace("\\n", "\n")
    return entries
|
||||||
|
|
||||||
|
def save_lex_file(entries, lex_file):
    """
    Write ``entries`` to ``lex_file`` as ``'key' => 'value',`` lines.

    Values are escaped so the file can be read back by ``parse_lex_file``:
    single quotes are written as ``\\'`` and newlines as the two-character
    sequence ``\\n`` (the parser turns that sequence back into a newline).

    Args:
        entries: Mapping of lexicon key to text.
        lex_file: Destination path; an existing file is overwritten. The
            file is written as UTF-8.
    """
    lines = []
    for key, value in entries.items():
        # Escape only quotes that are not already preceded by a backslash,
        # so values that still carry a lexicon-style \' are not doubled.
        escaped_value = re.sub(r"(?<!\\)'", r"\\'", value)
        # Keep every entry on one line, mirroring the \n handling on read.
        escaped_value = escaped_value.replace("\n", "\\n")
        lines.append(f"'{key}' => '{escaped_value}',\n")

    with open(lex_file, 'w', encoding='utf-8') as file:
        file.writelines(lines)
|
||||||
|
|
||||||
|
def infer_prefix(entries, filename):
    """
    Infer the <abc>_ key prefix for a lexicon.

    The prefix is everything up to and including the first underscore of the
    first key (in dict order) that contains one. If no key contains an
    underscore, the first 3 characters of the file name's stem, lower-cased
    and followed by "_", are used instead.

    Args:
        entries: Mapping whose keys are lexicon keys.
        filename: Lexicon file name (``str`` or ``Path``); used for the log
            message and for the fallback prefix.

    Returns:
        The prefix string, always ending in "_".
    """
    for key in entries:
        # Shortest run ending in "_" from the start of the key; None when
        # the key has no underscore.
        match = re.match(r".*?_", key)
        if match:
            prefix = match.group(0)
            print(f"Found prefix:{prefix} {filename}")
            return prefix
    # If no prefix is found, fall back to the file name. Path() lets callers
    # pass a plain string as well as a Path.
    return Path(filename).stem[:3].lower() + "_"
|
||||||
|
|
||||||
|
def extract_module_name(file_path):
    """
    Return the capitalized name of the directory that contains ``file_path``.

    Args:
        file_path: Path (``str`` or ``Path``) of a file inside a module
            directory, e.g. ``.../Modules/backup/backup_fr.lex``.

    Returns:
        The second-to-last path component with its first letter upper-cased
        and the rest lower-cased (``str.capitalize``), or ``""`` when the
        path has no parent component.
    """
    parts = Path(file_path).parts
    # A bare file name has no directory component; indexing parts[-2] would
    # raise IndexError.
    if len(parts) < 2:
        return ""
    return parts[-2].capitalize()
|
||||||
|
|
||||||
|
def ensure_prefix(entries, prefix,file_path):
    """
    Return a copy of ``entries`` in which every key starts with ``prefix``.

    Keys that already start with the prefix are kept as they are; every other
    key gets the prefix prepended and the rename is logged together with the
    module name derived from ``file_path``. The input mapping is not modified.
    """
    module_name = extract_module_name(file_path)
    result = {}
    for original_key in entries:
        text = entries[original_key]
        if original_key.startswith(prefix):
            result[original_key] = text
            continue
        prefixed_key = f"{prefix}{original_key}"
        print(f"Adding prefix: {original_key} -> {prefixed_key} ({module_name})")
        result[prefixed_key] = text
    return result
|
||||||
|
|
||||||
|
def convert_lex_to_po(lex_file, po_file, en_entries, general_en_entries):
    """
    Convert one translated .lex file into a .po file.

    Every translated entry becomes a PO entry whose msgctxt is the (prefixed)
    lexicon key, whose msgstr is the translation and whose msgid is the
    English text for that key. The English text is looked up in
    ``en_entries`` first, then in ``general_en_entries``; if neither has it a
    placeholder msgid is used and recorded as a new English entry.

    Args:
        lex_file: Path of the translated lexicon, e.g. ``backup_fr.lex``.
        po_file: Path of the PO file to write.
        en_entries: English entries of the same module. Updated IN PLACE so
            that, on return, every key carries the module prefix and the
            placeholder entries created here are included; the caller saves
            this dict afterwards.
        general_en_entries: English entries of the General lexicon, whose
            keys carry no module prefix.
    """
    raw_translated = parse_lex_file(lex_file)
    # The language is encoded in the lexicon's own name. Asking for it via
    # the .po name returned None with the .lex-only pattern, which ended up
    # as "Language: None" in the PO header.
    language_code = extract_language_from_filename(Path(lex_file).name)

    # Infer prefix from original en_entries and ensure all entries have this prefix
    prefix = infer_prefix(en_entries, Path(lex_file))
    prefixed_en = ensure_prefix(en_entries, prefix, lex_file)
    translated_entries = ensure_prefix(raw_translated, prefix, lex_file)

    po = polib.POFile()
    po.metadata = {
        'Project-Id-Version': '1.0',
        'Language': language_code or '',
        'Content-Type': 'text/plain; charset=utf-8',
    }

    for msgctxt, msgstr in translated_entries.items():
        msgid = prefixed_en.get(msgctxt, "")  # Find the original text using msgctxt (Msg ID)
        if not msgid:
            print(f"Warning: Could not find original text for Msg ID '{msgctxt}'")
            # See if in General
            msgid = general_en_entries.get(msgctxt, "")
            if not msgid:
                # General keys have no module prefix, so a key that was
                # unprefixed in the .lex file must be looked up under its
                # original spelling, not the prefixed one.
                bare_key = msgctxt[len(prefix):]
                if bare_key in raw_translated:
                    msgid = general_en_entries.get(bare_key, "")
            if not msgid:
                msgid = "Placeholder for missing original text"
                # Record the new entry so it reaches the English lexicon.
                prefixed_en[msgctxt] = msgid
            else:
                print(f"Found {msgctxt} => {msgid} in general")

        po.append(polib.POEntry(msgctxt=msgctxt, msgid=msgid, msgstr=msgstr))

    # Update en_entries with new entries. The caller's dict itself must be
    # changed: rebinding the local name would silently drop the updates.
    if prefixed_en is not en_entries:
        en_entries.clear()
        en_entries.update(prefixed_en)

    po.save(po_file)
|
||||||
|
|
||||||
|
def process_directory_with_en_mapping(source_directory, master_directory):
    """
    Convert every translated .lex file under ``source_directory`` to .po.

    For each module sub-directory (except ``General`` and anything inside a
    ``pofiles`` directory) this:

    * loads ``<master_directory>/<Module>/<module>_en.lex`` as the English
      reference, warning if it is missing;
    * creates a ``pofiles`` directory inside the sub-directory;
    * converts each ``*.lex`` file that is not ``*_en.lex`` into
      ``pofiles/<name>.po`` via ``convert_lex_to_po``;
    * if there are English entries, writes them to ``<module>_en.lex`` in the
      sub-directory and to ``pofiles/<module>_en.po``.

    Args:
        source_directory: Root of the translated lexicons (``str`` or ``Path``).
        master_directory: Root of the English master lexicons (``str`` or
            ``Path``); must contain ``General/general_en.lex``.

    Raises:
        SystemExit: If the General English lexicon yields no entries.
        OSError: If the General English lexicon cannot be opened.
    """
    source_directory = Path(source_directory)
    master_directory = Path(master_directory)

    # First pick up the general ones that may come in any of the files
    print("loading General directory")
    # Load it up and use it to check for ones without prefix
    general_en_file = master_directory / "General/general_en.lex"
    general_en_entries = parse_lex_file(general_en_file)
    if not general_en_entries:
        # Fail loudly with a non-zero status; a bare quit() ended the run
        # silently with exit status 0.
        raise SystemExit(f"No entries found in {general_en_file}; nothing to convert.")
    print(f"Found {len(general_en_entries)} entries in General lex file")

    for subdir, _, files in os.walk(source_directory):
        subdir_path = Path(subdir)
        print(subdir_path)

        # Skip the source_directory itself (no relative parts) and anything
        # inside a 'pofiles' directory.
        relative_parts = subdir_path.relative_to(source_directory).parts
        if not relative_parts or 'pofiles' in subdir_path.parts:
            continue

        # The first component below the root names the module; capitalize it
        # to address the matching directory in the master tree.
        subservient_dir_name = relative_parts[0].capitalize()

        # Skip the General directory as its keys have no prefix
        if subservient_dir_name == "General":
            continue

        # Apply it to find the corresponding en directory
        corresponding_en_dir = master_directory / subservient_dir_name

        # Find the corresponding *_en.lex file
        en_entries = {}
        en_file = corresponding_en_dir / f"{subservient_dir_name.lower()}_en.lex"
        if en_file.is_file():
            en_entries = parse_lex_file(en_file)
        else:
            print(f"Warning: No *_en.lex file found in {corresponding_en_dir}")

        pofiles_dir = subdir_path / "pofiles"
        pofiles_dir.mkdir(parents=True, exist_ok=True)

        for file in files:
            if file.endswith('.lex') and not file.endswith('_en.lex'):
                lex_file = subdir_path / file
                po_file = pofiles_dir / lex_file.with_suffix('.po').name

                print(f"Converting {lex_file} to {po_file}")
                convert_lex_to_po(lex_file, po_file, en_entries, general_en_entries)

        # Save the updated en_entries to the en.lex file in the locale directory
        if en_entries:
            locale_en_file = subdir_path / f"{subservient_dir_name.lower()}_en.lex"
            if en_file.exists():
                # Seed the locale copy from the master file. This is not a
                # backup: its contents are rewritten by save_lex_file below.
                shutil.copy(en_file, locale_en_file)

            save_lex_file(en_entries, locale_en_file)
            print(f"Updated and saved {locale_en_file}")

            # Create and save the po file in the pofiles directory
            updated_po_file = pofiles_dir / f"{subservient_dir_name.lower()}_en.po"
            create_po_from_en_entries(en_entries, updated_po_file)
|
||||||
|
|
||||||
|
def create_po_from_en_entries(entries, po_file):
    """
    Write the English entries to ``po_file`` as an untranslated PO catalog.

    Each item of ``entries`` becomes one PO entry with the key as msgctxt,
    the English text as msgid and an empty msgstr. The catalog's Language
    header is set to 'en'.
    """
    header = {
        'Project-Id-Version': '1.0',
        'Language': 'en',
        'Content-Type': 'text/plain; charset=utf-8',
    }
    catalog = polib.POFile()
    catalog.metadata = header

    for context, english_text in entries.items():
        catalog.append(
            polib.POEntry(msgctxt=context, msgid=english_text, msgstr="")
        )

    catalog.save(po_file)
    print(f"Created {po_file}")
|
||||||
|
|
||||||
|
def main():
    """
    Run the conversion over the configured directories.

    Converts all .lex files under SOURCE_LOCALE_DIR into their respective
    pofiles directories, using the English lexicons under SOURCE_MANAGER_DIR
    as the msgid mapping.
    """
    print("Processing locale directory...")
    locale_root, master_root = SOURCE_LOCALE_DIR, SOURCE_MANAGER_DIR
    process_directory_with_en_mapping(locale_root, master_root)

    print("Conversion complete.")


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user