"""Convert smeserver-manager ``.lex`` translation files into gettext ``.po`` files.

For every module directory under ``SOURCE_LOCALE_DIR`` the translated
``*.lex`` files are paired with the English ``<module>_en.lex`` found under
``SOURCE_MANAGER_DIR`` and written to ``<module dir>/pofiles/<name>.po``.
The (possibly extended) English entries are then saved next to the
translations as ``<module>_en.lex`` and ``pofiles/<module>_en.po``.

Reconstructed from the whitespace-collapsed diff that added ``trylex2po.py``.
"""

import os
import re
import shutil
from pathlib import Path

# NOTE: ``polib`` (third-party) is imported inside the two functions that
# build PO files, so the lex parsing helpers stay importable without it.

# Define paths
SOURCE_LOCALE_DIR = Path("/home/brianr/Documents/smeserver-manager-locale/root/usr/share/smanager/lib/SrvMngr/I18N/Modules")
SOURCE_MANAGER_DIR = Path("/home/brianr/Documents/smeserver-manager/root/usr/share/smanager/lib/SrvMngr/I18N/Modules")

# ``<name>_<ll>.lex`` / ``<name>_<ll-cc>.lex``; ``.po`` is accepted as well so
# the name of a generated PO file yields the same language code.
_LANGUAGE_RE = re.compile(r"[_-]([a-z]{2}(?:-[a-z]{2})?)\.(?:lex|po)", re.IGNORECASE)

# One ``'key' => 'value',`` pair of a lex file.
_LEX_ENTRY_RE = re.compile(
    r"""
    (['"]?)              # Optional opening quote for the key
    ([^'"\s]+?)          # The key itself (no quotes, no whitespace)
    \1                   # Closing quote matching the opening quote
    \s*=>\s*             # The delimiter
    '                    # Opening quote for the value
    ((?:\\.|[^'\\])*)    # The value; a backslash escape never ends it
    '                    # Closing quote for the value
    \s*,?                # Trailing comma, optional for the last entry
    """,
    re.DOTALL | re.VERBOSE,
)

# A single quote that is not already preceded by a backslash.
_UNESCAPED_QUOTE_RE = re.compile(r"(?<!\\)'")


def extract_language_from_filename(filename):
    """Return the language code embedded in a lex/po file name.

    ``backup_fr.lex`` gives ``"fr"`` and ``backup_zh-CN.lex`` gives
    ``"zh_cn"`` (dash replaced by underscore, lower-cased).  Returns ``None``
    when the name carries no ``_<lang>.lex`` / ``_<lang>.po`` suffix.
    """
    match = _LANGUAGE_RE.search(filename)
    if match is None:
        return None
    return match.group(1).replace("-", "_").lower()


def parse_lex_file(lex_file):
    """Read *lex_file* and return its ``key => 'value'`` pairs as a dict.

    Keys may be bare, single- or double-quoted; values must be single-quoted.
    Values are returned stripped, with the two-character sequence ``\\n``
    turned into a real newline.  Other backslash escapes (such as ``\\'``)
    are kept exactly as written in the file.
    """
    with open(lex_file, 'r', encoding='utf-8') as file:
        content = file.read()

    entries = {}
    for match in _LEX_ENTRY_RE.finditer(content):
        key = match.group(2).strip()
        value = match.group(3).strip()
        entries[key] = value.replace("\\n", "\n")
    return entries


def save_lex_file(entries, lex_file):
    """Write *entries* to *lex_file*, one ``'key' => 'value',`` line each.

    Single quotes in a value are backslash-escaped unless they already are;
    values coming from ``parse_lex_file`` keep their ``\\'`` escapes, and
    escaping those again would terminate the quoted string early.
    """
    with open(lex_file, 'w', encoding='utf-8') as file:
        for key, value in entries.items():
            escaped_value = _UNESCAPED_QUOTE_RE.sub(r"\\'", value)
            file.write(f"'{key}' => '{escaped_value}',\n")


def infer_prefix(entries, filename):
    """
    Infer the key prefix used by *entries*.

    The prefix is everything up to and including the first underscore of the
    first key that contains one.  When no key has an underscore, fall back to
    the first 3 characters of the file name's stem (lower-cased) plus ``_``.
    *filename* may be a ``str`` or a ``Path``.
    """
    for key in entries:
        head, underscore, _rest = key.partition("_")
        if underscore:
            prefix = head + underscore
            print(f"Found prefix:{prefix} {filename}")
            return prefix
    # If no prefix is found, return the first 3 characters of the filename
    return Path(filename).stem[:3].lower() + "_"


def extract_module_name(file_path):
    """Return the name of the directory containing *file_path*, capitalized.

    ``str.capitalize`` is used, so everything after the first letter is
    lower-cased.  A path without a parent directory yields ``""``.
    """
    return Path(file_path).parent.name.capitalize()


def ensure_prefix(entries, prefix, file_path):
    """
    Return a copy of *entries* in which every key starts with *prefix*.

    Keys lacking the prefix get it prepended (and the change is printed
    together with the module name derived from *file_path*); the input
    mapping itself is left untouched.
    """
    module_name = extract_module_name(file_path)
    updated_entries = {}
    for key, value in entries.items():
        if key.startswith(prefix):
            updated_entries[key] = value
            continue
        updated_key = f"{prefix}{key}"
        print(f"Adding prefix: {key} -> {updated_key} ({module_name})")
        updated_entries[updated_key] = value
    return updated_entries


def _po_metadata(language_code):
    """Build the PO header shared by the translated and the English files."""
    return {
        'Project-Id-Version': '1.0',
        'Language': language_code or '',
        'Content-Type': 'text/plain; charset=utf-8',
    }


def convert_lex_to_po(lex_file, po_file, en_entries, general_en_entries):
    """Write the translations of *lex_file* to *po_file*.

    Each translated key becomes the ``msgctxt``, the English text the
    ``msgid`` and the translation the ``msgstr``.  The English text is looked
    up in *en_entries*, then in *general_en_entries*; if neither has it a
    placeholder is used.

    *en_entries* is updated IN PLACE: its keys receive the inferred prefix
    and every placeholder is added to it, so the caller can save the
    extended English lexicon afterwards.
    """
    import polib  # third-party; only needed when a PO file is produced

    lex_path = Path(lex_file)
    translated_entries = parse_lex_file(lex_path)
    # The language lives in the lex file name (the regex in
    # extract_language_from_filename originally only knew ``.lex``, so the
    # ``.po`` name that used to be passed here never matched).
    language_code = extract_language_from_filename(lex_path.name)

    # Infer prefix from original en_entries and ensure all entries have it.
    prefix = infer_prefix(en_entries, lex_path)
    prefixed_en = ensure_prefix(en_entries, prefix, lex_path)
    # Mutate the caller's dict rather than rebinding the local name,
    # otherwise none of the updates below would ever leave this function.
    en_entries.clear()
    en_entries.update(prefixed_en)
    translated_entries = ensure_prefix(translated_entries, prefix, lex_path)

    po = polib.POFile()
    po.metadata = _po_metadata(language_code)

    for msgctxt, msgstr in translated_entries.items():
        msgid = en_entries.get(msgctxt, "")  # original text for this Msg ID
        if not msgid:
            print(f"Warning: Could not find original text for Msg ID '{msgctxt}'")
            # See if in General.
            # NOTE(review): msgctxt already carries the module prefix here, so
            # this only hits General keys that start with it - confirm whether
            # the unprefixed key should be tried as well.
            msgid = general_en_entries.get(msgctxt, "")
            if msgid:
                print(f"Found {msgctxt} => {msgid} in general")
            else:
                msgid = "Placeholder for missing original text"
                # Remember the new entry in the English lexicon.
                en_entries[msgctxt] = msgid
        po.append(polib.POEntry(msgctxt=msgctxt, msgid=msgid, msgstr=msgstr))

    po.save(po_file)


def process_directory_with_en_mapping(source_directory, master_directory):
    """Convert every translated lex file below *source_directory* to PO.

    *master_directory* holds the English lexicons, one ``<Module>``
    directory per module plus ``General/general_en.lex``.  Directories named
    ``pofiles`` and the ``General`` module are skipped.

    Raises:
        SystemExit: if the General English lexicon contains no entries.
    """
    source_directory = Path(source_directory)
    master_directory = Path(master_directory)

    # First pick up the general ones that may come in any of the files
    print("loading General directory")
    general_en_file = master_directory / "General/general_en.lex"
    general_en_entries = parse_lex_file(general_en_file)
    if not general_en_entries:
        # Exit with a message and a non-zero status instead of a silent quit().
        raise SystemExit(f"No entries found in {general_en_file}; nothing to do.")
    print(f"Found {len(general_en_entries)} entries in General lex file")

    for subdir, _, files in os.walk(source_directory):
        subdir_path = Path(subdir)
        print(subdir_path)

        # Skip the source_directory itself and anything inside a 'pofiles' dir
        if subdir_path == source_directory or 'pofiles' in subdir_path.parts:
            continue

        # First path component below the source directory, capitalized
        try:
            subservient_dir_name = subdir_path.relative_to(source_directory).parts[0].capitalize()
        except IndexError:
            print(f"Skipping directory {subdir_path} as it has no subservient directory.")
            continue

        # Skip the General directory as its keys have no prefix
        if subservient_dir_name == "General":
            continue

        # Locate and load the corresponding `*_en.lex` file
        corresponding_en_dir = master_directory / subservient_dir_name
        en_file = corresponding_en_dir / f"{subservient_dir_name.lower()}_en.lex"
        en_entries = {}
        if en_file.is_file():
            en_entries = parse_lex_file(en_file)
        else:
            print(f"Warning: No *_en.lex file found in {corresponding_en_dir}")

        pofiles_dir = subdir_path / "pofiles"
        pofiles_dir.mkdir(parents=True, exist_ok=True)

        for file in files:
            if not file.endswith('.lex') or file.endswith('_en.lex'):
                continue
            lex_file = subdir_path / file
            po_file = pofiles_dir / lex_file.with_suffix('.po').name

            print(f"Converting {lex_file} to {po_file}")
            # Also extends en_entries in place (prefixes, placeholders).
            convert_lex_to_po(lex_file, po_file, en_entries, general_en_entries)

        if not en_entries:
            continue

        # Save the updated en_entries to the en.lex file in the locale directory
        locale_en_file = subdir_path / f"{subservient_dir_name.lower()}_en.lex"
        if en_file.exists():
            # Copies the master file (data and permission bits) into place;
            # its content is overwritten by save_lex_file just below.
            shutil.copy(en_file, locale_en_file)
        save_lex_file(en_entries, locale_en_file)
        print(f"Updated and saved {locale_en_file}")

        # Create and save the English po file in the pofiles directory
        updated_po_file = pofiles_dir / f"{subservient_dir_name.lower()}_en.po"
        create_po_from_en_entries(en_entries, updated_po_file)


def create_po_from_en_entries(entries, po_file):
    """Write *entries* to *po_file* as an English PO file.

    Every key becomes a ``msgctxt`` with the English text as ``msgid`` and an
    empty ``msgstr``.
    """
    import polib  # third-party; only needed when a PO file is produced

    po = polib.POFile()
    po.metadata = _po_metadata('en')

    for msgctxt, msgid in entries.items():
        po.append(polib.POEntry(msgctxt=msgctxt, msgid=msgid, msgstr=""))

    po.save(po_file)
    print(f"Created {po_file}")


def main():
    """Convert all lex files under ``SOURCE_LOCALE_DIR`` using the en mapping."""
    print("Processing locale directory...")
    process_directory_with_en_mapping(SOURCE_LOCALE_DIR, SOURCE_MANAGER_DIR)

    print("Conversion complete.")


if __name__ == "__main__":
    main()