# Lex2Po/Check_Translation.py

import os
import argparse
import re
import json  # Import the json module

# Keys (compared after the module prefix has been removed) that are excluded
# from both the "missing translation" and the "not in English lex" checks.
do_not_need_translating = [
    "AM",
    "PM",
    "AM/PM",
    "nfs",
    "cifs",
]


def infer_prefix(entries, filename, IsGeneral=False):
    """
    Infer the ``<abc>_`` key prefix used by a module's lexicon.

    Args:
        entries: Mapping whose keys are lexicon keys (e.g. ``"bac_TITLE"``).
        filename: Name of the lexicon file, as a ``str`` or a path object.
            Only used for the fallback and for the diagnostic message.
        IsGeneral: When true, the entries belong to the General lexicon,
            which has no prefix.

    Returns:
        ``""`` when ``IsGeneral`` is true; otherwise the text up to and
        including the first underscore of the first key that contains one;
        otherwise the first three characters of the file name's stem,
        lower-cased, followed by ``"_"``.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import block.
    from pathlib import PurePath

    if IsGeneral:
        return ""  # No prefix for General items

    for key in entries:
        # Shortest leading run of characters that ends in an underscore.
        match = re.match(r".*?_", key)
        if match:
            prefix = match.group(0)
            print(f"Found prefix:{prefix} {filename}")
            return prefix

    # No key carries a prefix: derive one from the file name. Wrapping in
    # PurePath makes this work for plain strings (what os.listdir returns)
    # as well as for path objects, which a bare ``filename.stem`` did not.
    return PurePath(filename).stem[:3].lower() + "_"


# Default locations of the per-module lexicon directories. They can be
# overridden through the keyword arguments of find_lex_files().
_DEFAULT_LOCALE_MODULES_DIR = (
    "/home/brianr//Documents/smeserver-manager-locale"
    "/root/usr/share/smanager/lib/SrvMngr/I18N/Modules"
)
_DEFAULT_EN_MODULES_DIR = (
    "/home/brianr//Documents/smeserver-manager"
    "/root/usr/share/smanager/lib/SrvMngr/I18N/Modules"
)


def _parse_lex_line(line):
    """Return ``(key, message)`` for a ``'key' => 'message',`` line, else ``None``."""
    if "=>" not in line:
        return None
    key, message = line.split("=>", 1)
    return key.strip().strip("'"), message.strip().strip(",").strip("'")


def _language_code(lex_file):
    """Return the last two characters of the file name before its first dot."""
    return lex_file.split(".")[0][-2:]


def _strip_prefix(key, prefix):
    """Return ``key`` without ``prefix``; keys lacking the prefix are unchanged."""
    if prefix and key.startswith(prefix):
        return key[len(prefix):]
    return key


def _read_lex_entries(path):
    """Return a dict of every ``key => message`` pair found in the file at ``path``."""
    entries = {}
    with open(path, "r", encoding="utf-8") as lex_f:
        for line in lex_f:
            parsed = _parse_lex_line(line)
            if parsed is not None:
                entries[parsed[0]] = parsed[1]
    return entries


def _remove_keys_from_lex(path, stale_keys):
    """Drop the entry lines for ``stale_keys`` from ``path``; return how many were dropped."""
    with open(path, "r", encoding="utf-8") as lex_f:
        all_lines = lex_f.readlines()

    kept_lines = []
    deleted = 0
    for line in all_lines:
        parsed = _parse_lex_line(line)
        if parsed is not None and parsed[0] in stale_keys:
            deleted += 1
        else:
            # Comments, blank lines and any other non-entry text are kept.
            kept_lines.append(line)

    # Only touch the file when something actually has to go, so an
    # unchanged file is never truncated and rewritten.
    if deleted:
        with open(path, "w", encoding="utf-8") as lex_f:
            lex_f.writelines(kept_lines)
    return deleted


def find_lex_files(
    modulename, locale_modules_dir=None, en_modules_dir=None, output_dir=None
):
    """
    Check a module's locale ``.lex`` files against its English ``_en.lex`` file.

    For every non-English ``.lex`` file in the module's locale directory:

    * keys whose message is empty or identical to the English message are
      recorded as missing translations;
    * keys that do not exist in the English file are reported and then
      removed from the locale file (the file is rewritten in place).

    Keys listed in ``do_not_need_translating`` (compared with the module
    prefix removed) are skipped by both checks. The missing translations,
    grouped by language code, are written to
    ``missing_translations_<modulename>.json``.

    Parsing is line based: only lines containing ``=>`` are treated as
    entries, and all other lines are preserved untouched.
    NOTE(review): an entry whose value spans several lines is not handled
    as a unit -- confirm the lexicons never contain such entries.

    Args:
        modulename: Name of the module sub-directory to check.
        locale_modules_dir: Directory holding the per-module locale
            lexicons. Defaults to the original hard-coded location.
        en_modules_dir: Directory holding the per-module English lexicons.
            Defaults to the original hard-coded location.
        output_dir: Directory for the JSON report. Defaults to the current
            working directory.

    Returns:
        None. Results are printed, written to the JSON report and applied
        to the locale files.
    """
    # Imported locally so this block does not depend on the file's
    # top-level import block.
    from pathlib import Path

    if locale_modules_dir is None:
        locale_modules_dir = _DEFAULT_LOCALE_MODULES_DIR
    if en_modules_dir is None:
        en_modules_dir = _DEFAULT_EN_MODULES_DIR

    base_locale_path = os.path.join(locale_modules_dir, modulename)
    base_en_path = os.path.join(en_modules_dir, modulename)

    # A mistyped module name should give a clear message, not a traceback.
    for required_dir in (base_locale_path, base_en_path):
        if not os.path.isdir(required_dir):
            print(f"Directory not found for module '{modulename}': {required_dir}")
            return

    # Sorted so that the output order does not depend on the file system.
    # Any English file sitting in the locale directory is skipped.
    locale_files = sorted(
        f
        for f in os.listdir(base_locale_path)
        if f.endswith(".lex") and not f.endswith("_en.lex")
    )
    en_file = next(
        (f for f in sorted(os.listdir(base_en_path)) if f.endswith("_en.lex")),
        None,
    )
    if en_file is None:
        print(f"No _en.lex file found for module: {modulename}")
        return

    # Read translation pairs from the _en file
    translations = _read_lex_entries(os.path.join(base_en_path, en_file))

    # Passed as a Path because the prefix fallback reads ``filename.stem``.
    module_prefix = infer_prefix(translations, Path(en_file))

    # language code -> list of keys, in file order
    missing_translations = {}
    key_not_in_en = {}

    # Compare against each lex file in the locale directory
    for lex_file in locale_files:
        print(f"File:{lex_file}")
        language_code = _language_code(lex_file)
        missing = missing_translations.setdefault(language_code, [])
        unknown = key_not_in_en.setdefault(language_code, [])

        with open(
            os.path.join(base_locale_path, lex_file), "r", encoding="utf-8"
        ) as loc_f:
            for line in loc_f:
                parsed = _parse_lex_line(line)
                if parsed is None:
                    continue
                key, message = parsed

                # Strip the prefix only when the key really starts with it;
                # blind slicing would mangle keys that lack the prefix.
                if _strip_prefix(key, module_prefix) in do_not_need_translating:
                    continue

                if key not in translations:
                    unknown.append(key)
                elif message == translations[key] or message == "":
                    missing.append(key)

    for lang_code, keys in missing_translations.items():
        if keys:
            print(
                f"Missing translations for module '{modulename}', ({lang_code}) - #{len(keys)}:"
            )

    for lang_code, keys in key_not_in_en.items():
        if keys:
            print(
                f"Keys not found in english lex for module '{modulename} ({lang_code}) - #{len(keys)}'."
            )
            print(keys)

    # Remove keys that are not found in English lex from translation files
    for lex_file in locale_files:
        print(f"File:{lex_file}")
        stale_keys = set(key_not_in_en.get(_language_code(lex_file), ()))
        deleted = _remove_keys_from_lex(
            os.path.join(base_locale_path, lex_file), stale_keys
        )
        print(f"Updated {lex_file}, removed {deleted} keys.")

    # Write the missing_translations dictionary to a JSON file
    output_filename = f"missing_translations_{modulename}.json"
    if output_dir is not None:
        output_filename = os.path.join(output_dir, output_filename)
    with open(output_filename, "w", encoding="utf-8") as json_file:
        json.dump(missing_translations, json_file, ensure_ascii=False, indent=4)
    print(f"Wrote missing translations to {output_filename}")


if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the module
    # name, and runs the translation check for it.
    cli = argparse.ArgumentParser(
        description="Check for missing translations in lex files."
    )
    cli.add_argument(
        "modulename",
        help="The name of the module to check translations for.",
    )
    find_lex_files(cli.parse_args().modulename)