diff --git a/lex_scan.py b/lex_scan.py index c5df676..51fdc81 100644 --- a/lex_scan.py +++ b/lex_scan.py @@ -6,17 +6,21 @@ import sys import json # Configure logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") -SYSTEM_BASE_PATH = "/usr/share/smanager/" +missing_files = [] def validate_panel_name(panel_name): if not panel_name[0].isupper(): logging.error(f"Error: Panel name \'{panel_name}\' must start with a capital letter.") sys.exit(1) +def get_full_base_path(system): + return os.path.expanduser(os.path.join("~", system, "usr", "share", "smanager")) + def check_controller_file_exists(system, panel): - controller_path = os.path.join(SYSTEM_BASE_PATH, "lib/SrvMngr/Controller", f"{panel}.pm") + full_base_path = get_full_base_path(system) + controller_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}.pm") if not os.path.exists(controller_path): logging.error(f"Error: Controller file \'{controller_path}\' does not exist.") sys.exit(1) @@ -26,28 +30,29 @@ def extract_title_prefix(controller_path): prefix = None with open(controller_path, 'r') as f: content = f.read() - # Corrected regex: match either " or \' for the string enclosure - match = re.search(r"my \$title = \$c->l\([\"|\"]([A-Za-z]{2,4})_.*?\)", content) + match = re.search(r"\$c->l\([\"|\"]([A-Za-z]{2,4})_.*?\)", content) if match: prefix = match.group(1) logging.info(f"Extracted prefix: {prefix}") else: - logging.error(f"Error: Could not find title prefix in \'{controller_path}\'. Expected format: my $title = $c->l(\"_something\") or my $title = $c->l(\\'_something\\')") + logging.error(f"Error: Could not find title prefix in \'{controller_path}\'. Expected format: my $title = $c->l(\"_something\") or my $title = $c->l(\\\\\\\\'_something\\\")") sys.exit(1) return prefix def scan_application_files(system, panel, prefix, scan_general=False): extracted_strings = {} + full_base_path = get_full_base_path(system) + # Controller file - controller_path = os.path.join(SYSTEM_BASE_PATH, "lib/SrvMngr/Controller", f"{panel}.pm") + controller_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}.pm") logging.info(f"Scanning controller file: {controller_path}") scan_file_for_lexical_strings(controller_path, prefix, extracted_strings, scan_general) # Template files themes = ["default", "AdminLTE"] for theme in themes: - template_base_path = os.path.join(SYSTEM_BASE_PATH, "themes", theme, "templates") + template_base_path = os.path.join(full_base_path, "themes", theme, "templates") panel_template_path = os.path.join(template_base_path, f"{panel.lower()}.html.ep") logging.info(f"Scanning panel template file: {panel_template_path}") scan_file_for_lexical_strings(panel_template_path, prefix, extracted_strings, scan_general) @@ -56,7 +61,8 @@ def scan_application_files(system, panel, prefix, scan_general=False): partials_dir = os.path.join(template_base_path, "partials") if os.path.exists(partials_dir): for filename in os.listdir(partials_dir): - if filename.endswith(".html.ep"): + # Only scan partial files that match the pattern __.html.ep + if filename.startswith(f"_{prefix.lower()}_") and filename.endswith(".html.ep"): partial_path = os.path.join(partials_dir, filename) logging.info(f"Scanning partial template file: {partial_path}") scan_file_for_lexical_strings(partial_path, prefix, extracted_strings, scan_general) @@ -65,13 +71,12 @@ def scan_application_files(system, panel, prefix, scan_general=False): def scan_file_for_lexical_strings(filepath, prefix, extracted_strings_dict, scan_general): if not os.path.exists(filepath): - logging.warning(f"Warning: File not found: {filepath}") + missing_files.append(filepath) return with open(filepath, 'r') as f: content = f.read() if scan_general: - # Regex for general strings: l[\s|(][\"|"](.*)[\"|"]\) pattern = re.compile(r"l[\s|(][\"|\"](.*?)[\"|\"]\)") found_strings = pattern.findall(content) for s in found_strings: @@ -81,7 +86,6 @@ def scan_file_for_lexical_strings(filepath, prefix, extracted_strings_dict, scan if filepath not in extracted_strings_dict[s]: extracted_strings_dict[s].append(filepath) else: - # Regex to find _ pattern = re.compile(rf"{prefix}_[a-zA-Z0-9_]+") found_strings = pattern.findall(content) for s in found_strings: @@ -91,27 +95,33 @@ def scan_file_for_lexical_strings(filepath, prefix, extracted_strings_dict, scan extracted_strings_dict[s].append(filepath) def read_lex_file(filepath): + logging.info(f"Reading file: {filepath}") lex_data = {} if not os.path.exists(filepath): - logging.warning(f"Lex file not found: {filepath}. Returning empty dictionary.") + logging.warning(f"File not found: {filepath}") + missing_files.append(filepath) return lex_data - with open(filepath, 'r') as f: - for line in f: - match = re.match(r"'(.*?)' => '(.*)'", line.strip()) - if match: - key, value = match.groups() - lex_data[key] = value + with open(filepath, 'r', encoding='utf-8') as f: + content = f.read() + # This regex finds all 'key' => 'value' pairs, even if separated by commas or newlines + pattern = r"'(.*?)'\s*=>\s*'(.*?)(? '{value}'\n") + f.write(f"\'{key}\' => \'{value}\',{os.linesep}") def read_languages_json(filepath): if not os.path.exists(filepath): - logging.error(f"Error: languages.json file not found at {filepath}") - sys.exit(1) + missing_files.append(filepath) + return [] # Return empty list instead of exiting with open(filepath, 'r') as f: languages = json.load(f) return languages @@ -128,161 +138,180 @@ def update_file_with_new_lexical_string(filepath, old_string, new_string): logging.error(f"Error updating file {filepath}: {e}") def main(): - parser = argparse.ArgumentParser(description="Scan Mojolicious application files for lexical strings.") - parser.add_argument("-p", "--panel", required=True, help="Name of the Mojolicious panel (e.g., MyPanel).") - parser.add_argument("-s", "--system", default="SME11", help="System name (default: SME11).") - - args = parser.parse_args() + parser = argparse.ArgumentParser(description="Scan Mojolicious application files for lexical strings.") + parser.add_argument("-p", "--panel", required=True, help="Name of the Mojolicious panel (e.g., MyPanel).") + parser.add_argument("-s", "--system", default="SME11", help="System name (default: SME11).") + parser.add_argument("-e", "--edit", action="store_true", help="Enable editing of original files (default: False).") + parser.add_argument("-l", "--lang", action="store_true", help="Enable other language processing (default: False).") - panel = args.panel - system = args.system + args = parser.parse_args() - logging.info(f"Starting scan for panel: {panel}, system: {system}") + panel = args.panel + system = args.system + edit_files = args.edit + do_lang = args.lang - validate_panel_name(panel) - controller_path = check_controller_file_exists(system, panel) - prefix = extract_title_prefix(controller_path) + logging.info(f"Starting scan for panel: {panel}, system: {system} edit: {edit_files} lang: {do_lang}") + #quit(0) - if prefix: - logging.info(f"Scanning application files for strings with prefix \'{prefix}\'...") - extracted_panel_strings = scan_application_files(system, panel, prefix) - logging.info(f"Deduplicated extracted panel strings: {len(extracted_panel_strings)} unique strings found.") + validate_panel_name(panel) + controller_path = check_controller_file_exists(system, panel) + prefix = extract_title_prefix(controller_path) - # Process panel-specific English lexical file - # Output to current working directory - panel_lex_output_dir = os.path.join(os.getcwd(), "output", panel.capitalize()) - os.makedirs(panel_lex_output_dir, exist_ok=True) - - en_lex_path = os.path.join(SYSTEM_BASE_PATH, "lib/SrvMngr/I128N", panel.capitalize(), f"{panel.lower()}_en.lex") - en_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.new") + if prefix: + logging.info(f"Scanning application files for strings with prefix \'{prefix}\'...") + extracted_panel_strings = scan_application_files(system, panel, prefix) + logging.info(f"Deduplicated extracted panel strings: {len(extracted_panel_strings)} unique strings found.") - en_lex_data = read_lex_file(en_lex_path) - new_en_lex_data = {} + # Process panel-specific English lexical file + # Output to current working directory + panel_lex_output_dir = os.path.join(os.getcwd(), "output", panel.capitalize()) + os.makedirs(panel_lex_output_dir, exist_ok=True) + + full_base_path = get_full_base_path(system) - for lex_string in extracted_panel_strings.keys(): - if lex_string in en_lex_data: - new_en_lex_data[lex_string] = en_lex_data[lex_string] - else: - # Convert _ to with underlines mapped to spaces - sometext = lex_string.replace(f"{prefix}_", "").replace("_", " ") - new_en_lex_data[lex_string] = sometext - - write_lex_file(en_lex_new_path, new_en_lex_data) - logging.info(f"Generated {en_lex_new_path}. Lines in new file: {len(new_en_lex_data)}, Lines in original file: {len(en_lex_data)}") + # Corrected capitalization for panel in path + en_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", panel, f"{panel.lower()}_en.lex") + en_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.new") - # Read languages.json (assuming it\'s in a known path, e.g., /usr/share/smanager/Templates/languages.json) - languages_json_path = os.path.join(SYSTEM_BASE_PATH, "Templates", "languages.json") # Placeholder path - try: - languages = read_languages_json(languages_json_path) - except SystemExit: - logging.warning(f"Could not read languages.json from {languages_json_path}. Skipping language-specific lexical file processing.") - languages = [] # Set to empty list to skip the loop + en_lex_data = read_lex_file(en_lex_path) + logging.info(f"Original English lex file lines: {len(en_lex_data)}") + new_en_lex_data = {} - # Process panel-specific other language lexical files - for lang_entry in languages: - lang_code = lang_entry["code"] - if lang_code == "en": # Skip English, already processed - continue - - lang_lex_path = os.path.join(SYSTEM_BASE_PATH, "lib/SrvMngr/I128N", panel.capitalize(), f"{panel.lower()}_{lang_code}.lex") - lang_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_{lang_code}.lex.new") + for lex_string in extracted_panel_strings.keys(): + if lex_string in en_lex_data: + new_en_lex_data[lex_string] = en_lex_data[lex_string] + else: + #Replace rhs by the lhs less the prefix and no underlines, in lowercase (but capitalised) + # this may make a reasonable message, derived from the lex string id. + sometext = lex_string.replace(f"{prefix}_", "").replace("_", " ") + # Split into words + words = sometext.split() + # Lowercase all words, capitalize the first + if words: + words = [words[0].capitalize()] + [w.lower() for w in words[1:]] + sometext = ' '.join(words) + new_en_lex_data[lex_string] = sometext + + write_lex_file(en_lex_new_path, new_en_lex_data) + logging.info(f"Generated {en_lex_new_path}. Lines in new file: {len(new_en_lex_data)}, Lines in original file: {len(en_lex_data)}") - lang_lex_data = read_lex_file(lang_lex_path) - new_lang_lex_data = {} + if do_lang: + languages_json_path = os.path.join(".", "Templates", "languages.json") # Corrected path + languages = read_languages_json(languages_json_path) - for lex_string in extracted_panel_strings.keys(): - if lex_string in lang_lex_data: - new_lang_lex_data[lex_string] = lang_lex_data[lex_string] - else: - sometext_from_en = new_en_lex_data.get(lex_string, "") - new_lang_lex_data[lex_string] = sometext_from_en - - write_lex_file(lang_lex_new_path, new_lang_lex_data) - logging.info(f"Generated {lang_lex_new_path}. Lines in new file: {len(new_lang_lex_data)}, Lines in original file: {len(lang_lex_data)}") + for lang_entry in languages: + lang_code = lang_entry["code"] + if lang_code == "en": + continue + + lang_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", panel, f"{panel.lower()}_{lang_code}.lex") + lang_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_{lang_code}.lex.new") - # Scan for general lexical strings - logging.info("Scanning application files for general lexical strings...") - extracted_general_strings = scan_application_files(system, panel, prefix, scan_general=True) - logging.info(f"Deduplicated extracted general strings: {len(extracted_general_strings)} unique strings found.") + lang_lex_data = read_lex_file(lang_lex_path) + logging.info(f"Original {lang_code} lex file lines: {len(lang_lex_data)}") + new_lang_lex_data = {} - general_lex_output_dir = os.path.join(os.getcwd(), "output", "general") - os.makedirs(general_lex_output_dir, exist_ok=True) + for lex_string in extracted_panel_strings.keys(): + if lex_string in lang_lex_data: + new_lang_lex_data[lex_string] = lang_lex_data[lex_string] + else: + sometext_from_en = new_en_lex_data.get(lex_string, "") + new_en_lex_data[lex_string] = sometext + new_lang_lex_data[lex_string] = sometext_from_en + + write_lex_file(lang_lex_new_path, new_lang_lex_data) + logging.info(f"Generated {lang_lex_new_path}. Lines in new file: {len(new_lang_lex_data)}, Lines in original file: {len(lang_lex_data)}") - general_en_lex_path = os.path.join(SYSTEM_BASE_PATH, "lib/SrvMngr/I128N", "general", "general_en.lex") - general_en_lex_new_path = os.path.join(general_lex_output_dir, "general_en.lex.new") + logging.info("Scanning application files for general lexical strings...") + extracted_general_strings = scan_application_files(system, panel, prefix, scan_general=True) + logging.info(f"Deduplicated extracted general strings: {len(extracted_general_strings)} unique strings found.") - general_en_lex_data = read_lex_file(general_en_lex_path) - new_general_en_lex_data = {} + general_lex_output_dir = os.path.join(os.getcwd(), "output", "General") + os.makedirs(general_lex_output_dir, exist_ok=True) - for lex_string in extracted_general_strings.keys(): - if lex_string in general_en_lex_data: - new_general_en_lex_data[lex_string] = general_en_lex_data[lex_string] - else: - sometext = lex_string.replace("_", " ") - new_general_en_lex_data[lex_string] = sometext - - write_lex_file(general_en_lex_new_path, new_general_en_lex_data) - logging.info(f"Generated {general_en_lex_new_path}. Lines in new file: {len(new_general_en_lex_data)}, Lines in original file: {len(general_en_lex_data)}") + general_en_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", "general_en.lex") + general_en_lex_new_path = os.path.join(general_lex_output_dir, "general_en.lex.new") - # Process general other language lexical files - for lang_entry in languages: - lang_code = lang_entry["code"] - if lang_code == "en": - continue - - general_lang_lex_path = os.path.join(SYSTEM_BASE_PATH, "lib/SrvMngr/I128N", "general", f"general_{lang_code}.lex") - general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new") + general_en_lex_data = read_lex_file(general_en_lex_path) + logging.info(f"Original general English lex file lines: {len(general_en_lex_data)}") + new_general_en_lex_data = {} - general_lang_lex_data = read_lex_file(general_lang_lex_path) - new_general_lang_lex_data = {} + for lex_string in extracted_general_strings.keys(): + if lex_string in general_en_lex_data: + new_general_en_lex_data[lex_string] = general_en_lex_data[lex_string] + else: + sometext = lex_string.replace("_", " ") + # Split into words + words = sometext.split() + # Lowercase all words, capitalize the first + if words: + words = [words[0].capitalize()] + [w.lower() for w in words[1:]] + sometext = ' '.join(words) + new_general_en_lex_data[lex_string] = sometext + write_lex_file(general_en_lex_new_path, new_general_en_lex_data) + logging.info(f"Generated {general_en_lex_new_path}. Lines in new file: {len(new_general_en_lex_data)}, Lines in original file: {len(general_en_lex_data)}") - for lex_string in extracted_general_strings.keys(): - if lex_string in general_lang_lex_data: - new_general_lang_lex_data[lex_string] = general_lang_lex_data[lex_string] - else: - sometext_from_en = new_general_en_lex_data.get(lex_string, "") - new_general_lang_lex_data[lex_string] = sometext_from_en - - write_lex_file(general_lang_lex_new_path, new_general_lang_lex_data) - logging.info(f"Generated {general_lang_lex_new_path}. Lines in new file: {len(new_general_lang_lex_data)}, Lines in original file: {len(general_lang_lex_data)}") + if do_lang: + for lang_entry in languages: + lang_code = lang_entry["code"] + if lang_code == "en": + continue + + general_lang_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", f"general_{lang_code}.lex") + general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new") - # Handle single-word lexical strings - logging.info("Handling single-word lexical strings...") - for lex_string, filepaths in extracted_panel_strings.items(): - if lex_string.startswith(f"{prefix}_"): - sometext_part = lex_string[len(prefix) + 1:] - if "_" not in sometext_part: # It\'s a single word after prefix - just_one_word = sometext_part - - # Check in general_en.lex.new - if just_one_word not in new_general_en_lex_data: - new_general_en_lex_data[just_one_word] = just_one_word - logging.info(f"Added \'{just_one_word}\' to {general_en_lex_new_path}") - write_lex_file(general_en_lex_new_path, new_general_en_lex_data) + general_lang_lex_data = read_lex_file(general_lang_lex_path) + logging.info(f"Original general {lang_code} lex file lines: {len(general_lang_lex_data)}") + new_general_lang_lex_data = {} - # Update other general language files - for lang_entry in languages: - lang_code = lang_entry["code"] - if lang_code == "en": - continue - general_lang_lex_path = os.path.join(SYSTEM_BASE_PATH, "lib/SrvMngr/I128N", "general", f"general_{lang_code}.lex") - general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new") - - current_general_lang_lex_data = read_lex_file(general_lang_lex_new_path) # Read the .new file - if just_one_word not in current_general_lang_lex_data: - current_general_lang_lex_data[just_one_word] = just_one_word # Assuming same value for now - write_lex_file(general_lang_lex_new_path, current_general_lang_lex_data) - logging.info(f"Added \'{just_one_word}\' to {general_lang_lex_new_path}") + for lex_string in extracted_general_strings.keys(): + if lex_string in general_lang_lex_data: + new_general_lang_lex_data[lex_string] = general_lang_lex_data[lex_string] + else: + sometext_from_en = new_general_en_lex_data.get(lex_string, "") + new_general_lang_lex_data[lex_string] = sometext_from_en + + write_lex_file(general_lang_lex_new_path, new_general_lang_lex_data) + logging.info(f"Generated {general_lang_lex_new_path}. Lines in new file: {len(new_general_lang_lex_data)}, Lines in original file: {len(general_lang_lex_data)}") - # Edit original files - for filepath in filepaths: - update_file_with_new_lexical_string(filepath, lex_string, just_one_word) + if edit_files: + logging.info("Handling single-word lexical strings...") + for lex_string, filepaths in extracted_panel_strings.items(): + if lex_string.startswith(f"{prefix}_"): + sometext_part = lex_string[len(prefix) + 1:] + if "_" not in sometext_part: + just_one_word = sometext_part + + if just_one_word not in new_general_en_lex_data: + new_general_en_lex_data[just_one_word] = just_one_word + logging.info(f"Added \'{just_one_word}\' to {general_en_lex_new_path}") + write_lex_file(general_en_lex_new_path, new_general_en_lex_data) - else: - logging.error("Could not determine prefix, exiting.") - sys.exit(1) + for lang_entry in languages: + lang_code = lang_entry["code"] + if lang_code == "en": + continue + general_lang_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", f"general_{lang_code}.lex") + general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new") + + current_general_lang_lex_data = read_lex_file(general_lang_lex_new_path) + if just_one_word not in current_general_lang_lex_data: + current_general_lang_lex_data[just_one_word] = just_one_word + write_lex_file(general_lang_lex_new_path, current_general_lang_lex_data) + logging.info(f"Added \'{just_one_word}\' to {general_lang_lex_new_path}") + + for filepath in filepaths: + update_file_with_new_lexical_string(filepath, lex_string, just_one_word) + + else: + logging.error("Could not determine prefix, exiting.") + sys.exit(1) + + if missing_files: + logging.warning("The following files were not found:") + for f in missing_files: + logging.warning(f"- {f}") if __name__ == "__main__": - main() - - + main() \ No newline at end of file