import argparse import logging import os import re import sys import json # Configure logger #logger.basicConfig(level=logger.WARNING, format="%(asctime)s - %(levelname)s - %(message)s") # Create a custom logger logger = logging.getLogger("lex_scan_logger") logger.setLevel(logging.DEBUG) # Set to lowest level needed by any handler # Formatter for both handlers formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') # File handler (INFO and above) file_handler = logging.FileHandler('lex_scan.log') file_handler.setLevel(logging.INFO) file_handler.setFormatter(formatter) # Console handler (WARNING and above) console_handler = logging.StreamHandler() console_handler.setLevel(logging.WARNING) console_handler.setFormatter(formatter) # Add handlers to the logger logger.addHandler(file_handler) logger.addHandler(console_handler) missing_files = [] def validate_panel_name(panel_name): if not panel_name[0].isupper(): logger.error(f"Error: Panel name \'{panel_name}\' must start with a capital letter.") sys.exit(1) def get_full_base_path(system): return os.path.expanduser(os.path.join("~", system, "usr", "share", "smanager")) def check_controller_file_exists(system, panel): full_base_path = get_full_base_path(system) controller_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}.pm") if not os.path.exists(controller_path): logger.error(f"Error: Controller file \'{controller_path}\' does not exist.") sys.exit(1) return controller_path def extract_title_prefix(controller_path): prefix = None with open(controller_path, 'r') as f: content = f.read() # Regex: my $title = $c->l('_'); match = re.search( r"my\s*\$title\s*=\s*\$c->l\(\s*['\"]([A-Za-z]{2,10})_[^'\"]+['\"]\s*\)", content ) if match: prefix = match.group(1) logger.info(f"Extracted prefix: {prefix}") else: logger.error( f"Error: Could not find title prefix in '{controller_path}'.\n" "Expected format: my $title = $c->l('_something')" ) sys.exit(1) return prefix def find_matching_files_variable_part(input_string, directory): # Extract the first alphanumeric part from the input string match = re.match(r"([A-Za-z0-9]+)", input_string) if not match: return [] variable_part = match.group(1) # Try matching the full variable_part, then progressively remove last characters for length in range(len(variable_part), 1, -1): sub_part = variable_part[:length] matching_files = [] for fname in os.listdir(directory): name, ext = os.path.splitext(fname) if name.startswith(sub_part): # match with extra characters allowed matching_files.append(os.path.join(directory, fname)) if matching_files: return matching_files return [] def scan_application_files(system, panel, prefix, scan_general=False): extracted_strings = {} full_base_path = get_full_base_path(system) # Controller file controller_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}.pm") logger.info(f"Scanning controller file: {controller_path}") scan_file_for_lexical_strings(controller_path, prefix, extracted_strings, scan_general) #Controller file custom code controller_custom_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}-Custom.pm") logger.info(f"Scanning Custom controller file: {controller_custom_path}") scan_file_for_lexical_strings(controller_custom_path, prefix, extracted_strings, scan_general) # Template files themes = ["default", "AdminLTE"] for theme in themes: template_base_path = os.path.join(full_base_path, "themes", theme, "templates") if panel in ['Backup','Yum','Bugreport']: #find the extra layout type files that these use (they do not have partials) template_files = find_matching_files_variable_part(panel.lower(),template_base_path) # print(f"Matching template files: {panel.lower()!r} -> Matches: {[os.path.basename(m) for m in template_files]}") for file_path in template_files: panel_template_path = os.path.join(template_base_path, f"{file_path}") logger.warning(f"Scanning panel template file: {panel_template_path}") scan_file_for_lexical_strings(panel_template_path, prefix, extracted_strings, scan_general) else: panel_template_path = os.path.join(template_base_path, f"{panel.lower()}.html.ep") logger.info(f"Scanning panel template file: {panel_template_path}") scan_file_for_lexical_strings(panel_template_path, prefix, extracted_strings, scan_general) # Scan partials partials_dir = os.path.join(template_base_path, "partials") if os.path.exists(partials_dir): for filename in os.listdir(partials_dir): # Only scan partial files that match the pattern __.html.ep if filename.startswith(f"_{prefix.lower()}_") and filename.endswith(".html.ep"): partial_path = os.path.join(partials_dir, filename) logger.info(f"Scanning partial template file: {partial_path}") scan_file_for_lexical_strings(partial_path, prefix, extracted_strings, scan_general) # Deduplicate lists of dicts in extracted_strings for key, value in extracted_strings.items(): if isinstance(value, list) and value and isinstance(value[0], dict): # Deduplicate list of dicts using JSON serialization seen = set() deduped = [] for d in value: ser = json.dumps(d, sort_keys=True) if ser not in seen: seen.add(ser) deduped.append(d) extracted_strings[key] = deduped return extracted_strings def scan_file_for_lexical_strings(filepath, prefix, extracted_strings_dict, scan_general): if not os.path.exists(filepath): print(f"Missing file: {filepath}") return with open(filepath, 'r', encoding='utf-8') as f: content = f.read() # Always scan for l '...' or l "..." # pattern_l_call = re.compile(r"\bl\s*(['\"])(.+?)\1") # found_l_calls = pattern_l_call.findall(content) # for quote, string in found_l_calls: # if string not in extracted_strings_dict: # extracted_strings_dict[string] = [] # if filepath not in extracted_strings_dict[string]: # extracted_strings_dict[string].append(filepath) # Either scan for strings with a prefix or ones without # and check for dis-allowed characters allowed_pattern = r"[A-Za-z0-9_\-/%:,()\. @]+" if scan_general: pattern = re.compile(r"l[\s|(][\"|\'](.*?)[\"|\']\)") found_strings1 = pattern.findall(content) pattern_l_call = re.compile(r"\bl\s*(['\"])(.+?)\1") found_l_calls = [match[1] for match in pattern_l_call.findall(content)] found_strings = found_strings1 + found_l_calls for s in found_strings: # Ignore strings that start with the prefix (with underscore) if s.startswith(f"{prefix}_"): continue s = s.replace(" ","_") #print(f"General:{s} ") if re.fullmatch(allowed_pattern, s): if s not in extracted_strings_dict: extracted_strings_dict[s] = [] if filepath not in extracted_strings_dict[s]: extracted_strings_dict[s].append(filepath) else: logger.error(f"Unexpected chars ({s}) found in {filepath}") continue else: pattern = re.compile( rf"(['\"])" # opening quote rf"({prefix}_" # prefix and underscore rf"(?:\\.|(?!\1).)*?)" # non-greedy: escaped char or any char not the closing quote rf"\1" # closing quote (same as opening) ) found_strings = [m.group(2) for m in pattern.finditer(content)] for s in found_strings: #print(f"Prefix: {s}") if re.fullmatch(allowed_pattern, s): if s not in extracted_strings_dict: extracted_strings_dict[s] = [] if filepath not in extracted_strings_dict[s]: extracted_strings_dict[s].append(filepath) else: logger.error(f"Unexpected chars ({s}) found in {filepath}") continue def read_lex_file(filepath): logger.info(f"Reading file: {filepath}") lex_data = {} with open(filepath, 'r', encoding='utf-8') as f: content = f.read() # Improved regex: handles single/double quotes and escaped quotes in value pattern = r""" (['"])(.*?)\1 # key in quotes \s*=>\s* (['"])((?:\\.|(?!\3).)*)\3 # value in quotes, allowing escaped chars """ matches = re.findall(pattern, content, re.DOTALL | re.VERBOSE) for _, key, quote, value in matches: # Unescape the quote character and backslashes in value value = value.replace(f"\\{quote}", quote).replace("\\\\", "\\") lex_data[key] = value return lex_data def write_lex_file(filepath, lex_data): """ Writes a dictionary to a lex file, sorted alphabetically by key (case-insensitive). """ # Sort the dictionary by key, case-insensitive sorted_items = sorted(lex_data.items(), key=lambda item: item[0].lower()) with open(filepath, 'w', encoding='utf-8') as f: for key, value in sorted_items: value = value.replace("'",'"') f.write(f"'{key}' => '{value}',{os.linesep}") def read_languages_json(filepath): if not os.path.exists(filepath): missing_files.append(filepath) return [] # Return empty list instead of exiting with open(filepath, 'r') as f: languages = json.load(f) return languages def update_file_with_new_lexical_string(filepath, old_string, new_string): try: with open(filepath, 'r') as f: content = f.read() new_content = content.replace(old_string, new_string) with open(filepath, 'w') as f: f.write(new_content) #map any single quotes to double logger.info(f"Updated \'{old_string}\' to \'{new_string}\' in file: {filepath}") except Exception as e: logger.error(f"Error updating file {filepath}: {e}") def export_sorted_missing_lex(input_file1, input_file2, output_file): """ Reads two lex files, finds all entries in input_file1 missing from input_file2, sorts them alphabetically by key (case-insensitive), and writes them to output_file. """ dict1 = read_lex_file(input_file1) dict2 = read_lex_file(input_file2) # Find keys in input_file1 but not in input_file2 missing_keys = set(dict1.keys()) - set(dict2.keys()) sorted_missing_keys = sorted(missing_keys, key=lambda x: x.lower()) # Write missing, sorted lines to output_file with open(output_file, 'w', encoding='utf-8') as out: for k in sorted_missing_keys: out.write(f"'{k}' => '{dict1[k]}',\n") logger.info(f"Missing lines written to {output_file}:") #for k in sorted_missing_keys: # print(f"'{k}' => '{dict1[k]}',") def main(): parser = argparse.ArgumentParser(description="Scan Mojolicious application files for lexical strings.") parser.add_argument("-p", "--panel", required=True, help="Name of the Mojolicious panel (e.g., MyPanel).") parser.add_argument("-s", "--system", default="SME11", help="System name (default: SME11).") parser.add_argument("-e", "--edit", action="store_true", help="Enable editing of original files (default: False).") parser.add_argument("-l", "--lang", action="store_true", help="Enable other language processing (default: False).") args = parser.parse_args() panel = args.panel system = args.system edit_files = args.edit do_lang = args.lang logger.warning(f"Lex scan for panel: {panel}, system: {system} edit: {edit_files} lang: {do_lang}\n") validate_panel_name(panel) controller_path = check_controller_file_exists(system, panel) prefix = extract_title_prefix(controller_path) if prefix: logger.info(f"Scanning application files for strings with prefix \'{prefix}\'...") extracted_panel_strings = scan_application_files(system, panel, prefix) logger.info(f"Deduplicated extracted panel strings: {len(extracted_panel_strings)} unique strings found.") # Process panel-specific English lexical file # Output to current working directory panel_lex_output_dir = os.path.join(os.getcwd(), "output", panel.capitalize()) os.makedirs(panel_lex_output_dir, exist_ok=True) full_base_path = get_full_base_path(system) # Corrected capitalization for panel in path en_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", panel, f"{panel.lower()}_en.lex.bak") en_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.new") en_lex_data = read_lex_file(en_lex_path) logger.info(f"Original English lex file lines: {len(en_lex_data)}") new_en_lex_data = {} for lex_string in extracted_panel_strings.keys(): if lex_string in en_lex_data: new_en_lex_data[lex_string] = en_lex_data[lex_string] else: #Replace rhs by the lhs less the prefix and no underlines, in lowercase (but capitalised) # this may make a reasonable message, derived from the lex string id. sometext = lex_string.replace(f"{prefix}_", "").replace("_", " ") # Split into words words = sometext.split() # Lowercase all words, capitalize the first if words: words = [words[0].capitalize()] + [w.lower() for w in words[1:]] sometext = ' '.join(words) new_en_lex_data[lex_string] = sometext write_lex_file(en_lex_new_path, new_en_lex_data) logger.info(f"Generated {en_lex_new_path}. Lines in new file: {len(new_en_lex_data)}, Lines in original file: {len(en_lex_data)}") #Create file of the ones not in the new lex file output_diff_file = os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.diff") export_sorted_missing_lex(en_lex_path, en_lex_new_path, output_diff_file) if do_lang: languages_json_path = os.path.join(".", "Templates", "languages.json") # Corrected path languages = read_languages_json(languages_json_path) for lang_entry in languages: lang_code = lang_entry["code"] if lang_code == "en": continue lang_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", panel, f"{panel.lower()}_{lang_code}.lex") lang_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_{lang_code}.lex.new") lang_lex_data = read_lex_file(lang_lex_path) logger.info(f"Original {lang_code} lex file lines: {len(lang_lex_data)}") new_lang_lex_data = {} for lex_string in extracted_panel_strings.keys(): if lex_string in lang_lex_data: new_lang_lex_data[lex_string] = lang_lex_data[lex_string] else: sometext_from_en = new_en_lex_data.get(lex_string, "") new_en_lex_data[lex_string] = sometext new_lang_lex_data[lex_string] = sometext_from_en write_lex_file(lang_lex_new_path, new_lang_lex_data) logger.info(f"Generated {lang_lex_new_path}. Lines in new file: {len(new_lang_lex_data)}, Lines in original file: {len(lang_lex_data)}") logger.info("") logger.info("Scanning application files for general lexical strings...") extracted_general_strings = scan_application_files(system, panel, prefix, scan_general=True) logger.info(f"Deduplicated extracted general strings: {len(extracted_general_strings)} unique strings found.") general_lex_output_dir = os.path.join(os.getcwd(), "output", "General") os.makedirs(general_lex_output_dir, exist_ok=True) general_en_lex_path_orig = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", "general_en.lex.bak") general_en_lex_new_path = os.path.join(general_lex_output_dir, "general_en.lex.new") general_en_lex_data_orig = read_lex_file(general_en_lex_path_orig) logger.info(f"Original general English lex file lines: {len(general_en_lex_data_orig)}") new_general_en_lex_data = read_lex_file(general_en_lex_new_path) for lex_string in extracted_general_strings.keys(): if lex_string in general_en_lex_data_orig: new_general_en_lex_data[lex_string] = general_en_lex_data_orig[lex_string] else: sometext = lex_string.replace("_", " ") sometext = sometext.replace("'",'"') # Split into words words = sometext.split() # Lowercase all words, capitalize the first if words: words = [words[0].capitalize()] + [w.lower() for w in words[1:]] sometext = ' '.join(words) new_general_en_lex_data[lex_string] = sometext write_lex_file(general_en_lex_new_path, new_general_en_lex_data) logger.info(f"Generated {general_en_lex_new_path}. Lines in new file: {len(new_general_en_lex_data)}, Lines in original file: {len(general_en_lex_data_orig)}") logger.info("") if do_lang: for lang_entry in languages: lang_code = lang_entry["code"] if lang_code == "en": continue general_lang_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", f"general_{lang_code}.lex") general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new") general_lang_lex_data = read_lex_file(general_lang_lex_path) logger.info(f"Original general {lang_code} lex file lines: {len(general_lang_lex_data)}") new_general_lang_lex_data = {} for lex_string in extracted_general_strings.keys(): if lex_string in general_lang_lex_data: new_general_lang_lex_data[lex_string] = general_lang_lex_data[lex_string] else: sometext_from_en = new_general_en_lex_data.get(lex_string, "") new_general_lang_lex_data[lex_string] = sometext_from_en write_lex_file(general_lang_lex_new_path, new_general_lang_lex_data) logger.info(f"Generated {general_lang_lex_new_path}. Lines in new file: {len(new_general_lang_lex_data)}, Lines in original file: {len(general_lang_lex_data)}") logger.info("") if edit_files: logger.info("Handling single-word lexical strings...") for lex_string, filepaths in extracted_panel_strings.items(): if lex_string.startswith(f"{prefix}_"): sometext_part = lex_string[len(prefix) + 1:] if "_" not in sometext_part: just_one_word = sometext_part if just_one_word not in new_general_en_lex_data: new_general_en_lex_data[just_one_word] = just_one_word logger.info(f"Added \'{just_one_word}\' to {general_en_lex_new_path}") write_lex_file(general_en_lex_new_path, new_general_en_lex_data) for lang_entry in languages: lang_code = lang_entry["code"] if lang_code == "en": continue general_lang_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", f"general_{lang_code}.lex") general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new") current_general_lang_lex_data = read_lex_file(general_lang_lex_new_path) if just_one_word not in current_general_lang_lex_data: current_general_lang_lex_data[just_one_word] = just_one_word write_lex_file(general_lang_lex_new_path, current_general_lang_lex_data) logger.info(f"Added \'{just_one_word}\' to {general_lang_lex_new_path}") for filepath in filepaths: update_file_with_new_lexical_string(filepath, lex_string, just_one_word) else: logger.error("Could not determine prefix, exiting.") sys.exit(1) if missing_files: logger.warning("The following files were not found:") for f in missing_files: logger.warning(f"- {f}") if __name__ == "__main__": main()