From 2eb8c32e3066fb769d9a99f557a0850d8b64ff3e Mon Sep 17 00:00:00 2001 From: Brian Read Date: Mon, 7 Jul 2025 11:32:45 +0100 Subject: [PATCH] All but backup, yum and bugreport ok now --- lex_scan.py | 197 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 119 insertions(+), 78 deletions(-) diff --git a/lex_scan.py b/lex_scan.py index b110f58..fa780e3 100644 --- a/lex_scan.py +++ b/lex_scan.py @@ -5,14 +5,36 @@ import re import sys import json -# Configure logging -logging.basicConfig(level=logging.ERROR, format="%(asctime)s - %(levelname)s - %(message)s") +# Configure logger + +#logger.basicConfig(level=logger.WARNING, format="%(asctime)s - %(levelname)s - %(message)s") + +# Create a custom logger +logger = logging.getLogger("lex_scan_logger") +logger.setLevel(logging.DEBUG) # Set to lowest level needed by any handler + +# Formatter for both handlers +formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') + +# File handler (INFO and above) +file_handler = logging.FileHandler('lex_scan.log') +file_handler.setLevel(logging.INFO) +file_handler.setFormatter(formatter) + +# Console handler (WARNING and above) +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.WARNING) +console_handler.setFormatter(formatter) + +# Add handlers to the logger +logger.addHandler(file_handler) +logger.addHandler(console_handler) missing_files = [] def validate_panel_name(panel_name): if not panel_name[0].isupper(): - logging.error(f"Error: Panel name \'{panel_name}\' must start with a capital letter.") + logger.error(f"Error: Panel name \'{panel_name}\' must start with a capital letter.") sys.exit(1) def get_full_base_path(system): @@ -22,7 +44,7 @@ def check_controller_file_exists(system, panel): full_base_path = get_full_base_path(system) controller_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}.pm") if not os.path.exists(controller_path): - logging.error(f"Error: Controller file \'{controller_path}\' does not exist.") + logger.error(f"Error: Controller file \'{controller_path}\' does not exist.") sys.exit(1) return controller_path @@ -38,9 +60,9 @@ def extract_title_prefix(controller_path): ) if match: prefix = match.group(1) - logging.info(f"Extracted prefix: {prefix}") + logger.info(f"Extracted prefix: {prefix}") else: - logging.error( + logger.error( f"Error: Could not find title prefix in '{controller_path}'.\n" "Expected format: my $title = $c->l('_something')" ) @@ -54,15 +76,21 @@ def scan_application_files(system, panel, prefix, scan_general=False): # Controller file controller_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}.pm") - logging.info(f"Scanning controller file: {controller_path}") + logger.info(f"Scanning controller file: {controller_path}") scan_file_for_lexical_strings(controller_path, prefix, extracted_strings, scan_general) + + #Controller file custom code + controller_custom_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}-Custom.pm") + logger.info(f"Scanning Custom controller file: {controller_custom_path}") + scan_file_for_lexical_strings(controller_custom_path, prefix, extracted_strings, scan_general) + # Template files themes = ["default", "AdminLTE"] for theme in themes: template_base_path = os.path.join(full_base_path, "themes", theme, "templates") panel_template_path = os.path.join(template_base_path, f"{panel.lower()}.html.ep") - logging.info(f"Scanning panel template file: {panel_template_path}") + logger.info(f"Scanning panel template file: {panel_template_path}") scan_file_for_lexical_strings(panel_template_path, prefix, extracted_strings, scan_general) # Scan partials @@ -72,7 +100,7 @@ def scan_application_files(system, panel, prefix, scan_general=False): # Only scan partial files that match the pattern __.html.ep if filename.startswith(f"_{prefix.lower()}_") and filename.endswith(".html.ep"): partial_path = os.path.join(partials_dir, filename) - logging.info(f"Scanning partial template file: {partial_path}") + logger.info(f"Scanning partial template file: {partial_path}") scan_file_for_lexical_strings(partial_path, prefix, extracted_strings, scan_general) # Deduplicate lists of dicts in extracted_strings @@ -108,45 +136,67 @@ def scan_file_for_lexical_strings(filepath, prefix, extracted_strings_dict, scan # extracted_strings_dict[string].append(filepath) # Either scan for strings with a prefix or ones without + # and check for dis-allowed characters + allowed_pattern = r"[A-Za-z0-9_\-/%:,()\. @]+" if scan_general: - pattern = re.compile(r"l[\s|(][\"|\"](.*?)[\"|\"]\)") + pattern = re.compile(r"l[\s|(][\"|\'](.*?)[\"|\']\)") found_strings1 = pattern.findall(content) pattern_l_call = re.compile(r"\bl\s*(['\"])(.+?)\1") found_l_calls = [match[1] for match in pattern_l_call.findall(content)] found_strings = found_strings1 + found_l_calls for s in found_strings: - if not s.startswith(f"{prefix}_"): + # Ignore strings that start with the prefix (with underscore) + if s.startswith(f"{prefix}_"): + continue + s = s.replace(" ","_") + #print(f"General:{s} ") + if re.fullmatch(allowed_pattern, s): if s not in extracted_strings_dict: extracted_strings_dict[s] = [] if filepath not in extracted_strings_dict[s]: extracted_strings_dict[s].append(filepath) + else: + logger.error(f"Unexpected chars ({s}) found in {filepath}") + continue else: - pattern = re.compile(rf"{prefix}_[a-zA-Z0-9_]+") - found_strings = pattern.findall(content) - for s in found_strings: - if s not in extracted_strings_dict: - extracted_strings_dict[s] = [] - if filepath not in extracted_strings_dict[s]: - extracted_strings_dict[s].append(filepath) + pattern = re.compile( + rf"(['\"])" # opening quote + rf"({prefix}_" # prefix and underscore + rf"(?:\\.|(?!\1).)*?)" # non-greedy: escaped char or any char not the closing quote + rf"\1" # closing quote (same as opening) + ) + found_strings = [m.group(2) for m in pattern.finditer(content)] + + for s in found_strings: + #print(f"Prefix: {s}") + if re.fullmatch(allowed_pattern, s): + if s not in extracted_strings_dict: + extracted_strings_dict[s] = [] + if filepath not in extracted_strings_dict[s]: + extracted_strings_dict[s].append(filepath) + else: + logger.error(f"Unexpected chars ({s}) found in {filepath}") + continue def read_lex_file(filepath): - logging.info(f"Reading file: {filepath}") - lex_data = {} - if not os.path.exists(filepath): - logging.warning(f"File not found: {filepath}") - missing_files.append(filepath) - return lex_data - with open(filepath, 'r', encoding='utf-8') as f: - content = f.read() - # This regex finds all 'key' => 'value' pairs, even if separated by commas or newlines - pattern = r"'(.*?)'\s*=>\s*'(.*?)(?\s* + (['"])((?:\\.|(?!\3).)*)\3 # value in quotes, allowing escaped chars + """ + matches = re.findall(pattern, content, re.DOTALL | re.VERBOSE) + for _, key, quote, value in matches: + # Unescape the quote character and backslashes in value + value = value.replace(f"\\{quote}", quote).replace("\\\\", "\\") + lex_data[key] = value + return lex_data + + def write_lex_file(filepath, lex_data): """ Writes a dictionary to a lex file, sorted alphabetically by key (case-insensitive). @@ -155,7 +205,8 @@ def write_lex_file(filepath, lex_data): sorted_items = sorted(lex_data.items(), key=lambda item: item[0].lower()) with open(filepath, 'w', encoding='utf-8') as f: for key, value in sorted_items: - f.write(f"'{key}' => '{value}',{os.linesep}") + value = value.replace("'",'"') + f.write(f"'{key}' => '{value}',{os.linesep}") def read_languages_json(filepath): @@ -173,9 +224,10 @@ def update_file_with_new_lexical_string(filepath, old_string, new_string): new_content = content.replace(old_string, new_string) with open(filepath, 'w') as f: f.write(new_content) - logging.info(f"Updated \'{old_string}\' to \'{new_string}\' in file: {filepath}") + #map any single quotes to double + logger.info(f"Updated \'{old_string}\' to \'{new_string}\' in file: {filepath}") except Exception as e: - logging.error(f"Error updating file {filepath}: {e}") + logger.error(f"Error updating file {filepath}: {e}") def export_sorted_missing_lex(input_file1, input_file2, output_file): @@ -183,17 +235,6 @@ def export_sorted_missing_lex(input_file1, input_file2, output_file): Reads two lex files, finds all entries in input_file1 missing from input_file2, sorts them alphabetically by key (case-insensitive), and writes them to output_file. """ - def read_lex_file(filepath): - """Reads a lex file and returns a dict of key-value pairs.""" - if not os.path.exists(filepath): - print(f"File not found: {filepath}") - return {} - with open(filepath, 'r', encoding='utf-8') as f: - content = f.read() - # Regex to find all 'key' => 'value' pairs - pattern = r"'([^']+)'\s*=>\s*'(.*?)(? '{dict1[k]}',\n") - logging.info(f"Missing lines written to {output_file}:") + logger.info(f"Missing lines written to {output_file}:") #for k in sorted_missing_keys: # print(f"'{k}' => '{dict1[k]}',") @@ -226,17 +267,16 @@ def main(): edit_files = args.edit do_lang = args.lang - logging.info(f"Starting scan for panel: {panel}, system: {system} edit: {edit_files} lang: {do_lang}") - #quit(0) + logger.warning(f"Lex scan for panel: {panel}, system: {system} edit: {edit_files} lang: {do_lang}\n") validate_panel_name(panel) controller_path = check_controller_file_exists(system, panel) prefix = extract_title_prefix(controller_path) if prefix: - logging.info(f"Scanning application files for strings with prefix \'{prefix}\'...") + logger.info(f"Scanning application files for strings with prefix \'{prefix}\'...") extracted_panel_strings = scan_application_files(system, panel, prefix) - logging.info(f"Deduplicated extracted panel strings: {len(extracted_panel_strings)} unique strings found.") + logger.info(f"Deduplicated extracted panel strings: {len(extracted_panel_strings)} unique strings found.") # Process panel-specific English lexical file # Output to current working directory @@ -246,11 +286,11 @@ def main(): full_base_path = get_full_base_path(system) # Corrected capitalization for panel in path - en_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", panel, f"{panel.lower()}_en.lex") + en_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", panel, f"{panel.lower()}_en.lex.bak") en_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.new") en_lex_data = read_lex_file(en_lex_path) - logging.info(f"Original English lex file lines: {len(en_lex_data)}") + logger.info(f"Original English lex file lines: {len(en_lex_data)}") new_en_lex_data = {} for lex_string in extracted_panel_strings.keys(): @@ -269,7 +309,7 @@ def main(): new_en_lex_data[lex_string] = sometext write_lex_file(en_lex_new_path, new_en_lex_data) - logging.info(f"Generated {en_lex_new_path}. Lines in new file: {len(new_en_lex_data)}, Lines in original file: {len(en_lex_data)}") + logger.info(f"Generated {en_lex_new_path}. Lines in new file: {len(new_en_lex_data)}, Lines in original file: {len(en_lex_data)}") #Create file of the ones not in the new lex file output_diff_file = os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.diff") @@ -288,7 +328,7 @@ def main(): lang_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_{lang_code}.lex.new") lang_lex_data = read_lex_file(lang_lex_path) - logging.info(f"Original {lang_code} lex file lines: {len(lang_lex_data)}") + logger.info(f"Original {lang_code} lex file lines: {len(lang_lex_data)}") new_lang_lex_data = {} for lex_string in extracted_panel_strings.keys(): @@ -300,28 +340,29 @@ def main(): new_lang_lex_data[lex_string] = sometext_from_en write_lex_file(lang_lex_new_path, new_lang_lex_data) - logging.info(f"Generated {lang_lex_new_path}. Lines in new file: {len(new_lang_lex_data)}, Lines in original file: {len(lang_lex_data)}") + logger.info(f"Generated {lang_lex_new_path}. Lines in new file: {len(new_lang_lex_data)}, Lines in original file: {len(lang_lex_data)}") - logging.info("") - logging.info("Scanning application files for general lexical strings...") + logger.info("") + logger.info("Scanning application files for general lexical strings...") extracted_general_strings = scan_application_files(system, panel, prefix, scan_general=True) - logging.info(f"Deduplicated extracted general strings: {len(extracted_general_strings)} unique strings found.") + logger.info(f"Deduplicated extracted general strings: {len(extracted_general_strings)} unique strings found.") general_lex_output_dir = os.path.join(os.getcwd(), "output", "General") os.makedirs(general_lex_output_dir, exist_ok=True) - general_en_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", "general_en.lex") + general_en_lex_path_orig = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", "general_en.lex.bak") general_en_lex_new_path = os.path.join(general_lex_output_dir, "general_en.lex.new") - general_en_lex_data = read_lex_file(general_en_lex_path) - logging.info(f"Original general English lex file lines: {len(general_en_lex_data)}") + general_en_lex_data_orig = read_lex_file(general_en_lex_path_orig) + logger.info(f"Original general English lex file lines: {len(general_en_lex_data_orig)}") new_general_en_lex_data = read_lex_file(general_en_lex_new_path) for lex_string in extracted_general_strings.keys(): - if lex_string in general_en_lex_data: - new_general_en_lex_data[lex_string] = general_en_lex_data[lex_string] + if lex_string in general_en_lex_data_orig: + new_general_en_lex_data[lex_string] = general_en_lex_data_orig[lex_string] else: sometext = lex_string.replace("_", " ") + sometext = sometext.replace("'",'"') # Split into words words = sometext.split() # Lowercase all words, capitalize the first @@ -330,9 +371,9 @@ def main(): sometext = ' '.join(words) new_general_en_lex_data[lex_string] = sometext write_lex_file(general_en_lex_new_path, new_general_en_lex_data) - logging.info(f"Generated {general_en_lex_new_path}. Lines in new file: {len(new_general_en_lex_data)}, Lines in original file: {len(general_en_lex_data)}") + logger.info(f"Generated {general_en_lex_new_path}. Lines in new file: {len(new_general_en_lex_data)}, Lines in original file: {len(general_en_lex_data_orig)}") - logging.info("") + logger.info("") if do_lang: for lang_entry in languages: lang_code = lang_entry["code"] @@ -343,7 +384,7 @@ def main(): general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new") general_lang_lex_data = read_lex_file(general_lang_lex_path) - logging.info(f"Original general {lang_code} lex file lines: {len(general_lang_lex_data)}") + logger.info(f"Original general {lang_code} lex file lines: {len(general_lang_lex_data)}") new_general_lang_lex_data = {} for lex_string in extracted_general_strings.keys(): @@ -354,11 +395,11 @@ def main(): new_general_lang_lex_data[lex_string] = sometext_from_en write_lex_file(general_lang_lex_new_path, new_general_lang_lex_data) - logging.info(f"Generated {general_lang_lex_new_path}. Lines in new file: {len(new_general_lang_lex_data)}, Lines in original file: {len(general_lang_lex_data)}") + logger.info(f"Generated {general_lang_lex_new_path}. Lines in new file: {len(new_general_lang_lex_data)}, Lines in original file: {len(general_lang_lex_data)}") - logging.info("") + logger.info("") if edit_files: - logging.info("Handling single-word lexical strings...") + logger.info("Handling single-word lexical strings...") for lex_string, filepaths in extracted_panel_strings.items(): if lex_string.startswith(f"{prefix}_"): sometext_part = lex_string[len(prefix) + 1:] @@ -367,7 +408,7 @@ def main(): if just_one_word not in new_general_en_lex_data: new_general_en_lex_data[just_one_word] = just_one_word - logging.info(f"Added \'{just_one_word}\' to {general_en_lex_new_path}") + logger.info(f"Added \'{just_one_word}\' to {general_en_lex_new_path}") write_lex_file(general_en_lex_new_path, new_general_en_lex_data) for lang_entry in languages: @@ -381,19 +422,19 @@ def main(): if just_one_word not in current_general_lang_lex_data: current_general_lang_lex_data[just_one_word] = just_one_word write_lex_file(general_lang_lex_new_path, current_general_lang_lex_data) - logging.info(f"Added \'{just_one_word}\' to {general_lang_lex_new_path}") + logger.info(f"Added \'{just_one_word}\' to {general_lang_lex_new_path}") for filepath in filepaths: update_file_with_new_lexical_string(filepath, lex_string, just_one_word) else: - logging.error("Could not determine prefix, exiting.") + logger.error("Could not determine prefix, exiting.") sys.exit(1) if missing_files: - logging.warning("The following files were not found:") + logger.warning("The following files were not found:") for f in missing_files: - logging.warning(f"- {f}") + logger.warning(f"- {f}") if __name__ == "__main__": main() \ No newline at end of file