diff --git a/lex_scan.py b/lex_scan.py
index 51fdc81..b110f58 100644
--- a/lex_scan.py
+++ b/lex_scan.py
@@ -6,7 +6,7 @@
 import sys
 import json
 
 # Configure logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logging.basicConfig(level=logging.ERROR, format="%(asctime)s - %(levelname)s - %(message)s")
 
 missing_files = []
@@ -26,16 +26,24 @@ def check_controller_file_exists(system, panel):
         sys.exit(1)
     return controller_path
 
+
 def extract_title_prefix(controller_path):
     prefix = None
     with open(controller_path, 'r') as f:
         content = f.read()
-        match = re.search(r"\$c->l\([\"|\"]([A-Za-z]{2,4})_.*?\)", content)
+        # Regex: my $title = $c->l('<prefix>_<something>');
+        match = re.search(
+            r"my\s*\$title\s*=\s*\$c->l\(\s*['\"]([A-Za-z]{2,10})_[^'\"]+['\"]\s*\)",
+            content
+        )
         if match:
             prefix = match.group(1)
             logging.info(f"Extracted prefix: {prefix}")
         else:
-            logging.error(f"Error: Could not find title prefix in \'{controller_path}\'. Expected format: my $title = $c->l(\"_something\") or my $title = $c->l(\\\\\\\\'_something\\\")")
+            logging.error(
+                f"Error: Could not find title prefix in '{controller_path}'.\n"
+                "Expected format: my $title = $c->l('_something')"
+            )
             sys.exit(1)
     return prefix
 
@@ -67,32 +75,59 @@ def scan_application_files(system, panel, prefix, scan_general=False):
         logging.info(f"Scanning partial template file: {partial_path}")
         scan_file_for_lexical_strings(partial_path, prefix, extracted_strings, scan_general)
 
+    # Deduplicate lists of dicts in extracted_strings
+    for key, value in extracted_strings.items():
+        if isinstance(value, list) and value and isinstance(value[0], dict):
+            # Deduplicate list of dicts using JSON serialization
+            seen = set()
+            deduped = []
+            for d in value:
+                ser = json.dumps(d, sort_keys=True)
+                if ser not in seen:
+                    seen.add(ser)
+                    deduped.append(d)
+            extracted_strings[key] = deduped
+
     return extracted_strings
 
 def scan_file_for_lexical_strings(filepath, prefix, extracted_strings_dict, scan_general):
-    if not os.path.exists(filepath):
-        missing_files.append(filepath)
-        return
-
-    with open(filepath, 'r') as f:
-        content = f.read()
-        if scan_general:
-            pattern = re.compile(r"l[\s|(][\"|\"](.*?)[\"|\"]\)")
-            found_strings = pattern.findall(content)
-            for s in found_strings:
-                if not s.startswith(f"{prefix}_"):
-                    if s not in extracted_strings_dict:
-                        extracted_strings_dict[s] = []
-                    if filepath not in extracted_strings_dict[s]:
-                        extracted_strings_dict[s].append(filepath)
-        else:
-            pattern = re.compile(rf"{prefix}_[a-zA-Z0-9_]+")
-            found_strings = pattern.findall(content)
-            for s in found_strings:
-                if s not in extracted_strings_dict:
-                    extracted_strings_dict[s] = []
-                if filepath not in extracted_strings_dict[s]:
-                    extracted_strings_dict[s].append(filepath)
+    if not os.path.exists(filepath):
+        print(f"Missing file: {filepath}")
+        return
+
+    with open(filepath, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    # Always scan for l '...' or l "..."
+    # pattern_l_call = re.compile(r"\bl\s*(['\"])(.+?)\1")
+    # found_l_calls = pattern_l_call.findall(content)
+    # for quote, string in found_l_calls:
+    #     if string not in extracted_strings_dict:
+    #         extracted_strings_dict[string] = []
+    #     if filepath not in extracted_strings_dict[string]:
+    #         extracted_strings_dict[string].append(filepath)
+
+    # Either scan for strings with a prefix or ones without
+    if scan_general:
+        pattern = re.compile(r"l[\s|(][\"|\"](.*?)[\"|\"]\)")
+        found_strings1 = pattern.findall(content)
+        pattern_l_call = re.compile(r"\bl\s*(['\"])(.+?)\1")
+        found_l_calls = [match[1] for match in pattern_l_call.findall(content)]
+        found_strings = found_strings1 + found_l_calls
+        for s in found_strings:
+            if not s.startswith(f"{prefix}_"):
+                if s not in extracted_strings_dict:
+                    extracted_strings_dict[s] = []
+                if filepath not in extracted_strings_dict[s]:
+                    extracted_strings_dict[s].append(filepath)
+    else:
+        pattern = re.compile(rf"{prefix}_[a-zA-Z0-9_]+")
+        found_strings = pattern.findall(content)
+        for s in found_strings:
+            if s not in extracted_strings_dict:
+                extracted_strings_dict[s] = []
+            if filepath not in extracted_strings_dict[s]:
+                extracted_strings_dict[s].append(filepath)
 
 def read_lex_file(filepath):
     logging.info(f"Reading file: {filepath}")
@@ -111,12 +146,17 @@ def read_lex_file(filepath):
             value = value.replace("\\'", "'")
             lex_data[key] = value
     return lex_data
-
 def write_lex_file(filepath, lex_data):
-    with open(filepath, 'w') as f:
-        for key, value in lex_data.items():
-            f.write(f"\'{key}\' => \'{value}\',{os.linesep}")
+    """
+    Writes a dictionary to a lex file, sorted alphabetically by key (case-insensitive).
+    """
+    # Sort the dictionary by key, case-insensitive
+    sorted_items = sorted(lex_data.items(), key=lambda item: item[0].lower())
+    with open(filepath, 'w', encoding='utf-8') as f:
+        for key, value in sorted_items:
+            f.write(f"'{key}' => '{value}',{os.linesep}")
+
 
 def read_languages_json(filepath):
     if not os.path.exists(filepath):
@@ -137,6 +177,41 @@ def update_file_with_new_lexical_string(filepath, old_string, new_string):
     except Exception as e:
         logging.error(f"Error updating file {filepath}: {e}")
 
+
+def export_sorted_missing_lex(input_file1, input_file2, output_file):
+    """
+    Reads two lex files, finds all entries in input_file1 missing from input_file2,
+    sorts them alphabetically by key (case-insensitive), and writes them to output_file.
+    """
+    def read_lex_file(filepath):
+        """Reads a lex file and returns a dict of key-value pairs."""
+        if not os.path.exists(filepath):
+            print(f"File not found: {filepath}")
+            return {}
+        with open(filepath, 'r', encoding='utf-8') as f:
+            content = f.read()
+        # Regex to find all 'key' => 'value' pairs
+        pattern = r"'([^']+)'\s*=>\s*'(.*?)(?<!\\)'"
+        matches = re.findall(pattern, content, re.DOTALL)
+        return dict(matches)
+
+    dict1 = read_lex_file(input_file1)
+    dict2 = read_lex_file(input_file2)
+
+    # Find keys in input_file1 that are missing from input_file2
+    missing_keys = [k for k in dict1 if k not in dict2]
+    # Sort alphabetically, case-insensitive
+    sorted_missing_keys = sorted(missing_keys, key=lambda k: k.lower())
+
+    with open(output_file, 'w', encoding='utf-8') as f:
+        for k in sorted_missing_keys:
+            f.write(f"'{k}' => '{dict1[k]}',\n")
+
+    logging.info(f"Missing lines written to {output_file}:")
+    #for k in sorted_missing_keys:
+    #    print(f"'{k}' => '{dict1[k]}',")
+
+
 def main():
     parser = argparse.ArgumentParser(description="Scan Mojolicious application files for lexical strings.")
     parser.add_argument("-p", "--panel", required=True, help="Name of the Mojolicious panel (e.g., MyPanel).")
@@ -195,6 +270,10 @@ def main():
         write_lex_file(en_lex_new_path, new_en_lex_data)
         logging.info(f"Generated {en_lex_new_path}. Lines in new file: {len(new_en_lex_data)}, Lines in original file: {len(en_lex_data)}")
+
+        #Create file of the ones not in the new lex file
+        output_diff_file = os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.diff")
+        export_sorted_missing_lex(en_lex_path, en_lex_new_path, output_diff_file)
 
     if do_lang:
         languages_json_path = os.path.join(".", "Templates", "languages.json")  # Corrected path
@@ -223,6 +302,7 @@ def main():
             write_lex_file(lang_lex_new_path, new_lang_lex_data)
             logging.info(f"Generated {lang_lex_new_path}. Lines in new file: {len(new_lang_lex_data)}, Lines in original file: {len(lang_lex_data)}")
 
+    logging.info("")
     logging.info("Scanning application files for general lexical strings...")
     extracted_general_strings = scan_application_files(system, panel, prefix, scan_general=True)
     logging.info(f"Deduplicated extracted general strings: {len(extracted_general_strings)} unique strings found.")
@@ -235,7 +315,7 @@ def main():
     general_en_lex_data = read_lex_file(general_en_lex_path)
     logging.info(f"Original general English lex file lines: {len(general_en_lex_data)}")
 
-    new_general_en_lex_data = {}
+    new_general_en_lex_data = read_lex_file(general_en_lex_new_path)
 
     for lex_string in extracted_general_strings.keys():
         if lex_string in general_en_lex_data:
@@ -252,6 +332,7 @@ def main():
         write_lex_file(general_en_lex_new_path, new_general_en_lex_data)
         logging.info(f"Generated {general_en_lex_new_path}. Lines in new file: {len(new_general_en_lex_data)}, Lines in original file: {len(general_en_lex_data)}")
+    logging.info("")
 
     if do_lang:
         for lang_entry in languages:
             lang_code = lang_entry["code"]
@@ -275,6 +356,7 @@ def main():
             write_lex_file(general_lang_lex_new_path, new_general_lang_lex_data)
            logging.info(f"Generated {general_lang_lex_new_path}. Lines in new file: {len(new_general_lang_lex_data)}, Lines in original file: {len(general_lang_lex_data)}")
+            logging.info("")
 
     if edit_files:
         logging.info("Handling single-word lexical strings...")
         for lex_string, filepaths in extracted_panel_strings.items():
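
Note on the deduplication block added to scan_application_files: dicts are not
hashable, so the patch serializes each dict to a canonical JSON string
(sort_keys=True makes key order irrelevant) and tracks those strings in a set.
A minimal standalone sketch of the same trick; the sample records are
illustrative only:

import json

records = [{"a": 1, "b": 2}, {"b": 2, "a": 1}, {"a": 3}]
seen = set()
deduped = []
for d in records:
    # Canonical JSON form: equal dicts serialize identically regardless of key order
    ser = json.dumps(d, sort_keys=True)
    if ser not in seen:
        seen.add(ser)
        deduped.append(d)
print(deduped)  # [{'a': 1, 'b': 2}, {'a': 3}]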
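
Note on the two patterns combined in the scan_general branch of
scan_file_for_lexical_strings: the older character-class pattern only matches
double-quoted l("...") calls that end in a closing parenthesis, while the new
backreference pattern matches bare l '...' and l "..." calls (either quote
style) but not parenthesized ones, which is why the patch concatenates both
result lists. A minimal sketch; the template snippet is made up for
illustration:

import re

sample = """<% l("Save") %>  <% l 'usr_Account' %>  <% l "Cancel" %>"""

old_pattern = re.compile(r"l[\s|(][\"|\"](.*?)[\"|\"]\)")
new_pattern = re.compile(r"\bl\s*(['\"])(.+?)\1")

print(old_pattern.findall(sample))                  # ['Save'] -- misses the bare calls
print([m[1] for m in new_pattern.findall(sample)])  # ['usr_Account', 'Cancel']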
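
Note on the lex-entry pattern in export_sorted_missing_lex: assuming lex values
escape embedded quotes as \', the (?<!\\)' lookbehind ends a value only at an
unescaped quote, so escaped quotes stay part of the value. A small sketch under
that assumption; the sample entries are illustrative only:

import re

lex_text = r"""'usr_TITLE' => 'User accounts',
'usr_HINT' => 'Don\'t remove this entry',"""

pattern = r"'([^']+)'\s*=>\s*'(.*?)(?<!\\)'"
entries = dict(re.findall(pattern, lex_text, re.DOTALL))
print(entries["usr_HINT"])  # Don\'t remove this entry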