# 2025-07-04 09:59:16 +01:00  (VCS artifact line, commented out so the file parses)
import argparse
import logging
import os
import re
import sys
import json
# Configure logging: only errors are shown by default; the many logging.info /
# logging.warning calls below become visible if this level is lowered.
logging.basicConfig(level=logging.ERROR, format="%(asctime)s - %(levelname)s - %(message)s")

# Paths of expected-but-absent files; reported in one batch at the end of
# main() so a single run surfaces every missing input at once.
missing_files = []
def validate_panel_name(panel_name):
    """Exit with an error unless *panel_name* starts with a capital letter.

    Also rejects an empty name, which previously raised IndexError on
    ``panel_name[0]`` instead of producing the intended error message.
    """
    if not panel_name or not panel_name[0].isupper():
        logging.error(f"Error: Panel name '{panel_name}' must start with a capital letter.")
        sys.exit(1)
def get_full_base_path(system):
    """Return ~/<system>/usr/share/smanager with the home directory expanded."""
    relative = os.path.join("~", system, "usr", "share", "smanager")
    return os.path.expanduser(relative)
def check_controller_file_exists(system, panel):
    """Return the path of the panel's controller file, exiting if it is absent."""
    base = get_full_base_path(system)
    controller_path = os.path.join(base, "lib/SrvMngr/Controller", f"{panel}.pm")
    if os.path.exists(controller_path):
        return controller_path
    logging.error(f"Error: Controller file '{controller_path}' does not exist.")
    sys.exit(1)
def extract_title_prefix(controller_path):
    """Pull the lexical-string prefix out of the controller's title declaration.

    Looks for a line of the form ``my $title = $c->l('<prefix>_<anything>')``
    and returns ``<prefix>``; exits with an error when no such line exists.
    """
    with open(controller_path, 'r') as f:
        content = f.read()

    match = re.search(
        r"my\s*\$title\s*=\s*\$c->l\(\s*['\"]([A-Za-z]{2,10})_[^'\"]+['\"]\s*\)",
        content
    )
    if not match:
        logging.error(
            f"Error: Could not find title prefix in '{controller_path}'.\n"
            "Expected format: my $title = $c->l('<prefix>_something')"
        )
        sys.exit(1)

    prefix = match.group(1)
    logging.info(f"Extracted prefix: {prefix}")
    return prefix
def scan_application_files(system, panel, prefix, scan_general=False):
    """Collect lexical strings from the panel's controller, templates and partials.

    Returns a dict mapping each discovered string to the list of files it was
    seen in. With scan_general=True, strings WITHOUT the panel prefix are
    collected instead of prefixed identifiers.
    """
    extracted_strings = {}
    full_base_path = get_full_base_path(system)

    # The panel's controller module.
    controller_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}.pm")
    logging.info(f"Scanning controller file: {controller_path}")
    scan_file_for_lexical_strings(controller_path, prefix, extracted_strings, scan_general)

    # The panel template and its matching partials, for each theme.
    for theme in ("default", "AdminLTE"):
        template_base_path = os.path.join(full_base_path, "themes", theme, "templates")

        panel_template_path = os.path.join(template_base_path, f"{panel.lower()}.html.ep")
        logging.info(f"Scanning panel template file: {panel_template_path}")
        scan_file_for_lexical_strings(panel_template_path, prefix, extracted_strings, scan_general)

        partials_dir = os.path.join(template_base_path, "partials")
        if os.path.exists(partials_dir):
            for filename in os.listdir(partials_dir):
                # Only partials named _<prefix>_<anything>.html.ep belong to this panel.
                if filename.startswith(f"_{prefix.lower()}_") and filename.endswith(".html.ep"):
                    partial_path = os.path.join(partials_dir, filename)
                    logging.info(f"Scanning partial template file: {partial_path}")
                    scan_file_for_lexical_strings(partial_path, prefix, extracted_strings, scan_general)

    # Deduplicate any list-of-dict values, preserving first-seen order.
    for key, value in extracted_strings.items():
        if isinstance(value, list) and value and isinstance(value[0], dict):
            seen = set()
            unique = []
            for entry in value:
                marker = json.dumps(entry, sort_keys=True)
                if marker not in seen:
                    seen.add(marker)
                    unique.append(entry)
            extracted_strings[key] = unique

    return extracted_strings
def scan_file_for_lexical_strings(filepath, prefix, extracted_strings_dict, scan_general):
    """Scan one file for lexical strings and record them in extracted_strings_dict.

    Each discovered string maps to the list of file paths it occurs in.
    scan_general=False collects identifiers of the form <prefix>_<word>;
    scan_general=True collects l('...')-style strings that do NOT carry the
    prefix.
    """
    if not os.path.exists(filepath):
        # Track the miss like read_lex_file does (previously a bare print()
        # went to stdout and the file was never added to missing_files).
        logging.warning(f"Missing file: {filepath}")
        missing_files.append(filepath)
        return

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    if scan_general:
        # Two passes: the legacy l(...) pattern plus quoted l '...' / l "..."
        # calls. NOTE(review): the first char class matches only '"' or '|';
        # single-quoted calls are caught by pattern_l_call below.
        pattern = re.compile(r"l[\s|(][\"|\"](.*?)[\"|\"]\)")
        found_strings1 = pattern.findall(content)
        pattern_l_call = re.compile(r"\bl\s*(['\"])(.+?)\1")
        found_l_calls = [match[1] for match in pattern_l_call.findall(content)]
        for s in found_strings1 + found_l_calls:
            if s.startswith(f"{prefix}_"):
                continue
            extracted_strings_dict.setdefault(s, [])
            if filepath not in extracted_strings_dict[s]:
                extracted_strings_dict[s].append(filepath)
    else:
        pattern = re.compile(rf"{prefix}_[a-zA-Z0-9_]+")
        for s in pattern.findall(content):
            extracted_strings_dict.setdefault(s, [])
            if filepath not in extracted_strings_dict[s]:
                extracted_strings_dict[s].append(filepath)
def read_lex_file(filepath):
    """Parse a lex file of 'key' => 'value' lines into a dict.

    A missing file is logged, remembered in missing_files, and yields {}.
    """
    logging.info(f"Reading file: {filepath}")
    lex_data = {}
    if not os.path.exists(filepath):
        logging.warning(f"File not found: {filepath}")
        missing_files.append(filepath)
        return lex_data

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Capture every 'key' => 'value' pair, across commas or newlines; the
    # negative lookbehind skips escaped quotes (\') inside values.
    pairs = re.findall(r"'(.*?)'\s*=>\s*'(.*?)(?<!\\)'", content, re.DOTALL)
    for key, value in pairs:
        # Unescape single quotes inside values.
        lex_data[key] = value.replace("\\'", "'")
    return lex_data
def write_lex_file(filepath, lex_data):
    """
    Writes a dictionary to a lex file, sorted alphabetically by key (case-insensitive).

    Values containing single quotes are escaped with a backslash so the file
    round-trips through read_lex_file (which unescapes them). Lines end with
    '\n'; the file is opened in text mode, so writing os.linesep here would
    be double-translated to '\r\r\n' on Windows.
    """
    sorted_items = sorted(lex_data.items(), key=lambda item: item[0].lower())
    with open(filepath, 'w', encoding='utf-8') as f:
        for key, value in sorted_items:
            escaped = value.replace("'", "\\'")
            f.write(f"'{key}' => '{escaped}',\n")
def read_languages_json(filepath):
    """Load the languages definition JSON; a missing file yields an empty list."""
    if not os.path.exists(filepath):
        # Record the miss and carry on rather than aborting the whole run.
        missing_files.append(filepath)
        return []
    with open(filepath, 'r') as f:
        return json.load(f)
def update_file_with_new_lexical_string(filepath, old_string, new_string):
    """Replace every occurrence of old_string with new_string inside filepath.

    Failures are logged rather than raised so a batch of renames keeps going.
    """
    try:
        with open(filepath, 'r') as f:
            original = f.read()
        with open(filepath, 'w') as f:
            f.write(original.replace(old_string, new_string))
        logging.info(f"Updated '{old_string}' to '{new_string}' in file: {filepath}")
    except Exception as e:
        logging.error(f"Error updating file {filepath}: {e}")
def export_sorted_missing_lex(input_file1, input_file2, output_file):
    """
    Reads two lex files, finds all entries in input_file1 missing from input_file2,
    sorts them alphabetically by key (case-insensitive), and writes them to output_file.
    """
    def read_lex_file(filepath):
        """Local variant: read a lex file into a dict; a missing file gives {}."""
        if not os.path.exists(filepath):
            print(f"File not found: {filepath}")
            return {}
        with open(filepath, 'r', encoding='utf-8') as f:
            text = f.read()
        # 'key' => 'value' pairs; the lookbehind skips escaped quotes in values.
        return dict(re.findall(r"'([^']+)'\s*=>\s*'(.*?)(?<!\\)'", text, re.DOTALL))

    reference = read_lex_file(input_file1)
    existing = read_lex_file(input_file2)

    # Keys present in the first file but absent from the second.
    missing = sorted(set(reference) - set(existing), key=str.lower)

    with open(output_file, 'w', encoding='utf-8') as out:
        for key in missing:
            out.write(f"'{key}' => '{reference[key]}',\n")
    logging.info(f"Missing lines written to {output_file}:")
def _humanize(lex_string, prefix=None):
    """Derive a plausible default English message from a lex-string id.

    Strips '<prefix>_' when a prefix is given, turns underscores into spaces,
    lowercases the words and capitalizes the first one. If nothing remains
    after splitting, the underscore-stripped text is returned unchanged.
    """
    text = lex_string
    if prefix:
        text = text.replace(f"{prefix}_", "")
    text = text.replace("_", " ")
    words = text.split()
    if words:
        return ' '.join([words[0].capitalize()] + [w.lower() for w in words[1:]])
    return text


def main():
    """CLI entry point: scan a panel's files for lexical strings and generate
    refreshed panel-specific and general lex files under ./output/."""
    parser = argparse.ArgumentParser(description="Scan Mojolicious application files for lexical strings.")
    parser.add_argument("-p", "--panel", required=True, help="Name of the Mojolicious panel (e.g., MyPanel).")
    parser.add_argument("-s", "--system", default="SME11", help="System name (default: SME11).")
    parser.add_argument("-e", "--edit", action="store_true", help="Enable editing of original files (default: False).")
    parser.add_argument("-l", "--lang", action="store_true", help="Enable other language processing (default: False).")
    args = parser.parse_args()

    panel = args.panel
    system = args.system
    edit_files = args.edit
    do_lang = args.lang
    logging.info(f"Starting scan for panel: {panel}, system: {system} edit: {edit_files} lang: {do_lang}")

    validate_panel_name(panel)
    controller_path = check_controller_file_exists(system, panel)
    prefix = extract_title_prefix(controller_path)
    if prefix:
        logging.info(f"Scanning application files for strings with prefix '{prefix}'...")
        extracted_panel_strings = scan_application_files(system, panel, prefix)
        logging.info(f"Deduplicated extracted panel strings: {len(extracted_panel_strings)} unique strings found.")

        # --- Panel-specific English lex file (output to ./output/<Panel>) ---
        panel_lex_output_dir = os.path.join(os.getcwd(), "output", panel.capitalize())
        os.makedirs(panel_lex_output_dir, exist_ok=True)
        full_base_path = get_full_base_path(system)
        en_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", panel, f"{panel.lower()}_en.lex")
        en_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.new")
        en_lex_data = read_lex_file(en_lex_path)
        logging.info(f"Original English lex file lines: {len(en_lex_data)}")

        new_en_lex_data = {}
        for lex_string in extracted_panel_strings.keys():
            if lex_string in en_lex_data:
                new_en_lex_data[lex_string] = en_lex_data[lex_string]
            else:
                # Seed unknown ids with a message derived from the id itself.
                new_en_lex_data[lex_string] = _humanize(lex_string, prefix)
        write_lex_file(en_lex_new_path, new_en_lex_data)
        logging.info(f"Generated {en_lex_new_path}. Lines in new file: {len(new_en_lex_data)}, Lines in original file: {len(en_lex_data)}")

        # File of entries present in the old lex file but not the new one.
        output_diff_file = os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.diff")
        export_sorted_missing_lex(en_lex_path, en_lex_new_path, output_diff_file)

        # BUGFIX: initialize unconditionally — the --edit branch below uses
        # 'languages' even when --lang was not given (previously NameError).
        languages = []
        if do_lang:
            languages_json_path = os.path.join(".", "Templates", "languages.json")
            languages = read_languages_json(languages_json_path)
            for lang_entry in languages:
                lang_code = lang_entry["code"]
                if lang_code == "en":
                    continue
                lang_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", panel, f"{panel.lower()}_{lang_code}.lex")
                lang_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_{lang_code}.lex.new")
                lang_lex_data = read_lex_file(lang_lex_path)
                logging.info(f"Original {lang_code} lex file lines: {len(lang_lex_data)}")
                new_lang_lex_data = {}
                for lex_string in extracted_panel_strings.keys():
                    if lex_string in lang_lex_data:
                        new_lang_lex_data[lex_string] = lang_lex_data[lex_string]
                    else:
                        # BUGFIX: fall back to the English text; a stray
                        # assignment of the undefined name 'sometext' was
                        # removed here (it raised NameError on this path).
                        new_lang_lex_data[lex_string] = new_en_lex_data.get(lex_string, "")
                write_lex_file(lang_lex_new_path, new_lang_lex_data)
                logging.info(f"Generated {lang_lex_new_path}. Lines in new file: {len(new_lang_lex_data)}, Lines in original file: {len(lang_lex_data)}")
        logging.info("")

        # --- General (unprefixed) strings ---
        logging.info("Scanning application files for general lexical strings...")
        extracted_general_strings = scan_application_files(system, panel, prefix, scan_general=True)
        logging.info(f"Deduplicated extracted general strings: {len(extracted_general_strings)} unique strings found.")
        general_lex_output_dir = os.path.join(os.getcwd(), "output", "General")
        os.makedirs(general_lex_output_dir, exist_ok=True)
        general_en_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", "general_en.lex")
        general_en_lex_new_path = os.path.join(general_lex_output_dir, "general_en.lex.new")
        general_en_lex_data = read_lex_file(general_en_lex_path)
        logging.info(f"Original general English lex file lines: {len(general_en_lex_data)}")

        # Start from any previously generated .new file so repeated runs accumulate.
        new_general_en_lex_data = read_lex_file(general_en_lex_new_path)
        for lex_string in extracted_general_strings.keys():
            if lex_string in general_en_lex_data:
                new_general_en_lex_data[lex_string] = general_en_lex_data[lex_string]
            else:
                new_general_en_lex_data[lex_string] = _humanize(lex_string)
        write_lex_file(general_en_lex_new_path, new_general_en_lex_data)
        logging.info(f"Generated {general_en_lex_new_path}. Lines in new file: {len(new_general_en_lex_data)}, Lines in original file: {len(general_en_lex_data)}")
        logging.info("")

        if do_lang:
            for lang_entry in languages:
                lang_code = lang_entry["code"]
                if lang_code == "en":
                    continue
                general_lang_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", f"general_{lang_code}.lex")
                general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new")
                general_lang_lex_data = read_lex_file(general_lang_lex_path)
                logging.info(f"Original general {lang_code} lex file lines: {len(general_lang_lex_data)}")
                new_general_lang_lex_data = {}
                for lex_string in extracted_general_strings.keys():
                    if lex_string in general_lang_lex_data:
                        new_general_lang_lex_data[lex_string] = general_lang_lex_data[lex_string]
                    else:
                        new_general_lang_lex_data[lex_string] = new_general_en_lex_data.get(lex_string, "")
                write_lex_file(general_lang_lex_new_path, new_general_lang_lex_data)
                logging.info(f"Generated {general_lang_lex_new_path}. Lines in new file: {len(new_general_lang_lex_data)}, Lines in original file: {len(general_lang_lex_data)}")
        logging.info("")

        if edit_files:
            # Prefixed ids whose remainder is a single word are demoted to the
            # general lex files, and the source files are rewritten to use the
            # bare word.
            logging.info("Handling single-word lexical strings...")
            for lex_string, filepaths in extracted_panel_strings.items():
                if lex_string.startswith(f"{prefix}_"):
                    remainder = lex_string[len(prefix) + 1:]
                    if "_" not in remainder:
                        just_one_word = remainder
                        if just_one_word not in new_general_en_lex_data:
                            new_general_en_lex_data[just_one_word] = just_one_word
                            logging.info(f"Added '{just_one_word}' to {general_en_lex_new_path}")
                            write_lex_file(general_en_lex_new_path, new_general_en_lex_data)
                        for lang_entry in languages:
                            lang_code = lang_entry["code"]
                            if lang_code == "en":
                                continue
                            general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new")
                            current_general_lang_lex_data = read_lex_file(general_lang_lex_new_path)
                            if just_one_word not in current_general_lang_lex_data:
                                current_general_lang_lex_data[just_one_word] = just_one_word
                                write_lex_file(general_lang_lex_new_path, current_general_lang_lex_data)
                                logging.info(f"Added '{just_one_word}' to {general_lang_lex_new_path}")
                        for filepath in filepaths:
                            update_file_with_new_lexical_string(filepath, lex_string, just_one_word)
    else:
        logging.error("Could not determine prefix, exiting.")
        sys.exit(1)

    if missing_files:
        logging.warning("The following files were not found:")
        for path in missing_files:
            logging.warning(f"- {path}")
# Script entry guard (a stray non-Python timestamp line between the guard and
# the call was removed — it was a syntax error).
if __name__ == "__main__":
    main()