llm first shot at lex audit code
This commit is contained in:
		
							
								
								
									
										288
									
								
								lex_scan.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										288
									
								
								lex_scan.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,288 @@
 | 
			
		||||
import argparse
 | 
			
		||||
import logging
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
import sys
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
# Configure logging
 | 
			
		||||
# Emit INFO and above on the root logger; every record carries a timestamp and its level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 | 
			
		||||
 | 
			
		||||
# Base directory under which the controller, theme template and lex file paths in this script are resolved.
SYSTEM_BASE_PATH = "/usr/share/smanager/"
 | 
			
		||||
 | 
			
		||||
def validate_panel_name(panel_name):
    """Abort the program unless *panel_name* starts with an uppercase letter.

    Args:
        panel_name: Panel name as supplied on the command line.

    Raises:
        SystemExit: With status 1, after logging an error, when the name is
            empty or its first character is not an uppercase letter.
    """
    # Test for emptiness first: indexing "" would raise IndexError and hide
    # the intended error message behind a traceback.
    if not panel_name or not panel_name[0].isupper():
        logging.error(f"Error: Panel name '{panel_name}' must start with a capital letter.")
        sys.exit(1)
 | 
			
		||||
 | 
			
		||||
def check_controller_file_exists(system, panel):
    """Return the path of the panel's controller module, exiting if it is absent.

    Args:
        system: System name; accepted for interface symmetry but not used here.
        panel: Panel name; the controller is expected at
            ``<SYSTEM_BASE_PATH>/lib/SrvMngr/Controller/<panel>.pm``.

    Returns:
        The controller path, when something exists at that location.

    Raises:
        SystemExit: With status 1, after logging an error, when nothing
            exists at the expected path.
    """
    module_name = f"{panel}.pm"
    controller_path = os.path.join(SYSTEM_BASE_PATH, "lib/SrvMngr/Controller", module_name)
    if os.path.exists(controller_path):
        return controller_path

    logging.error(f"Error: Controller file '{controller_path}' does not exist.")
    sys.exit(1)
 | 
			
		||||
 | 
			
		||||
def extract_title_prefix(controller_path):
    """Return the lexical-string prefix used by a controller's title.

    The controller is searched for a statement of the form
    ``my $title = $c->l("<prefix>_...")`` where the string may be enclosed in
    either double or single quotes and ``<prefix>`` is two to four ASCII
    letters.

    Args:
        controller_path: Path of the controller file to read.

    Returns:
        The prefix captured from the first matching statement.

    Raises:
        SystemExit: With status 1, after logging an error, when no such
            statement is found.
    """
    with open(controller_path, 'r') as f:
        content = f.read()

    # The character class must list both quote characters; the earlier
    # pattern only contained '"' and '|' and therefore missed l('...').
    match = re.search(r"""my \$title = \$c->l\(["']([A-Za-z]{2,4})_.*?\)""", content)
    if not match:
        logging.error(
            f"Error: Could not find title prefix in '{controller_path}'. "
            "Expected format: my $title = $c->l(\"<prefix>_something\") "
            "or my $title = $c->l('<prefix>_something')"
        )
        sys.exit(1)

    prefix = match.group(1)
    logging.info(f"Extracted prefix: {prefix}")
    return prefix
 | 
			
		||||
 | 
			
		||||
def scan_application_files(system, panel, prefix, scan_general=False):
    """Collect lexical strings from a panel's controller and theme templates.

    The controller module, the panel template of each theme ("default" and
    "AdminLTE") and every ``*.html.ep`` file in each theme's ``partials``
    directory are passed to ``scan_file_for_lexical_strings``.

    Args:
        system: System name; not used by this function.
        panel: Panel name (controller is ``<panel>.pm``, template is
            ``<panel lower-cased>.html.ep``).
        prefix: Panel prefix forwarded to the per-file scanner.
        scan_general: Forwarded to the per-file scanner to select general
            strings instead of prefixed ones.

    Returns:
        Dict mapping each string found to the list of files containing it.
    """
    found = {}

    # Controller file
    controller_file = os.path.join(SYSTEM_BASE_PATH, "lib/SrvMngr/Controller", f"{panel}.pm")
    logging.info(f"Scanning controller file: {controller_file}")
    scan_file_for_lexical_strings(controller_file, prefix, found, scan_general)

    # Template files, one set per theme
    for theme_name in ("default", "AdminLTE"):
        templates_dir = os.path.join(SYSTEM_BASE_PATH, "themes", theme_name, "templates")

        panel_template = os.path.join(templates_dir, f"{panel.lower()}.html.ep")
        logging.info(f"Scanning panel template file: {panel_template}")
        scan_file_for_lexical_strings(panel_template, prefix, found, scan_general)

        # Partials are optional; a theme without the directory is skipped.
        partials_dir = os.path.join(templates_dir, "partials")
        if not os.path.exists(partials_dir):
            continue

        for entry in os.listdir(partials_dir):
            if not entry.endswith(".html.ep"):
                continue
            partial_file = os.path.join(partials_dir, entry)
            logging.info(f"Scanning partial template file: {partial_file}")
            scan_file_for_lexical_strings(partial_file, prefix, found, scan_general)

    return found
 | 
			
		||||
 | 
			
		||||
def scan_file_for_lexical_strings(filepath, prefix, extracted_strings_dict, scan_general):
    """Record the lexical strings found in one file.

    Args:
        filepath: File to scan. A missing file is logged as a warning and
            otherwise ignored.
        prefix: Panel prefix. In prefixed mode only ``<prefix>_<text>`` keys
            are collected; in general mode keys starting with ``<prefix>_``
            are excluded.
        extracted_strings_dict: Dict updated in place; each string found maps
            to the list of files containing it (no duplicate paths).
        scan_general: When true, collect the first argument of ``l(...)``
            calls written with a quoted string; when false, collect
            ``<prefix>_<text>`` keys wherever they appear.

    Returns:
        None. Results are delivered through *extracted_strings_dict*.
    """
    if not os.path.exists(filepath):
        logging.warning(f"Warning: File not found: {filepath}")
        return

    with open(filepath, 'r') as f:
        content = f.read()

    own_prefix = f"{prefix}_"
    if scan_general:
        # - The lookbehind stops identifiers that merely end in "l"
        #   (e.g. eval("...")) from being read as a call to l().
        # - Either quote style is accepted and the closing quote must be the
        #   same character as the opening one (backreference \1).
        pattern = re.compile(r"""(?<![A-Za-z0-9_])l[\s(](["'])(.*?)\1\)""")
        found_strings = [
            m.group(2)
            for m in pattern.finditer(content)
            if not m.group(2).startswith(own_prefix)
        ]
    else:
        # Escape the prefix so it is always matched literally.
        pattern = re.compile(re.escape(own_prefix) + r"[a-zA-Z0-9_]+")
        found_strings = pattern.findall(content)

    for s in found_strings:
        files = extracted_strings_dict.setdefault(s, [])
        if filepath not in files:
            files.append(filepath)
 | 
			
		||||
 | 
			
		||||
def read_lex_file(filepath):
    """Parse a lex file into a dict.

    Each line of the form ``'<key>' => '<value>'`` (anything after the last
    single quote, such as a trailing comma, is ignored) contributes one
    entry. Lines that do not match are skipped. The value is taken verbatim
    from between the quotes; no unescaping is performed.

    Args:
        filepath: Path of the lex file.

    Returns:
        Dict of key to value; empty when the file does not exist (a warning
        is logged in that case).
    """
    lex_data = {}
    if not os.path.exists(filepath):
        logging.warning(f"Lex file not found: {filepath}. Returning empty dictionary.")
        return lex_data

    # Allow any amount of whitespace (or none) around "=>" so that entries
    # aligned with extra spaces or tabs are not silently dropped.
    entry_pattern = re.compile(r"'(.*?)'\s*=>\s*'(.*)'")
    with open(filepath, 'r') as f:
        for line in f:
            match = entry_pattern.match(line.strip())
            if match:
                key, value = match.groups()
                lex_data[key] = value
    return lex_data
 | 
			
		||||
 | 
			
		||||
def write_lex_file(filepath, lex_data):
    """Write *lex_data* to *filepath*, one ``'<key>' => '<value>'`` line per entry.

    The file is created or truncated. Entries are written in the dict's
    iteration order and keys/values are inserted as-is, without escaping.

    Args:
        filepath: Destination path.
        lex_data: Mapping of lexical key to text.
    """
    with open(filepath, 'w') as handle:
        handle.writelines(
            f"'{name}' => '{text}'\n" for name, text in lex_data.items()
        )
 | 
			
		||||
 | 
			
		||||
def read_languages_json(filepath):
    """Load and return the parsed contents of the languages JSON file.

    Args:
        filepath: Path of the JSON file.

    Returns:
        Whatever ``json.load`` produces for the file.

    Raises:
        SystemExit: With status 1, after logging an error, when the file
            does not exist.
    """
    if os.path.exists(filepath):
        with open(filepath, 'r') as handle:
            return json.load(handle)

    logging.error(f"Error: languages.json file not found at {filepath}")
    sys.exit(1)
 | 
			
		||||
 | 
			
		||||
def update_file_with_new_lexical_string(filepath, old_string, new_string):
    """Replace whole-key occurrences of *old_string* in a file with *new_string*.

    An occurrence counts only when it is not directly preceded or followed by
    a letter, digit or underscore, so replacing ``bac_Save`` leaves
    ``bac_Save_changes`` intact. The file is rewritten only if at least one
    occurrence was replaced. I/O and decoding errors are logged and
    swallowed: the update is best-effort.

    Args:
        filepath: File to edit in place.
        old_string: Lexical key to look for (matched literally).
        new_string: Replacement text (inserted literally).
    """
    key_pattern = re.compile(
        r"(?<![A-Za-z0-9_])" + re.escape(old_string) + r"(?![A-Za-z0-9_])"
    )
    try:
        with open(filepath, 'r') as f:
            content = f.read()

        # A callable replacement keeps backslashes in new_string from being
        # interpreted as regex escapes or group references.
        new_content, replaced = key_pattern.subn(lambda _match: new_string, content)
        if not replaced:
            return

        with open(filepath, 'w') as f:
            f.write(new_content)
        logging.info(f"Updated '{old_string}' to '{new_string}' in file: {filepath}")
    except (OSError, UnicodeError) as e:
        logging.error(f"Error updating file {filepath}: {e}")
 | 
			
		||||
 | 
			
		||||
def _build_lex_file(source_path, target_path, lex_strings, default_for):
    """Merge *lex_strings* with an existing lex file and write the result.

    Keys present in *source_path* keep their stored text; every other key
    gets ``default_for(key)``. The merged mapping is written to
    *target_path* and returned.
    """
    existing = read_lex_file(source_path)
    merged = {
        key: existing[key] if key in existing else default_for(key)
        for key in lex_strings
    }
    write_lex_file(target_path, merged)
    logging.info(f"Generated {target_path}. Lines in new file: {len(merged)}, Lines in original file: {len(existing)}")
    return merged


def _other_language_codes(languages_json_path):
    """Return the non-English language codes listed in languages.json.

    ``read_languages_json`` exits the program when the file is missing; here
    that is downgraded to a warning and an empty list so only the
    per-language processing is skipped.
    """
    try:
        languages = read_languages_json(languages_json_path)
    except SystemExit:
        logging.warning(f"Could not read languages.json from {languages_json_path}. Skipping language-specific lexical file processing.")
        return []
    return [entry["code"] for entry in languages if entry["code"] != "en"]


def main():
    """Command-line entry point: audit the lexical strings of one panel.

    Steps, in order:

    1. Parse ``--panel`` / ``--system``, validate the panel name, locate its
       controller and extract the panel's string prefix.
    2. Scan the application files for ``<prefix>_...`` keys and write
       ``output/<Panel>/<panel>_<lang>.lex.new`` for English and for every
       other language listed in languages.json.
    3. Scan again for general (non-prefixed) strings and write
       ``output/general/general_<lang>.lex.new`` the same way.
    4. For every panel key that is a single word after the prefix, add that
       word to the general ``.new`` files and rewrite the scanned source
       files to use the bare word instead of the prefixed key.

    Output is written below the current working directory. Step 4 edits the
    scanned application files in place.

    Raises:
        SystemExit: With status 1 when validation fails or no prefix can be
            determined.
    """
    parser = argparse.ArgumentParser(description="Scan Mojolicious application files for lexical strings.")
    parser.add_argument("-p", "--panel", required=True, help="Name of the Mojolicious panel (e.g., MyPanel).")
    parser.add_argument("-s", "--system", default="SME11", help="System name (default: SME11).")
    args = parser.parse_args()

    panel = args.panel
    system = args.system

    logging.info(f"Starting scan for panel: {panel}, system: {system}")

    validate_panel_name(panel)
    controller_path = check_controller_file_exists(system, panel)
    prefix = extract_title_prefix(controller_path)
    if not prefix:
        logging.error("Could not determine prefix, exiting.")
        sys.exit(1)

    key_prefix = f"{prefix}_"
    # NOTE(review): this directory was previously spelled "I128N", taken to
    # be a typo for "I18N" — confirm the layout against an installed tree.
    lex_root = os.path.join(SYSTEM_BASE_PATH, "lib/SrvMngr/I18N")
    output_root = os.path.join(os.getcwd(), "output")

    def panel_default_text(lex_string):
        # Drop only the leading "<prefix>_"; later occurrences belong to the text.
        text = lex_string[len(key_prefix):] if lex_string.startswith(key_prefix) else lex_string
        return text.replace("_", " ")

    # --- Panel-specific strings -------------------------------------------
    logging.info(f"Scanning application files for strings with prefix '{prefix}'...")
    extracted_panel_strings = scan_application_files(system, panel, prefix)
    logging.info(f"Deduplicated extracted panel strings: {len(extracted_panel_strings)} unique strings found.")

    panel_lex_dir = os.path.join(lex_root, panel.capitalize())
    panel_lex_output_dir = os.path.join(output_root, panel.capitalize())
    os.makedirs(panel_lex_output_dir, exist_ok=True)

    new_en_lex_data = _build_lex_file(
        os.path.join(panel_lex_dir, f"{panel.lower()}_en.lex"),
        os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.new"),
        extracted_panel_strings,
        panel_default_text,
    )

    # Read languages.json (assuming it's in a known path).
    languages_json_path = os.path.join(SYSTEM_BASE_PATH, "Templates", "languages.json")  # Placeholder path
    lang_codes = _other_language_codes(languages_json_path)

    # Untranslated keys fall back to the English text.
    for lang_code in lang_codes:
        _build_lex_file(
            os.path.join(panel_lex_dir, f"{panel.lower()}_{lang_code}.lex"),
            os.path.join(panel_lex_output_dir, f"{panel.lower()}_{lang_code}.lex.new"),
            extracted_panel_strings,
            lambda key: new_en_lex_data.get(key, ""),
        )

    # --- General strings ---------------------------------------------------
    logging.info("Scanning application files for general lexical strings...")
    extracted_general_strings = scan_application_files(system, panel, prefix, scan_general=True)
    logging.info(f"Deduplicated extracted general strings: {len(extracted_general_strings)} unique strings found.")

    general_lex_dir = os.path.join(lex_root, "general")
    general_lex_output_dir = os.path.join(output_root, "general")
    os.makedirs(general_lex_output_dir, exist_ok=True)

    general_en_lex_new_path = os.path.join(general_lex_output_dir, "general_en.lex.new")
    new_general_en_lex_data = _build_lex_file(
        os.path.join(general_lex_dir, "general_en.lex"),
        general_en_lex_new_path,
        extracted_general_strings,
        lambda key: key.replace("_", " "),
    )

    # Keep each language's merged data in memory, keyed by its output path,
    # so the single-word pass below can extend it without re-parsing files
    # this run has just written.
    general_lang_lex_data = {}
    for lang_code in lang_codes:
        general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new")
        general_lang_lex_data[general_lang_lex_new_path] = _build_lex_file(
            os.path.join(general_lex_dir, f"general_{lang_code}.lex"),
            general_lang_lex_new_path,
            extracted_general_strings,
            lambda key: new_general_en_lex_data.get(key, ""),
        )

    # --- Single-word panel strings -------------------------------------------
    logging.info("Handling single-word lexical strings...")
    for lex_string, filepaths in extracted_panel_strings.items():
        if not lex_string.startswith(key_prefix):
            continue
        just_one_word = lex_string[len(key_prefix):]
        if "_" in just_one_word:
            continue  # more than one word after the prefix

        # Make sure the bare word exists in the general English file.
        if just_one_word not in new_general_en_lex_data:
            new_general_en_lex_data[just_one_word] = just_one_word
            logging.info(f"Added '{just_one_word}' to {general_en_lex_new_path}")
            write_lex_file(general_en_lex_new_path, new_general_en_lex_data)

        # ...and in every other general language file (same value for now).
        for lang_new_path, lang_data in general_lang_lex_data.items():
            if just_one_word not in lang_data:
                lang_data[just_one_word] = just_one_word
                write_lex_file(lang_new_path, lang_data)
                logging.info(f"Added '{just_one_word}' to {lang_new_path}")

        # Point the application files at the bare word.
        for filepath in filepaths:
            update_file_with_new_lexical_string(filepath, lex_string, just_one_word)
 | 
			
		||||
 | 
			
		||||
# Run the audit only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user