Working lex-string extraction and re-creation of the .lex file

This commit is contained in:
Brian Read 2025-07-04 14:03:15 +01:00
parent 4050d94608
commit 7612dac6b3

View File

@ -6,17 +6,21 @@ import sys
import json

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Paths that were looked for but not found; collected here so the run can
# continue and report them all at the end of main() instead of aborting.
missing_files = []
def validate_panel_name(panel_name):
    """Exit with an error unless *panel_name* starts with a capital letter."""
    if not panel_name[0].isupper():
        logging.error(f"Error: Panel name '{panel_name}' must start with a capital letter.")
        sys.exit(1)
def get_full_base_path(system):
    """Return the per-user smanager tree for *system*, e.g. ~/SME11/usr/share/smanager."""
    return os.path.expanduser(os.path.join("~", system, "usr", "share", "smanager"))
def check_controller_file_exists(system, panel):
    """Return the path of the panel's controller file, exiting if it is absent.

    The path is <base>/lib/SrvMngr/Controller/<panel>.pm under the tree
    returned by get_full_base_path().
    """
    full_base_path = get_full_base_path(system)
    controller_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}.pm")
    if not os.path.exists(controller_path):
        logging.error(f"Error: Controller file '{controller_path}' does not exist.")
        sys.exit(1)
    # Callers (main) assign this function's result, so the path is returned.
    return controller_path
def extract_title_prefix(controller_path):
    """Extract the 2-4 letter lexical-string prefix from a controller file.

    Finds the first $c->l("<prefix>_...") or $c->l('<prefix>_...') call and
    returns the prefix; exits with an error if none is found.
    """
    prefix = None
    with open(controller_path, 'r') as f:
        content = f.read()
    # ["'] matches either quote style; the previous class ["|"] matched a
    # literal | instead of a single quote, so 'xx_...' args were never found.
    match = re.search(r"\$c->l\([\"']([A-Za-z]{2,4})_.*?\)", content)
    if match:
        prefix = match.group(1)
        logging.info(f"Extracted prefix: {prefix}")
    else:
        logging.error(
            f"Error: Could not find title prefix in '{controller_path}'. "
            "Expected format: $c->l(\"<prefix>_something\") or $c->l('<prefix>_something')"
        )
        sys.exit(1)
    return prefix
def scan_application_files(system, panel, prefix, scan_general=False):
    """Scan the panel's controller and theme templates for lexical strings.

    Returns a dict mapping each found string to the list of files it appears
    in.  With scan_general=True, general l("...") strings are collected
    instead of <prefix>_* identifiers (see scan_file_for_lexical_strings).
    """
    extracted_strings = {}
    full_base_path = get_full_base_path(system)

    # Controller file
    controller_path = os.path.join(full_base_path, "lib/SrvMngr/Controller", f"{panel}.pm")
    logging.info(f"Scanning controller file: {controller_path}")
    scan_file_for_lexical_strings(controller_path, prefix, extracted_strings, scan_general)

    # Template files, one set per theme
    for theme in ("default", "AdminLTE"):
        template_base_path = os.path.join(full_base_path, "themes", theme, "templates")
        panel_template_path = os.path.join(template_base_path, f"{panel.lower()}.html.ep")
        logging.info(f"Scanning panel template file: {panel_template_path}")
        scan_file_for_lexical_strings(panel_template_path, prefix, extracted_strings, scan_general)

        # Only scan partial files matching _<prefix>_<anything>.html.ep
        partials_dir = os.path.join(template_base_path, "partials")
        if os.path.exists(partials_dir):
            for filename in os.listdir(partials_dir):
                if filename.startswith(f"_{prefix.lower()}_") and filename.endswith(".html.ep"):
                    partial_path = os.path.join(partials_dir, filename)
                    logging.info(f"Scanning partial template file: {partial_path}")
                    scan_file_for_lexical_strings(partial_path, prefix, extracted_strings, scan_general)

    # Callers (main) consume this function's result, so the dict is returned.
    return extracted_strings
def scan_file_for_lexical_strings(filepath, prefix, extracted_strings_dict, scan_general):
    """Collect lexical strings from one file into *extracted_strings_dict*.

    Each found string maps to a deduplicated list of the files it occurs in.
    Missing files are recorded in the module-level missing_files list and
    skipped.  scan_general=True matches general l("...")/l('...') strings;
    otherwise <prefix>_<word-chars> identifiers are matched.
    """
    if not os.path.exists(filepath):
        missing_files.append(filepath)
        return
    with open(filepath, 'r') as f:
        content = f.read()
    if scan_general:
        # ["'] fixes the previous class ["|"], which matched a literal |
        # instead of a single quote (likewise [\s(] vs [\s|(]).
        pattern = re.compile(r"l[\s(][\"'](.*?)[\"']\)")
    else:
        pattern = re.compile(rf"{prefix}_[a-zA-Z0-9_]+")
    for found in pattern.findall(content):
        files = extracted_strings_dict.setdefault(found, [])
        if filepath not in files:
            files.append(filepath)
def read_lex_file(filepath):
    """Parse a Perl-style .lex file of 'key' => 'value' pairs into a dict.

    Pairs may be separated by commas or span newlines.  Escaped single
    quotes (\\') inside values are unescaped.  A missing file is recorded in
    missing_files and yields an empty dict.
    """
    logging.info(f"Reading file: {filepath}")
    lex_data = {}
    if not os.path.exists(filepath):
        logging.warning(f"File not found: {filepath}")
        missing_files.append(filepath)
        return lex_data
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
    # (?<!\\)' ends the value at the first unescaped closing quote; DOTALL
    # lets values span newlines.
    pattern = r"'(.*?)'\s*=>\s*'(.*?)(?<!\\)'"
    for key, value in re.findall(pattern, content, re.DOTALL):
        lex_data[key] = value.replace("\\'", "'")
    return lex_data
def write_lex_file(filepath, lex_data):
    """Write *lex_data* as lines of 'key' => 'value', — one pair per line.

    Single quotes in values are escaped as \\' so the output round-trips
    through read_lex_file (the previous writer never escaped them, producing
    unparseable files).  A plain "\\n" is written: the file is opened in text
    mode, where os.linesep would be translated to \\r\\r\\n on Windows.
    """
    with open(filepath, 'w', encoding='utf-8') as f:
        for key, value in lex_data.items():
            escaped = value.replace("'", "\\'")
            f.write(f"'{key}' => '{escaped}',\n")
def read_languages_json(filepath):
    """Load the languages.json list of {"code": ...} entries.

    A missing file is recorded in missing_files and yields an empty list so
    the caller can continue instead of aborting the whole run.
    """
    if not os.path.exists(filepath):
        missing_files.append(filepath)
        return []
    with open(filepath, 'r') as f:
        return json.load(f)
@ -128,161 +138,180 @@ def update_file_with_new_lexical_string(filepath, old_string, new_string):
logging.error(f"Error updating file {filepath}: {e}") logging.error(f"Error updating file {filepath}: {e}")
def _default_text(lex_string, prefix=None):
    """Derive a readable default message from a lex id: strip the optional
    prefix, map underscores to spaces, and sentence-case the words."""
    sometext = lex_string
    if prefix:
        sometext = sometext.replace(f"{prefix}_", "")
    words = sometext.replace("_", " ").split()
    if words:
        words = [words[0].capitalize()] + [w.lower() for w in words[1:]]
    return " ".join(words)


def main():
    """Scan a Mojolicious panel for lexical strings and regenerate its .lex
    files (plus the General ones) under ./output/.

    Flags: -e rewrites source files for single-word panel strings; -l also
    processes the non-English languages listed in languages.json.
    """
    parser = argparse.ArgumentParser(description="Scan Mojolicious application files for lexical strings.")
    parser.add_argument("-p", "--panel", required=True, help="Name of the Mojolicious panel (e.g., MyPanel).")
    parser.add_argument("-s", "--system", default="SME11", help="System name (default: SME11).")
    parser.add_argument("-e", "--edit", action="store_true", help="Enable editing of original files (default: False).")
    parser.add_argument("-l", "--lang", action="store_true", help="Enable other language processing (default: False).")
    args = parser.parse_args()

    panel = args.panel
    system = args.system
    edit_files = args.edit
    do_lang = args.lang
    logging.info(f"Starting scan for panel: {panel}, system: {system} edit: {edit_files} lang: {do_lang}")

    validate_panel_name(panel)
    controller_path = check_controller_file_exists(system, panel)
    prefix = extract_title_prefix(controller_path)
    if not prefix:
        logging.error("Could not determine prefix, exiting.")
        sys.exit(1)

    logging.info(f"Scanning application files for strings with prefix '{prefix}'...")
    extracted_panel_strings = scan_application_files(system, panel, prefix)
    logging.info(f"Deduplicated extracted panel strings: {len(extracted_panel_strings)} unique strings found.")

    # --- Panel-specific English lex file -> ./output/<Panel>/ ---
    panel_lex_output_dir = os.path.join(os.getcwd(), "output", panel.capitalize())
    os.makedirs(panel_lex_output_dir, exist_ok=True)
    full_base_path = get_full_base_path(system)
    en_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", panel, f"{panel.lower()}_en.lex")
    en_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_en.lex.new")
    en_lex_data = read_lex_file(en_lex_path)
    logging.info(f"Original English lex file lines: {len(en_lex_data)}")

    new_en_lex_data = {}
    for lex_string in extracted_panel_strings:
        if lex_string in en_lex_data:
            new_en_lex_data[lex_string] = en_lex_data[lex_string]
        else:
            # No existing translation: synthesize one from the id itself.
            new_en_lex_data[lex_string] = _default_text(lex_string, prefix)
    write_lex_file(en_lex_new_path, new_en_lex_data)
    logging.info(f"Generated {en_lex_new_path}. Lines in new file: {len(new_en_lex_data)}, Lines in original file: {len(en_lex_data)}")

    # languages is consumed by both the -l and -e paths below; default to an
    # empty list so running -e without -l does not hit an undefined name.
    languages = []
    if do_lang:
        languages_json_path = os.path.join(".", "Templates", "languages.json")
        languages = read_languages_json(languages_json_path)

        # --- Panel-specific other-language lex files ---
        for lang_entry in languages:
            lang_code = lang_entry["code"]
            if lang_code == "en":  # English already processed above
                continue
            lang_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", panel, f"{panel.lower()}_{lang_code}.lex")
            lang_lex_new_path = os.path.join(panel_lex_output_dir, f"{panel.lower()}_{lang_code}.lex.new")
            lang_lex_data = read_lex_file(lang_lex_path)
            logging.info(f"Original {lang_code} lex file lines: {len(lang_lex_data)}")
            new_lang_lex_data = {}
            for lex_string in extracted_panel_strings:
                if lex_string in lang_lex_data:
                    new_lang_lex_data[lex_string] = lang_lex_data[lex_string]
                else:
                    # Fall back to the (possibly synthesized) English text.
                    # The stray write-back into new_en_lex_data that was here
                    # used an undefined name and has been removed.
                    new_lang_lex_data[lex_string] = new_en_lex_data.get(lex_string, "")
            write_lex_file(lang_lex_new_path, new_lang_lex_data)
            logging.info(f"Generated {lang_lex_new_path}. Lines in new file: {len(new_lang_lex_data)}, Lines in original file: {len(lang_lex_data)}")

    # --- General lexical strings -> ./output/General/ ---
    logging.info("Scanning application files for general lexical strings...")
    extracted_general_strings = scan_application_files(system, panel, prefix, scan_general=True)
    logging.info(f"Deduplicated extracted general strings: {len(extracted_general_strings)} unique strings found.")

    general_lex_output_dir = os.path.join(os.getcwd(), "output", "General")
    os.makedirs(general_lex_output_dir, exist_ok=True)
    general_en_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", "general_en.lex")
    general_en_lex_new_path = os.path.join(general_lex_output_dir, "general_en.lex.new")
    general_en_lex_data = read_lex_file(general_en_lex_path)
    logging.info(f"Original general English lex file lines: {len(general_en_lex_data)}")

    new_general_en_lex_data = {}
    for lex_string in extracted_general_strings:
        if lex_string in general_en_lex_data:
            new_general_en_lex_data[lex_string] = general_en_lex_data[lex_string]
        else:
            new_general_en_lex_data[lex_string] = _default_text(lex_string)
    write_lex_file(general_en_lex_new_path, new_general_en_lex_data)
    logging.info(f"Generated {general_en_lex_new_path}. Lines in new file: {len(new_general_en_lex_data)}, Lines in original file: {len(general_en_lex_data)}")

    if do_lang:
        # --- General other-language lex files ---
        for lang_entry in languages:
            lang_code = lang_entry["code"]
            if lang_code == "en":
                continue
            general_lang_lex_path = os.path.join(full_base_path, "lib/SrvMngr/I18N/Modules", "General", f"general_{lang_code}.lex")
            general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new")
            general_lang_lex_data = read_lex_file(general_lang_lex_path)
            logging.info(f"Original general {lang_code} lex file lines: {len(general_lang_lex_data)}")
            new_general_lang_lex_data = {}
            for lex_string in extracted_general_strings:
                if lex_string in general_lang_lex_data:
                    new_general_lang_lex_data[lex_string] = general_lang_lex_data[lex_string]
                else:
                    new_general_lang_lex_data[lex_string] = new_general_en_lex_data.get(lex_string, "")
            write_lex_file(general_lang_lex_new_path, new_general_lang_lex_data)
            logging.info(f"Generated {general_lang_lex_new_path}. Lines in new file: {len(new_general_lang_lex_data)}, Lines in original file: {len(general_lang_lex_data)}")

    if edit_files:
        # Panel strings of the form <prefix>_<oneword> are demoted to the
        # General lexicon under the bare word, and the source files are
        # rewritten to use it.
        logging.info("Handling single-word lexical strings...")
        for lex_string, filepaths in extracted_panel_strings.items():
            if not lex_string.startswith(f"{prefix}_"):
                continue
            sometext_part = lex_string[len(prefix) + 1:]
            if "_" in sometext_part:
                continue
            just_one_word = sometext_part
            if just_one_word not in new_general_en_lex_data:
                new_general_en_lex_data[just_one_word] = just_one_word
                logging.info(f"Added '{just_one_word}' to {general_en_lex_new_path}")
                write_lex_file(general_en_lex_new_path, new_general_en_lex_data)

                # Mirror the addition into the other-language .new files
                # (empty unless -l was given).
                for lang_entry in languages:
                    lang_code = lang_entry["code"]
                    if lang_code == "en":
                        continue
                    general_lang_lex_new_path = os.path.join(general_lex_output_dir, f"general_{lang_code}.lex.new")
                    current = read_lex_file(general_lang_lex_new_path)
                    if just_one_word not in current:
                        current[just_one_word] = just_one_word  # assume English text for now
                        write_lex_file(general_lang_lex_new_path, current)
                        logging.info(f"Added '{just_one_word}' to {general_lang_lex_new_path}")

            # Rewrite every file that used the prefixed id.
            for filepath in filepaths:
                update_file_with_new_lexical_string(filepath, lex_string, just_one_word)

    if missing_files:
        logging.warning("The following files were not found:")
        for f in missing_files:
            logging.warning(f"- {f}")
# Script entry point.
if __name__ == "__main__":
    main()