# SM2Gen/sm2gen.py
#
# 918 lines
# 32 KiB
# Python

import json5
import sys
import argparse
from chameleon import PageTemplateFile, PageTemplate
import pkg_resources
import xml.etree.ElementTree as ET
import re
from openai import OpenAI
import configparser
import json
from pathlib import Path
import traceback
import os
from datetime import datetime, timedelta
from spellchecker import SpellChecker
import logging
#
# To Do
# 1. Get routing working
# 2. Deal with ? and ! as sentence ends in text-format
# 3. Make lexical scan work with double or single quotes (if possible)
SME2Gen_version = "0.8"
json5_dict: dict = {}
json5_html_list: list = []
# Define the path to the INI file
ini_file_path = os.path.expanduser("~/.smegit/conf")
OPENAI_API_KEY = ""
# Configure the basic logging system
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Create a logger
logger = logging.getLogger(__name__)
def example_function(**kwargs):
    """Log the keyword arguments received (as a dict) at INFO level."""
    received = kwargs
    logger.info(received)
def spell_check_and_correct(text):
    """Return *text* with words unknown to the spell checker replaced.

    The text is split on whitespace and re-joined with single spaces.
    Words that are entirely upper case, or that the checker already knows,
    are kept unchanged. For any other word the checker's suggested
    correction is used when it offers one, otherwise the word is kept.

    Spell checking is best-effort: if anything goes wrong the original
    text is returned unchanged and a warning is logged.
    """
    try:
        spell = SpellChecker()
        corrected_words = []
        for word in text.split():
            if word.isupper() or word in spell:
                # Acronyms and known words pass straight through
                corrected_words.append(word)
            else:
                # correction() may yield nothing - fall back to the original
                corrected_words.append(spell.correction(word) or word)
        return ' '.join(corrected_words)
    except Exception:
        # Was a bare "except:", which also trapped KeyboardInterrupt and
        # SystemExit; only ordinary errors should be swallowed here.
        logger.warning(f"Spelling checker exception ({text})")
        return text
def python_to_perl_structure(data):
    """Render a list of ``{'Text': ..., 'Value': ...}`` dicts as Perl source.

    The result looks like ``[['text' => 'value'], ['text2' => 'value2']]``.

    Args:
        data: list of dicts, each with 'Text' and 'Value' keys. Any falsy
            value (None, "", []) yields an empty string.

    Returns:
        The Perl array-of-arrays literal as a string, or "" for falsy input.

    Raises:
        TypeError: if *data* is truthy but not a list.
    """
    if not data:
        return ""
    if not isinstance(data, list):
        raise TypeError("Expected a list of dictionaries")

    def _perl_quote(value):
        # The text goes inside a single-quoted Perl string, so backslashes
        # and single quotes must be escaped or the generated code is broken.
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    elements = [
        f"['{_perl_quote(item['Text'])}' => '{_perl_quote(item['Value'])}']"
        for item in data
    ]
    return '[' + ', '.join(elements) + ']'
def assemble_version_string():
    """Build the one-line banner identifying this generator run.

    The banner contains the SM2Gen version, the installed Chameleon version
    (or "No version information" when the distribution is not found), the
    x.y.z part of the running Python version (or "Unknown") and the current
    local date/time formatted as YYYY-MM-DD HH:MM:SS.
    """
    try:
        chameleon_release = pkg_resources.get_distribution("Chameleon").version
    except pkg_resources.DistributionNotFound:
        chameleon_release = "No version information"

    # sys.version carries build details as well; keep only the x.y.z part
    found = re.search(r"(\d{1,3}\.\d{1,3}\.\d{1,3})", sys.version)
    interpreter_release = found.group(0) if found else "Unknown"

    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return (
        f"SM2Gen version:{SME2Gen_version}"
        f" Chameleon version:{chameleon_release}"
        f" On Python:{interpreter_release}"
        f" at {stamp}"
    )
def parse_json(json_obj, prefix=""):
    """Flatten nested dicts/lists into a list of "path: value" strings.

    Dict keys are joined to the path with "." (no leading dot at the top
    level) and list positions are appended as "[n]". Anything that is
    neither a dict nor a list is a leaf and becomes "<path>: <value>".
    """
    if isinstance(json_obj, dict):
        children = (
            (f"{prefix}.{name}" if prefix else name, child)
            for name, child in json_obj.items()
        )
    elif isinstance(json_obj, list):
        children = (
            (f"{prefix}[{pos}]", child) for pos, child in enumerate(json_obj)
        )
    else:
        # Leaf value
        return [f"{prefix}: {json_obj}"]

    flat = []
    for path, child in children:
        flat.extend(parse_json(child, path))
    return flat
def json5_to_list(filename):
    """Load a JSON5 file and return it flattened by parse_json()."""
    with open(filename, "r") as handle:
        return parse_json(json5.load(handle))
def json5_to_pandas(filename):
    """Load a JSON5 file and return it as a flattened pandas DataFrame.

    The parsed data is logged at DEBUG level and then passed to
    ``pandas.json_normalize``.
    """
    # Imported here because the module does not import pandas at top level.
    import pandas

    with open(filename, "r") as file:
        data = json5.load(file)
    logger.debug(data)
    # Was data.json_normalize(data): the parsed JSON5 object has no such
    # method, so the call always raised AttributeError.
    return pandas.json_normalize(data)
def json5_to_dict(filename):
    """Parse the JSON5 file *filename* and return the resulting object."""
    with open(filename, "r") as handle:
        parsed = json5.load(handle)
    return parsed
def rec_print(data, prefix=""):
    """Recursively log every leaf of a nested dict/list structure.

    Each leaf is logged at INFO level as "<path>: <value>", where dict keys
    add ".key" and list positions add "[n]" to the path.
    """
    if isinstance(data, dict):
        children = [(f"{prefix}.{name}", child) for name, child in data.items()]
    elif isinstance(data, list):
        children = [(f"{prefix}[{pos}]", child) for pos, child in enumerate(data)]
    else:
        # Basic type - report it and stop descending
        logger.info(f"{prefix}: {data}")
        return

    for path, child in children:
        rec_print(child, path)
def find_item(nested_dict, target_key):
    """Return the first value stored under *target_key*, searching depth first.

    Only nested dicts are descended into (lists are not searched). A match
    at the current level is returned as-is, even when it is None; a None
    coming back from a nested dict is treated as "not found" and the search
    continues. Returns None when nothing is found.
    """
    for name in nested_dict:
        entry = nested_dict[name]
        if name == target_key:
            return entry
        if not isinstance(entry, dict):
            continue
        found = find_item(entry, target_key)
        if found is not None:
            return found
    return None
def find_dicts_with_key(data, target_key):
    """Collect every dict in a nested dict/list structure that has *target_key*.

    Matches are returned in depth-first order, an enclosing dict before the
    dicts nested inside it. Anything that is not a dict or list yields [].
    """
    if isinstance(data, dict):
        matches = [data] if target_key in data else []
        children = data.values()
    elif isinstance(data, list):
        matches = []
        children = data
    else:
        return []

    for child in children:
        if isinstance(child, (dict, list)):
            matches += find_dicts_with_key(child, target_key)
    return matches
def find_values_with_key(data, target_key):
    """Collect every value stored under *target_key* in a nested structure.

    Dicts and lists are searched depth first; the value found in an
    enclosing dict comes before values found inside it. Anything that is
    not a dict or list yields [].
    """
    if isinstance(data, dict):
        values = [data[target_key]] if target_key in data else []
        children = data.values()
    elif isinstance(data, list):
        values = []
        children = data
    else:
        return []

    for child in children:
        if isinstance(child, (dict, list)):
            values += find_values_with_key(child, target_key)
    return values
def lint_json5(filename):
    """Check that *filename* can be read and parsed as JSON5.

    Logs a confirmation when the file parses. On any failure (unreadable
    file or invalid JSON5) the problem is logged and the program exits.

    Raises:
        SystemExit: with status 1 when the file cannot be read or parsed.
    """
    try:
        with open(filename, "r") as file:
            data = file.read()
        json5.loads(data)
        logger.info(f"{filename} as JSON5 data is valid")
    except Exception as e:
        logger.warning(f"{filename} as JSON5 data is invalid")
        # Was logger.warning("Error:", str(e)): an argument with no %s
        # placeholder makes logging report a formatting error instead.
        logger.warning(f"Error: {e}")
        # Non-zero status so calling scripts can see that linting failed
        sys.exit(1)
def flatten_hash_of_lists(hash_of_lists):
    """Flatten a dict whose values may be lists into a single-level dict.

    Each member of a list value is stored under "<key>_<index>" so keys stay
    unique; non-list values are copied unchanged. An empty list contributes
    no entries.
    """
    flat = {}
    for name, entry in hash_of_lists.items():
        if not isinstance(entry, list):
            flat[name] = entry
            continue
        flat.update((f"{name}_{pos}", member) for pos, member in enumerate(entry))
    return flat
def hl(keyname):
    """Return the top-level JSON5 value for *keyname*.

    When the key is absent a warning is logged and the string "None"
    (not the None object) is returned.
    """
    if keyname not in json5_dict:
        logger.warning(f"{keyname} not found in JSON5 - top level")
        return "None"
    return json5_dict[keyname]
def get_all_routes():
    """Return the "route" of every entry in the JSON5 "html" list.

    Entries without a "route" contribute None; a missing "html" list gives [].
    """
    routes = []
    for panel in json5_dict.get("html", []):
        routes.append(panel.get("route"))
    return routes
def lc_get_all_routes():
    """Return the lower-cased "route" of every entry in the JSON5 "html" list.

    Raises AttributeError if an entry has no "route", because the missing
    value is None and has no lower() method.
    """
    lowered = []
    for panel in json5_dict.get("html", []):
        lowered.append(panel.get("route").lower())
    return lowered
def has_file_been_modified(file_path):
    """Return True when the file's mtime is later than its ctime.

    Both values are Unix timestamps from os.path.getctime / getmtime and are
    logged at DEBUG level.

    NOTE(review): on Unix, os.path.getctime reports the last metadata change
    rather than the creation time, so this test may not mean "modified
    since creation" there - confirm on the target platform.
    """
    creation_time = os.path.getctime(file_path)
    last_modification_time = os.path.getmtime(file_path)
    logger.debug(f"{creation_time}*{last_modification_time}")
    # A leftover debugging quit() used to terminate the whole program here,
    # which made the comparison below unreachable.
    return creation_time < last_modification_time
def parse_xml_to_dict(xml_file):
    """Parse an XML file into a ``{tag: text}`` dict of the root's children.

    Args:
        xml_file: path or file object accepted by ElementTree.parse.

    Returns:
        A dict keyed by the tag of each direct child of the root. The value
        is the child's own text when it has any; otherwise the text of its
        first descendant element; otherwise "" when it has no descendants.
        Later children with the same tag overwrite earlier ones.
    """
    root = ET.parse(xml_file).getroot()
    xml_dict = {}
    for elem in root:
        if elem.text:
            xml_dict[elem.tag] = elem.text
            continue
        # No text of its own: fall back to the first descendant element
        first_descendant = elem.find(".//")
        # An empty element such as <Tag/> has no descendants; find() then
        # returns None and ".text" used to raise AttributeError.
        xml_dict[elem.tag] = (
            first_descendant.text if first_descendant is not None else ""
        )
    return xml_dict
def deduplicate_array(arr):
    """Return a list of the unique items of *arr* in first-seen order.

    Items must be hashable. The previous set()-based version returned the
    items in arbitrary order, contrary to its own comment, so generated
    output built from this list could differ between runs.
    """
    # dict keys are unique and keep insertion order
    return list(dict.fromkeys(arr))
def get_db_fields():
    """Placeholder: currently always returns a new empty list."""
    return list()
def get_table_control_data():
    """Return every "TableControl" value found in the JSON5 html list."""
    table_controls = find_values_with_key(json5_html_list, "TableControl")
    return table_controls
def extract_tables(json_data):
    """Map each table's TableControl name to its column list.

    Looks at every dict-valued entry of every dict in ``json_data['html']``
    and keeps those whose 'Type' is 'Table' and which have a 'TableControl'
    key. The result is ``{table_control: {'columns': [...]}}``, with an
    empty column list when 'Columns' is absent.
    """
    tables = {}
    for panel in json_data.get('html', []):
        if not isinstance(panel, dict):
            continue
        for entry in panel.values():
            if not isinstance(entry, dict):
                continue
            if entry.get('Type') != 'Table' or 'TableControl' not in entry:
                continue
            tables[entry['TableControl']] = {'columns': entry.get('Columns', [])}
    return tables
def format_text(text):
    """Tidy a message: normalise spacing after "." and "," and capitalise.

    A single fully upper-case word is simply returned capitalised.
    Otherwise the text is split into sentences on "." and into phrases on
    ",". The first word of every phrase is capitalised unless it is fully
    upper case, and the pieces are re-joined with ", " and ". ". The result
    is then passed through capitalize_words_if_all_caps() and
    spell_check_and_correct().
    """
    sentences = text.split(".")
    opening = sentences[0]
    # Deal with one capitalised word
    if len(sentences) == 1 and opening.isupper() and len(opening.split(" ")) == 1:
        return opening.capitalize()

    def tidy_phrase(phrase):
        # Strip leading whitespace from the phrase and from each word
        tokens = [token.lstrip() for token in phrase.lstrip().split(" ")]
        # Only the first word is capitalised, and only if not fully upper case
        if not tokens[0].isupper():
            tokens[0] = tokens[0].capitalize()
        return " ".join(tokens).lstrip()

    tidy_sentences = [
        ", ".join(tidy_phrase(phrase) for phrase in sentence.split(",")).lstrip()
        for sentence in sentences
    ]
    joined = ". ".join(tidy_sentences).lstrip()
    return spell_check_and_correct(capitalize_words_if_all_caps(joined))
def get_completion(prompt):
    """Send *prompt* as a single user message and return the reply text.

    Uses the module-level ``client`` with model "gpt-4o", temperature 0
    (focused, deterministic output) and at most 100 tokens. The content of
    the first choice is returned with surrounding whitespace stripped.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
        temperature=0,
        max_tokens=100,
    )
    # Pull the message content out of the response data
    first_choice = completion.to_dict()["choices"][0]
    return first_choice["message"]["content"].strip()
def get_translation(message="Hello", language="french"):
    """Translate *message* from English into *language* via get_completion().

    Args:
        message: the English text to translate.
        language: name of the target language.

    Returns:
        The translated text, or *message* itself when the translation comes
        back empty.

    Raises:
        SystemExit: with status 1 when the translation is more than five
            times longer than the original, which is treated as a runaway
            response.
    """
    prompt = f"""Translate the following text in triple ~~~ so that it is suitable for a server management application menu
and only return the actual translation; translate it from from English to {language} ~~~{message}~~~"""
    translated_message = get_completion(prompt)
    if not translated_message:
        # An empty reply used to raise ZeroDivisionError in the length check
        logger.warning(
            f"Empty translation returned for '{message}' ({language}) - keeping the English text"
        )
        return message
    # Look for over long messages
    if len(message) / len(translated_message) < 0.2:
        logger.error(f"{message} translated to {translated_message}")
        # Was quit(), which exits with status 0 on this error path
        sys.exit(1)
    return translated_message
import os
from datetime import datetime, timedelta
def check_file_version(filename,force_Files=False):
    """Choose the name to write generated output to without clobbering edits.

    The generation timestamp is read from the header of an existing file
    (the "... at YYYY-MM-DD HH:MM:SS" text in its first five lines). If the
    file was modified more than ThresholdSecs after that timestamp it is
    taken to have been edited by hand and "<filename>.new" is returned.

    Args:
        filename: path of the file about to be generated.
        force_Files: when True, always return *filename* unchanged.

    Returns:
        *filename*, or *filename* + ".new" when the existing file appears
        to have been modified since it was generated. *filename* is also
        returned when the file is missing, has no timestamp, or any other
        error occurs (a warning is logged).
    """
    if force_Files:
        return filename
    ThresholdSecs = 70  # allow secs since creation date before treat as changed.
    # The old code used line.split('at')[2], which only worked because the
    # word "Generated" in the header happens to contain "at". Match the
    # timestamp explicitly instead.
    stamp_pattern = re.compile(r" at (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})")
    try:
        with open(filename, 'r') as file:
            # Read the first five lines
            header_lines = [file.readline().strip() for _ in range(5)]
        # Extract the timestamp
        timestamp_str = None
        for line in header_lines:
            found = stamp_pattern.search(line)
            if found:
                timestamp_str = found.group(1)
                break
        if timestamp_str is None:
            logger.warning("Warning: No timestamp found. Returning original filename.")
            return filename
        # Generation time plus the allowed threshold
        file_timestamp = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
        file_timestamp += timedelta(seconds=ThresholdSecs)
        # Last modified time of the file, ignoring sub-second precision
        file_modified_time = datetime.fromtimestamp(os.path.getmtime(filename)).replace(microsecond=0)
        logger.debug(f"{filename} - created:{timestamp_str} - modified:{file_modified_time.strftime('%Y-%m-%d %H:%M:%S')}")
        if file_modified_time > file_timestamp:
            return f"{filename}.new"
        return filename
    except FileNotFoundError:
        logger.warning(f"Error: The file '{filename}' does not exist.")
        return filename
    except Exception:
        logger.warning(f"An error occurred: {traceback.format_exc()}")
        return filename
def convert_lex_to_dict(pairs_string):
    """Parse lex-file text into a list of ``{"id": ..., "text": ...}`` dicts.

    Entries have the form ``'key' => 'value'`` and are separated by a comma
    followed by a newline. Blank lines and lines starting with "#" (such as
    the generated header) are ignored, as are chunks with no " => ".
    When a key is repeated, the last value wins.

    Args:
        pairs_string: the full text of a lex file.

    Returns:
        A list of dicts with "id" and "text" keys, in first-seen key order.
    """
    entries = {}
    for chunk in pairs_string.split(",\n"):
        # Drop comment and blank lines; previously the "#" header lines were
        # folded into the first key.
        kept = [
            line for line in chunk.split("\n")
            if line.strip() and not line.lstrip().startswith("#")
        ]
        key, separator, value = "\n".join(kept).partition(" => ")
        if not separator:
            continue
        entries[key.strip("'")] = value.strip("'")
    return [{"id": key, "text": value} for key, value in entries.items()]
def capitalize_words_if_all_caps(s):
    """Title-case *s* when, ignoring spaces, it is entirely upper case.

    Any other string is returned unchanged.
    """
    without_spaces = "".join(s.split(" "))
    return s.title() if without_spaces.isupper() else s
def extract_input_fields(json_data, value_type):
    """List the input field names of each panel.

    Args:
        json_data: parsed JSON5 with an 'html' list of panel dicts, each
            having a 'route' key.
        value_type: currently unused; kept so existing callers still work.

    Returns:
        ``{route: [name, ...]}`` with one entry per panel. Names come from
        the 'Name' of every dict-valued key starting with 'Input' whose
        'Type' (case-insensitive) is one of readonlytext, text, select,
        checkbox, textarea or email. Panels without such inputs map to [].
    """
    accepted_types = {'readonlytext', 'text', 'select', 'checkbox', 'textarea', 'email'}
    result = {}
    for panel in json_data['html']:
        names = []
        result[panel['route']] = names
        for key, value in panel.items():
            if not (key.startswith('Input') and isinstance(value, dict)):
                continue
            input_type = value.get('Type')
            # An input with no 'Type' used to crash on None.lower()
            if not isinstance(input_type, str) or input_type.lower() not in accepted_types:
                continue
            input_name = value.get('Name', '')
            logger.debug(input_name)
            names.append(input_name)
    # Note: Empty lists are not removed, so all panels will be present in the result
    return result
def contains_chameleon_code(template: str) -> bool:
    """Report whether *template* contains "${", "tal:" or "metal:"."""
    markers = ("${", "tal:", "metal:")
    return any(marker in template for marker in markers)
if __name__ == "__main__":
    # Script entry point. In order, it:
    #   1. reads the OpenAI key from the ini file and parses the command line
    #   2. loads and lints the JSON5 definition
    #   3. renders the controller, custom controller, layout and one partial
    #      per panel from the Chameleon templates, plus a skeleton css file
    #   4. extracts the l('...') strings into the English lex file and feeds
    #      the generated lex ids back into the generated files
    #   5. unless --noLang is given, writes a translated lex file per language
    #
    # json5_dict, json5_html_list and client are deliberately left as module
    # globals because hl(), get_all_routes(), get_completion() etc. read them.
    strVersion = assemble_version_string()
    json5_dict: dict = {}
    json5_html_list: list = []
    logger.info(f"SM2 code from JSON5 - {strVersion}")
    home_dir = "/home/brianr/clients/SM2/SM2Gen/"
    json_filename = f"{home_dir}/json5/nfsshare.json5"  # CreateStarterWebsite.json5"

    # Read the OpenAI key from the ini file.
    # client stays None when no key is available, so the translation step
    # can be skipped cleanly rather than failing with a NameError later.
    client = None
    if os.path.exists(ini_file_path):
        config = configparser.ConfigParser()
        config.read(ini_file_path)
        # has_option() also copes with a missing [smegit] section, which
        # config["smegit"] would turn into a KeyError
        if config.has_option("smegit", "OPENAI_API_KEY"):
            OPENAI_API_KEY = config.get("smegit", "OPENAI_API_KEY")
            client = OpenAI(api_key=OPENAI_API_KEY)
        else:
            logger.info("OPENAI_API_KEY not found in the configuration file.")
    else:
        # Was logger.info("...:", file_path): file_path is undefined here
        # (NameError) and the message had no placeholder for the argument.
        logger.info(f"Configuration file not found at: {ini_file_path}")

    # Command line parameters
    parser = argparse.ArgumentParser(description="SM2Gen")
    parser.add_argument(
        "-f",
        "--filename",
        help="Specify a filename for the JSON5 file",
        default=json_filename,
    )
    parser.add_argument(
        "--force",
        action='store_true',
        help="Force an overwrite of all files",
    )
    # NOTE: the --noXxx options use store_false, so the attribute is True by
    # default and becomes False when the flag is given, i.e. args.noLang is
    # True when language files SHOULD be generated.
    parser.add_argument(
        "--noController",
        action='store_false',
        help="Stop it creating a controller file (not currently implemented)",
    )
    parser.add_argument(
        "--noHtml",
        action='store_false',
        help="Stop it creating html files(s) (not currently implemented)", default=False
    )
    parser.add_argument(
        "--noLang",
        action='store_false',
        help="Stop it creating language localise files(s)",
    )
    parser.add_argument(
        "--noCust",
        action='store_false',
        help="Stop it creating Custom controller file (not currently implemented)",
    )
    args = parser.parse_args()
    json_filename = args.filename
    logger.info(
        f"JSON5 from {json_filename} with noController={args.noController}, noHtml={args.noHtml} and noLang={args.noLang}"
    )  # Not yet activated

    # check if json5 file exists
    json_file_path = Path(json_filename)
    if not json_file_path.exists():
        logger.info(f"json5 file: {json_filename} not found")
        sys.exit(1)
    # check syntax of JSON5
    lint_json5(json_filename)
    # Get dict of it all
    try:
        json5_dict = json5_to_dict(json_filename)
    except Exception as e:
        # The exception text was previously logged as a literal "(e)"
        logger.info(f"json5 file {json_filename} failed lint test ({e})")
        sys.exit(1)
    # Get dict of just the html bit
    json5_html_list = json5_dict["html"]

    # Identify message
    logger.info(f"Generating mojo panels for {hl('PackageName')}")
    logger.info("-----------------------------------")
    # Routes for each panel
    routes = get_all_routes()
    lc_routes = lc_get_all_routes()

    #
    # File names
    #
    # --force makes check_file_version() return the real name so that
    # existing files are overwritten instead of producing "<name>.new"
    force_Files = args.force
    directory_path = Path("Targets/" + hl("PackageName"))
    # Create the directory if it doesn't exist
    directory_path.mkdir(parents=True, exist_ok=True)
    target_directory_path = "Targets/" + hl("PackageName") + "/"
    controller_file = check_file_version(target_directory_path + hl("PackageName") + ".pm", force_Files)
    custom_controller_file = check_file_version(target_directory_path + hl("PackageName") + "-Custom.pm", force_Files)
    layout_file = check_file_version(target_directory_path + hl("PackageName").lower() + ".html.ep", force_Files)
    css_file = check_file_version(target_directory_path + hl("PackageName").lower() + ".css", force_Files)
    partial_files = list()
    for panel in routes:
        partial_files.append(check_file_version(
            target_directory_path + '_' + hl("prefix") + "_" + panel + ".html.ep", force_Files)
        )
    logger.debug(f"Partial files to be created:{partial_files}")
    lex_file = check_file_version(target_directory_path + hl("PackageName").lower() + "_en.lex", force_Files)
    logger.info(lex_file)
    # arrays of hashes used to drive rows in tables
    tablecontrols = extract_tables(json5_dict)

    # Generate controller file
    dbfields = []  # extract_input_fields(json5_dict, 'db') # Params which correspond to Db fields - TBD
    logger.debug(f"{tablecontrols}")
    logger.debug(f"{tablecontrols.items()}")
    try:
        controller_template = PageTemplateFile(
            "Templates/controller.pm.tem", CHAMELEON_DEBUG="true"
        )
        try:
            controller_perl = controller_template.render(
                version=strVersion,
                tablecontrols=tablecontrols.items(),
                dbfields=dbfields,
                **json5_dict,
                panels=routes,
                lcPackageName=json5_dict["PackageName"].lower(),
            )
            with open(controller_file, "w") as file:
                file.write(controller_perl)
            logger.info(f"{controller_file} controller generated ok")
        except Exception as e:
            logger.info(f"A Chameleon controller *render* error occurred: {e} {traceback.format_exc()}")
    except Exception as e:
        logger.info(f"A Chameleon controller *template* error occurred: {e} {traceback.format_exc()}")

    # Generate Custom controller file
    try:
        custom_controller_template = PageTemplateFile("Templates/custom.pm.tem")
        fields = extract_input_fields(json5_dict, 'stash')  # Params which correspond to singleton values
        try:
            custom_controller_perl = custom_controller_template.render(
                version=strVersion,
                panels=routes,
                tablecontrols=tablecontrols.items(),
                fields=fields,
                dbfields=dbfields
            )
            # We must be careful to not overwrite the custom file if the developer has already written to it - TBD
            with open(custom_controller_file, "w") as file:
                file.write(custom_controller_perl)
            logger.info(f"{custom_controller_file} custom controller generated ok")
        except Exception as e:
            logger.info(f"A Chameleon custom controller *render* error occurred: {e} {traceback.format_exc()}")
    except Exception as e:
        logger.info(f"A Chameleon custom controller *template* error occurred: {e} {traceback.format_exc()}")

    # generate Layout file
    layout_template = PageTemplateFile("Templates/layout.html.ep.tem")
    try:
        try:
            layout_mojo = layout_template.render(
                version=strVersion, **json5_dict, conditions=routes,
                lcPackageName=json5_dict["PackageName"].lower()
            )
            with open(layout_file, "w") as file:
                file.write(layout_mojo)
            logger.info(f"{layout_file} mojo template layout file generated ok")
        except Exception as e:
            logger.info(f"A Chameleon *render* on layout file error occurred: {e}")
    except Exception as e:
        logger.info(f"A Chameleon *template* layout file error occurred: {e}")

    # Generate a partial file for each of the entries in the html list
    # Pull in the template code for each of the input types
    html_controls = parse_xml_to_dict("Templates/html_controls.html.ep.xml")
    acc_css_entries = ""  # One entry for each class created for html entries
    for panel_index, html in enumerate(json5_html_list):
        partial_file = partial_files[panel_index]
        # Generate a mojo template file, and then add in the controls
        # main file first
        partial_rendered = False
        try:
            partial_template = PageTemplateFile("Templates/partial.html.ep.tem")
            partial_mojo_context = {**json5_dict, **html}
            try:
                partial_mojo_template = partial_template.render(
                    version=strVersion, **partial_mojo_context,
                    lcPackageName=json5_dict["PackageName"].lower()
                )
                with open(partial_file, "w") as file:
                    file.write(partial_mojo_template)
                partial_rendered = True
                logger.info(f"{partial_file} mojo template generated ok - phase 1")
            except Exception as e:
                logger.info(
                    f"A Chameleon render error on partial file {html['route']} occurred: {e}"
                )
        except Exception as e:
            logger.info(f"A Chameleon html {html['route']} error occurred: {e}")

        # Now generate the controls from the rest of the entries in the dict.
        all_controls_html = ""
        prefix_is = hl("prefix")
        for html_control in html:
            inner_html = html[html_control]
            if isinstance(inner_html, dict):
                # input or table
                html_Type = inner_html['Type']
                type_serial = "".join(char for char in html_control if char.isdigit())
                class_name = html_Type.lower()[:4] + type_serial
                acc_css_entries += f".{class_name} {{}}\n"
                OptionsInPerl = python_to_perl_structure(inner_html.get("Options", ""))
                if html_Type == "Table":
                    acc_css_entries += f"thead .{class_name} {{}}\n"
                    acc_css_entries += f"tbody .{class_name} {{}}\n"
                try:
                    control_template = PageTemplate(html_controls[inner_html["Type"]])
                    try:
                        control_html = control_template.render(
                            version=strVersion, **inner_html, prefix=prefix_is,
                            classname=class_name,
                            type_serial=type_serial,
                            OptionsInPerl=OptionsInPerl
                        )
                        all_controls_html = all_controls_html + control_html
                    except Exception as e:
                        logger.info(
                            f"A Chameleon *render* on partial file control {html_control} error occurred: {e}"
                        )
                except Exception as e:
                    logger.info(
                        f"A Chameleon *template* on partial file control {html_control} error occurred: {e}"
                    )
            else:
                # just a simple entry - name less numerics is type
                # If the html does not include any Chameleon / TAL symbols, then do not run the Template extraction, just
                # insert the result of the html directly. This avoids Chameleon aborting things when a closing tag is on its own
                # such as the "Endgroup" token.
                html_Type = "".join(char for char in html_control if not char.isdigit())
                type_serial = "".join(char for char in html_control if char.isdigit())
                class_name = html_Type.lower()[:4] + type_serial
                acc_css_entries += f".{class_name} {{}}\n"
                simple_control_html = ""
                logger.debug(f"Partial ep generation html type:{html_Type}")
                # NOTE(review): the pasted source lost its indentation, so it
                # is unclear whether the html_controls lookup below was nested
                # inside this check. It is treated as a debug-only guard so
                # that unnumbered tokens are still expanded - confirm.
                if not type_serial == "":
                    logger.debug(f"{html_control},{html_Type},{type_serial}")
                if html_Type in html_controls:
                    if contains_chameleon_code(html_controls[html_Type]):
                        try:
                            simple_control_template = PageTemplate(html_controls[html_Type])
                            try:
                                simple_control_html = simple_control_template.render(
                                    version=strVersion, Value=inner_html, prefix=prefix_is,
                                    type_serial=type_serial
                                )
                            except Exception as e:
                                logger.warning(
                                    f"A Chameleon *render* on partial file control {html_control} error occurred: {e}"
                                )
                        except Exception as e:
                            logger.warning(
                                f"A Chameleon *template* partial file control {html_control} error occurred: {e}"
                            )
                    else:
                        logger.debug(f"Skipping Chameleon expansion for {html_control}")
                        simple_control_html = html_controls[html_Type]
                    all_controls_html = all_controls_html + simple_control_html
                else:
                    logger.debug(f"{html_Type} not found in html_controls xml")

        if not partial_rendered:
            # Phase 1 failed, so there is no freshly generated partial to
            # insert into (any file on disk is missing or left from an
            # earlier run).
            logger.warning(f"Skipping control insertion for {partial_file} - partial was not generated")
            continue
        # Now insert it into the partial file in the correct place.
        # Read in the text file and split at "%# Inputs etc in here."
        with open(partial_file, "r") as file:
            lines = file.readlines()
        marker_index = next(
            (line_no for line_no, line in enumerate(lines) if "%# Inputs etc in here." in line),
            len(lines),
        )
        # Insert the string just after the marker line
        lines.insert(marker_index + 1, all_controls_html + "\n")
        # Write the modified content back to the file
        with open(partial_file, "w") as file:
            file.writelines(lines)
        logger.info(f"Content modified and saved to {partial_file}")

    # Create the css file (the header, followed by a dummy entry for each class created/used above)
    with open(css_file, "w") as file:
        file.write(f"/*\nGenerated by SM2Gen version: {strVersion}\n*/\n")
        file.write(f".{hl('PackageName')}-panel {{}}\n")
        file.write(acc_css_entries)

    # Now generate the <name>.en file
    # Look through the generated files for the /l[\s|(]['|"](.*)['|"]\)/ strings.
    # create a combined list of all the files
    all_files = [controller_file, layout_file] + partial_files
    all_strings = []
    # Regular expression matching the strings to extract
    pattern = r"l[\s|(][\'|\"](.*)[\'|\"]\)"
    for filename in all_files:
        with open(filename, "r") as file:
            file_content = file.read()
        all_strings = all_strings + re.findall(pattern, file_content)
    # Take out any duplicates
    all_strings = deduplicate_array(all_strings)

    # '<prefix>_english-message' => 'English Message',
    string_lib = []  # Array of dicts
    for lex_message in all_strings:
        # If has a prefix - leave it for left hand side but delete it for the right
        # If has no prefix - add one for left hand side and leave it for the right
        # Map all spaces to "_" on left hand side
        # and truncate it to at most six "_"-separated parts (prefix included)
        original_str = lex_message
        # Check if it starts with the prefix (any case)
        if lex_message.lower().startswith(hl("prefix").lower()):
            left_str = lex_message
            right_str = lex_message[len(hl("prefix")) + 1:]
        else:
            left_str = hl("prefix") + "_" + lex_message
            right_str = lex_message
        # And take out any "_", map to " "
        right_str = right_str.replace("_", " ")
        right_str = format_text(right_str)
        left_str = left_str.replace(" ", "_")
        words = left_str.split("_")[:6]
        left_str = "_".join(words)
        next_lex_str = {"orig": original_str, "left": left_str, "right": right_str}
        string_lib.append(next_lex_str)

    # And write it to lex file
    lex_all = ""
    for lex_str in string_lib:
        lex_all += f"'{lex_str['left']}' => '{lex_str['right']}',\n"
    logger.info(f"Writing {lex_file}")
    with open(lex_file, "w") as file:
        file.write(f"#\n# Generated by SM2Gen version: {strVersion}\n#\n")
        file.write(lex_all)

    # and then play the strings back into the partials and the layout file
    logger.debug("..and feed the lex string names back into other files")
    for filename in all_files:
        with open(filename, "r") as file:
            file_content = file.read()
        for item in string_lib:
            original_str = item["orig"]
            left_str = item["left"]
            # Replace all occurrences of original string with left string
            file_content = file_content.replace(
                "l('" + original_str + "')", "l('" + left_str + "')"
            )
        # and write it back
        with open(filename, "w") as file:
            file.write(file_content)
        logger.info(f"Write out modified:{filename}")

    # Now generate all the translated lex files from a list of the languages and codes
    # (args.noLang is True unless --noLang was given - see the parser above)
    if args.noLang and client is None:
        logger.warning("No OpenAI client available - skipping translated lex files")
    elif args.noLang:
        languages_path = "Templates/languages.json"
        with open(languages_path, "r") as file:
            languages_str = file.read()
        lang_dict = json.loads(languages_str)
        with open(lex_file, "r") as file:
            lex_str = file.read()
        eng_lex_dict = convert_lex_to_dict(lex_str)
        for lang_item in lang_dict:
            logger.info(f"Translating from english lex file to {lang_item['language']}")
            code = lang_item["code"]
            # force_Files is passed so --force applies to translated files too
            translated_lex_file = check_file_version(
                f"{target_directory_path}{hl('PackageName').lower()}_{code}.lex",
                force_Files,
            )
            # Only do it if the lex file is missing - Removed bjr 18Sept2024
            translated_dict = []
            for lex_item in eng_lex_dict:
                # Get it from ChatGPT
                translated_text = get_translation(
                    lex_item["text"], lang_item["language"]
                )
                translated_dict.append(
                    {"id": lex_item["id"], "text": translated_text}
                )
            logger.info(f"Writing out lex file for {lang_item['code']}")
            with open(translated_lex_file, "w") as file:
                file.write(f"#\n# Generated by SM2Gen version: {strVersion}\n#\n")
                for item in translated_dict:
                    # escape any nasties
                    translated_text = (
                        item["text"]
                        .replace("\\", r"\\")
                        .replace('"', r"\"")
                        .replace("'", r"\'")
                    )
                    line = (
                        "'" + item["id"] + "' => " + '"' + translated_text + '",\n'
                    )
                    file.write(line)
    sys.exit(0)  # end of the program