SM2Gen/sm2gen.py

727 lines
27 KiB
Python
Raw Normal View History

2024-04-09 11:30:13 +02:00
import json5
import sys
import argparse
2024-08-26 15:25:11 +02:00
from chameleon import PageTemplateFile, PageTemplate
2024-04-09 11:30:13 +02:00
import pkg_resources
import xml.etree.ElementTree as ET
2024-04-10 09:41:31 +02:00
import re
import os
from datetime import datetime
from openai import OpenAI
import configparser
import json
from pathlib import Path
2024-04-09 11:30:13 +02:00
2024-05-06 11:05:43 +02:00
#
2024-08-26 15:25:11 +02:00
# To Do
2024-05-06 11:05:43 +02:00
# 1. Get routing working
# 2. Deal with ? and ! as sentence ends in text-format
# 3. Make lexical scan work with double or single quotes (if possible)
2024-08-26 15:25:11 +02:00
SME2Gen_version = "0.8"
2024-04-09 11:30:13 +02:00
json5_dict: dict = {}
json5_html_list: list = []
# Define the path to the INI file
2024-08-26 15:25:11 +02:00
ini_file_path = os.path.expanduser("~/.smegit/conf")
OPENAI_API_KEY = ""
2024-04-09 11:30:13 +02:00
2024-08-26 15:25:11 +02:00
def parse_json(json_obj, prefix=""):
    """Flatten nested JSON-like data into a list of "path: value" strings.

    Dictionary keys are appended to the path with a dot (no dot when the
    path is still empty), list positions are appended as "[index]", and any
    other value ends the path and is rendered as "<path>: <value>".

    Args:
        json_obj: A dict, list or scalar value.
        prefix: Path accumulated so far; callers normally leave it empty.

    Returns:
        A list of strings, one per scalar value found, in traversal order.
    """
    if isinstance(json_obj, dict):
        children = (
            (f"{prefix}.{name}" if prefix else name, child)
            for name, child in json_obj.items()
        )
    elif isinstance(json_obj, list):
        children = (
            (f"{prefix}[{pos}]", child) for pos, child in enumerate(json_obj)
        )
    else:
        # A scalar: this is a leaf of the structure.
        return [f"{prefix}: {json_obj}"]

    flat = []
    for path, child in children:
        flat += parse_json(child, path)
    return flat
2024-04-09 11:30:13 +02:00
def json5_to_list(filename):
    """Load a JSON5 file and return it flattened by parse_json.

    Args:
        filename: Path of the JSON5 file to read.

    Returns:
        The list of "path: value" strings produced by parse_json.
    """
    with open(filename, "r") as handle:
        parsed = json5.load(handle)
    flat_entries = parse_json(parsed)
    return flat_entries
2024-04-09 11:30:13 +02:00
def json5_to_pandas(filename):
    """Load a JSON5 file and normalise it into a pandas DataFrame.

    The parsed data is printed before it is converted.

    Args:
        filename: Path of the JSON5 file to read.

    Returns:
        The result of pandas.json_normalize applied to the parsed data.
    """
    # Imported locally: nothing else in this module depends on pandas.
    import pandas as pd

    with open(filename, "r") as file:
        data = json5.load(file)
    print(data)
    # json_normalize is a pandas function, not a method of the parsed data.
    return pd.json_normalize(data)
2024-04-09 11:30:13 +02:00
def json5_to_dict(filename):
    """Load a JSON5 file and return the parsed data unchanged.

    Args:
        filename: Path of the JSON5 file to read.

    Returns:
        Whatever json5.load produces for the file contents.
    """
    with open(filename, "r") as handle:
        return json5.load(handle)
def rec_print(data, prefix=""):
    """Print every scalar in a nested structure as "<path>: <value>".

    Dictionary keys extend the path with ".key" and list positions with
    "[index]"; anything that is neither a dict nor a list is printed.

    Args:
        data: A dict, list or scalar value.
        prefix: Path accumulated so far.
    """
    if isinstance(data, dict):
        labelled = [(f"{prefix}.{name}", child) for name, child in data.items()]
    elif isinstance(data, list):
        labelled = [(f"{prefix}[{pos}]", child) for pos, child in enumerate(data)]
    else:
        # Basic type: emit it and stop descending.
        print(f"{prefix}: {data}")
        return

    for label, child in labelled:
        rec_print(child, label)
2024-04-09 11:30:13 +02:00
def find_item(nested_dict, target_key):
    """Return the first value stored under target_key in nested dictionaries.

    The dictionary is scanned in order; a direct key match wins, otherwise
    nested dictionaries are searched depth-first. Lists are not searched.

    Args:
        nested_dict: Dictionary to search.
        target_key: Key to look for.

    Returns:
        The matching value, or None when no non-None match is found.
    """
    for name, entry in nested_dict.items():
        if name == target_key:
            return entry
        if not isinstance(entry, dict):
            continue
        found = find_item(entry, target_key)
        if found is not None:
            return found
    return None
def find_dicts_with_key(data, target_key):
    """Collect every dictionary in a nested structure that has target_key.

    Args:
        data: A dict or list (anything else yields an empty result).
        target_key: Key whose presence selects a dictionary.

    Returns:
        A list of the matching dictionaries (the objects themselves, not
        copies), outer dictionaries before the ones nested inside them.
    """
    if isinstance(data, dict):
        matches = [data] if target_key in data else []
        children = data.values()
    elif isinstance(data, list):
        matches = []
        children = data
    else:
        return []

    for child in children:
        # Only containers can hold further dictionaries.
        if isinstance(child, (dict, list)):
            matches += find_dicts_with_key(child, target_key)
    return matches
def find_values_with_key(data, target_key):
    """Collect every value stored under target_key in a nested structure.

    Args:
        data: A dict or list (anything else yields an empty result).
        target_key: Key whose values are wanted.

    Returns:
        A list of the values found, outer matches before nested ones.
    """
    if isinstance(data, dict):
        values = [data[target_key]] if target_key in data else []
        children = data.values()
    elif isinstance(data, list):
        values = []
        children = data
    else:
        return []

    for child in children:
        # Only containers can hold further matches.
        if isinstance(child, (dict, list)):
            values += find_values_with_key(child, target_key)
    return values
2024-04-09 11:30:13 +02:00
def lint_json5(filename):
    """Check that a file parses as JSON5, exiting the program if it does not.

    A message stating whether the file is valid is printed either way. On
    any failure (including being unable to read the file) the error text is
    printed and sys.exit() is called.

    Args:
        filename: Path of the JSON5 file to validate.
    """
    try:
        with open(filename, "r") as handle:
            raw_text = handle.read()
        json5.loads(raw_text)
        print(f"{filename} as JSON5 data is valid")
    except Exception as err:
        print(f"{filename} as JSON5 data is invalid")
        print(f"Error: {err}")
        sys.exit()
2024-04-09 11:30:13 +02:00
def flatten_hash_of_lists(hash_of_lists):
    """Expand list values of a dictionary into individually keyed entries.

    A list stored under "key" becomes entries "key_0", "key_1", ... (the
    index keeps the new keys unique); non-list values are copied as they
    are. An empty list contributes no entries.

    Args:
        hash_of_lists: Dictionary whose values may be lists.

    Returns:
        A new flat dictionary.
    """
    flattened = {}
    for name, entry in hash_of_lists.items():
        if not isinstance(entry, list):
            flattened[name] = entry
            continue
        flattened.update(
            {f"{name}_{pos}": member for pos, member in enumerate(entry)}
        )
    return flattened
2024-04-09 11:30:13 +02:00
def hl(keyname):
    """Return the top-level value for keyname from the loaded JSON5 data.

    Reads the module-level json5_dict. When the key is absent a message is
    printed and the string "None" (not the None object) is returned.

    Args:
        keyname: Top-level key to look up.
    """
    try:
        return json5_dict[keyname]
    except KeyError:
        print(f"{keyname} not found in JSON5 - top level")
        return "None"
2024-04-09 11:30:13 +02:00
def get_all_routes():
    """Return the "route" value of every entry in json5_dict["html"].

    Entries without a "route" key contribute None; a missing "html" key
    gives an empty list.
    """
    html_blocks = json5_dict.get("html", [])
    return [block.get("route") for block in html_blocks]
2024-04-09 11:30:13 +02:00
def lc_get_all_routes():
    """Return every route from json5_dict["html"] converted to lower case.

    A missing "html" key gives an empty list.
    """
    lowered_routes = []
    for block in json5_dict.get("html", []):
        lowered_routes.append(block.get("route").lower())
    return lowered_routes
def has_file_been_modified(file_path):
    """Report whether a file's modification time is later than its ctime.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True when os.path.getmtime is strictly greater than
        os.path.getctime for the file, otherwise False.

    Raises:
        OSError: If the file does not exist or cannot be inspected.
    """
    # NOTE(review): per the Python docs, getctime is the creation time on
    # Windows but the last metadata change on Unix, so on Unix this is not a
    # true "modified since creation" check - confirm intended platform.
    creation_time = os.path.getctime(file_path)
    last_modification_time = os.path.getmtime(file_path)
    # The leftover debugging print/quit() that used to sit here ended the
    # whole program before the comparison could run.
    return creation_time < last_modification_time
2024-04-09 11:30:13 +02:00
def parse_xml_to_dict(xml_file):
    """Read an XML file into a {tag: text} dictionary of the root's children.

    For each direct child of the root element the child's own text is used
    when it is non-empty; otherwise the text of the first element found by
    child.find(".//") is used instead. A later child with the same tag
    overwrites an earlier one.

    Args:
        xml_file: Path (or file object) accepted by ElementTree.parse.

    Returns:
        A dictionary mapping each child tag to its text.
    """
    top = ET.parse(xml_file).getroot()
    mapping = {}
    for child in top:
        own_text = child.text
        # Fall back to nested content when the element has no text itself.
        mapping[child.tag] = own_text if own_text else child.find(".//").text
    return mapping
2024-04-09 11:30:13 +02:00
2024-04-10 09:41:31 +02:00
def deduplicate_array(arr):
    """Return the items of arr with duplicates removed, keeping first-seen order.

    Args:
        arr: An iterable of hashable items.

    Returns:
        A new list containing each distinct item once, in the order in which
        it first appears in arr.
    """
    # dict keys are unique and keep insertion order, unlike a set, whose
    # iteration order is arbitrary.
    return list(dict.fromkeys(arr))
2024-04-10 09:41:31 +02:00
def get_db_fields():
    """Return the list of database field parameters (currently always empty)."""
    db_fields = []
    return db_fields
def get_table_control_data():
    """Return every "TableControl" value found in the module-level json5_html_list."""
    table_controls = find_values_with_key(json5_html_list, "TableControl")
    return table_controls
2024-04-10 09:41:31 +02:00
def format_text(text):
    """Tidy a piece of text: normalise spacing and capitalise phrase starts.

    The text is split on "." into sentences and each sentence on "," into
    phrases. The first word of every phrase is capitalised unless it is
    entirely upper case; all other words are left as they are. Phrases are
    re-joined with ", " and sentences with ". ".

    Special case: a single all-upper-case word (no "." and no space) is
    returned capitalised, e.g. "HELLO" becomes "Hello".

    Args:
        text: The text to format.

    Returns:
        The formatted text.
    """
    sentences = text.split(".")

    # A lone shouted word is simply toned down.
    if len(sentences) == 1 and " " not in text and text.isupper():
        return text.capitalize()

    def _tidy_phrase(phrase):
        # Capitalise the leading word unless it is fully upper case.
        head, *tail = phrase.lstrip().split(" ")
        head = head.lstrip()
        if not head.isupper():
            head = head.capitalize()
        return " ".join([head, *(word.lstrip() for word in tail)]).lstrip()

    tidied_sentences = (
        ", ".join(_tidy_phrase(part) for part in sentence.split(",")).lstrip()
        for sentence in sentences
    )
    return ". ".join(tidied_sentences).lstrip()
def get_completion(prompt):
    """Send a single user prompt to the chat completion API and return the reply.

    Uses the module-level `client`. The request is made with model
    "gpt-4o", temperature 0 and a limit of 100 tokens.

    Args:
        prompt: Text sent as the only (user) message.

    Returns:
        The content of the first choice's message, stripped of surrounding
        whitespace.
    """
    reply = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        # Low temperature keeps the output focused and deterministic.
        temperature=0,
        max_tokens=100,
    )
    # Pull the message text out of the response data.
    first_choice = reply.to_dict()["choices"][0]
    return first_choice["message"]["content"].strip()
def get_translation(message="Hello", language="french"):
    """Translate an English message into another language via get_completion.

    If the translation comes back more than five times longer than the
    original, both strings are printed and the program exits.

    Args:
        message: English text to translate.
        language: Name of the target language.

    Returns:
        The translated text as returned by get_completion. An empty reply is
        returned as-is.
    """
    prompt = f"""Translate the following text in triple ~~~ so that it is suitable for a server management application menu
and only return the actual translation; translate it from from English to {language} ~~~{message}~~~"""
    translated_message = get_completion(prompt)
    # Look for over long messages. An empty reply is skipped here so the
    # length ratio cannot divide by zero.
    if translated_message and len(message) / len(translated_message) < 0.2:
        print(f"{message} translated to {translated_message}")
        sys.exit()
    return translated_message
2024-09-16 15:01:17 +02:00
def check_file_version(filename):
    """Decide whether a generated file has been edited since it was generated.

    The first three lines of the file are searched for a timestamp of the
    form "YYYY-MM-DD HH:MM:SS". That timestamp is compared with the file's
    last-modified time (microseconds ignored).

    Args:
        filename: Path of the file to check.

    Returns:
        filename + ".new" when the file was modified after the embedded
        timestamp, filename itself when it was not, or None when the file is
        missing, has no timestamp in its first three lines, or any other
        error occurs (a message is printed in those cases).
    """
    # Match the timestamp directly instead of splitting the line on "at",
    # which only worked when the header held exactly two "at" substrings
    # ahead of the timestamp.
    timestamp_pattern = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")
    try:
        with open(filename, "r") as file:
            # Read the first three lines
            header_lines = [file.readline().strip() for _ in range(3)]

        # Extract the timestamp from the first header line that carries one
        for line in header_lines:
            match = timestamp_pattern.search(line)
            if match:
                timestamp_str = match.group(0)
                break
        else:
            raise ValueError("Invalid file format: no timestamp found.")

        # Convert the string timestamp to a datetime object
        file_timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
        # Get the last modified time of the file, ignoring microseconds
        file_modified_time = datetime.fromtimestamp(
            os.path.getmtime(filename)
        ).replace(microsecond=0)

        # Compare the timestamps
        if file_modified_time > file_timestamp:
            return f"{filename}.new"
        return filename
    except FileNotFoundError:
        print(f"Error: The file '{filename}' does not exist.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
2024-08-26 15:25:11 +02:00
def convert_lex_to_dict(pairs_string):
    """Parse lex-file text into a list of {"id": ..., "text": ...} records.

    The text is split into entries on ",\\n"; blank entries are ignored.
    Each entry must look like "'id' => 'text'"; the surrounding single
    quotes are stripped from both sides. When an id occurs more than once
    the last text wins, at the position of the first occurrence.

    Args:
        pairs_string: Contents of a lex file.

    Returns:
        A list of dictionaries with the keys "id" and "text".

    Raises:
        IndexError: If a non-blank entry does not contain " => ".
    """
    entries = {}
    for chunk in pairs_string.split(",\n"):
        if not chunk.strip():
            continue
        pieces = chunk.split(" => ")
        entries[pieces[0].strip("'")] = pieces[1].strip("'")
    return [{"id": ident, "text": wording} for ident, wording in entries.items()]
2024-04-09 11:30:13 +02:00
if __name__ == "__main__":
    # Script entry point. In order:
    #   1. build a version banner that is passed to every template,
    #   2. read the OpenAI API key from the ini file (if present),
    #   3. load and check the JSON5 definition file,
    #   4. render the controller, custom controller, layout and partials,
    #   5. extract the localisable strings into an English lex file and
    #      write the lex ids back into the generated files,
    #   6. optionally produce translated lex files.
    # Everything stays at module level because hl(), get_all_routes() and
    # get_completion() read json5_dict, json5_html_list and client as
    # module globals.
    try:
        chameleon_version = pkg_resources.get_distribution("Chameleon").version
    except pkg_resources.DistributionNotFound:
        chameleon_version = "Version information not available"
    python_version = sys.version
    version_pattern = r"(\d{1,3}\.\d{1,3}\.\d{1,3})"
    version_match = re.search(version_pattern, python_version)
    python_version = version_match.group(0) if version_match else "Unknown"
    current_datetime = datetime.now()
    formatted_datetime = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
    strVersion = (
        "SM2Gen version:"
        + SME2Gen_version
        + " Chameleon version:"
        + chameleon_version
        + " On Python:"
        + python_version
        + " at "
        + formatted_datetime
    )

    json5_dict: dict = {}
    json5_html_list: list = []

    print(f"SM2 code from JSON5 - {strVersion}")

    home_dir = "/home/brianr/clients/SM2/SM2Gen/"
    json_filename = f"{home_dir}/json5/nfsshare.json5"  # CreateStarterWebsite.json5"

    # Read the OpenAI key from the ini file. client stays None when no key
    # is available so the translation step can be skipped cleanly.
    client = None
    if os.path.exists(ini_file_path):
        # Create a configparser object and read the file
        config = configparser.ConfigParser()
        config.read(ini_file_path)
        # has_option is also False when the [smegit] section is missing.
        if config.has_option("smegit", "OPENAI_API_KEY"):
            OPENAI_API_KEY = config["smegit"]["OPENAI_API_KEY"]
            # print("API Key:", OPENAI_API_KEY)
            client = OpenAI(api_key=OPENAI_API_KEY)
        else:
            print("OPENAI_API_KEY not found in the configuration file.")
    else:
        print("Configuration file not found at:", ini_file_path)

    # Command line parameters - not in use
    parser = argparse.ArgumentParser(description="SM2Gen")
    parser.add_argument(
        "-f",
        "--filename",
        help="Specify a filename for the JSON5 file",
        default=json_filename,
    )
    parser.add_argument(
        "-nco",
        "--noController",
        help="Stop it creating a controller file",
        default="yes",
    )
    parser.add_argument(
        "-nh", "--noHtml", help="Stop it creating html files(s)", default="yes"
    )
    parser.add_argument(
        "-nl",
        "--noLang",
        help="Stop it creating language localise files(s)",
        default="yes",
    )
    parser.add_argument(
        "-ncu",
        "--noCust",
        help="Stop it creating Custom controller file",
        default="yes",
    )
    args = parser.parse_args()
    json_filename = args.filename
    print(
        f"JSON5 from {json_filename} with noController={args.noController}, noHtml={args.noHtml} and noLang={args.noLang}"
    )  # Not yet activated

    # check if json5 file exists
    json_file_path = Path(json_filename)
    if not json_file_path.exists():
        print(f"json5 file: {json_filename} not found")
        sys.exit(1)

    # check syntax of JSON5
    lint_json5(json_filename)

    # Get dict of it all
    json5_dict = json5_to_dict(json_filename)

    # Get dict of just the html bit
    json5_html_list = json5_dict["html"]

    # Identify message
    print(f"\nGenerating mojo panels for {hl('PackageName')}")
    print("-----------------------------------")

    # Routes for each panel
    routes = get_all_routes()
    lc_routes = lc_get_all_routes()

    # File names
    # Define the path for the generated files
    directory_path = Path("Targets/" + hl("PackageName"))
    # Create the directory if it doesn't exist
    directory_path.mkdir(parents=True, exist_ok=True)
    target_directory_path = "Targets/" + hl("PackageName") + "/"

    controller_file = target_directory_path + hl("PackageName") + ".pm"
    custom_controller_file = target_directory_path + hl("PackageName") + "-Custom.pm"
    # Call it .new if one is already there (and may have been edited by the developer)
    if os.path.exists(custom_controller_file):
        custom_controller_file = custom_controller_file + ".new"
    layout_file = target_directory_path + hl("PackageName").lower() + ".html.ep"
    # One partial file per route, in the same order as the routes.
    partial_files = [
        target_directory_path + '_' + hl("prefix") + "_" + panel + ".html.ep"
        for panel in routes
    ]
    print(f"Partial files to be created:{partial_files}")
    lex_file = target_directory_path + hl("PackageName").lower() + "_en.lex"
    tablecontrols = (
        get_table_control_data()
    )  # arrays of hashes used to drive rows in tables

    # print(strVersion,tablecontrols,routes)

    # Generate controller file
    try:
        controller_template = PageTemplateFile(
            "Templates/controller.pm.tem", CHAMELEON_DEBUG="true"
        )
        dbentries = get_db_fields()  # Params which correspond to Db fields
        try:
            controller_perl = controller_template.render(
                version=strVersion,
                tablecontrols=tablecontrols,
                dbentries=dbentries,
                **json5_dict,
                panels=routes,
                lcPackageName=json5_dict["PackageName"].lower(),
            )
            with open(controller_file, "w") as file:
                file.write(controller_perl)
            print(f"{controller_file} controller generated ok")
        except Exception as e:
            print(f"A Chameleon controller render error occurred: {e}")
    except Exception as e:
        print(f"A Chameleon controller template error occurred: {e}")

    # Generate Custom controller file
    try:
        custom_controller_template = PageTemplateFile("Templates/custom.pm.tem")
        try:
            custom_controller_perl = custom_controller_template.render(
                version=strVersion, panels=routes, tablecontrols=tablecontrols
            )
            # An existing custom file is not overwritten: the ".new" name
            # chosen above is used instead.
            with open(custom_controller_file, "w") as file:
                file.write(custom_controller_perl)
            print(f"{custom_controller_file} custom controller generated ok")
        except Exception as e:
            print(f"A Chameleon custom controller render error occurred: {e}")
    except Exception as e:
        print(f"A Chameleon custom controller template error occurred: {e}")

    # generate Layout file
    layout_template = PageTemplateFile("Templates/layout.html.ep.tem")
    try:
        try:
            layout_mojo = layout_template.render(
                version=strVersion, **json5_dict, conditions=routes
            )
            with open(layout_file, "w") as file:
                file.write(layout_mojo)
            print(f"{layout_file} mojo template layout file generated ok")
        except Exception as e:
            print(f"A Chameleon render on layout file error occurred: {e}")
    except Exception as e:
        print(f"A Chameleon template layout file error occurred: {e}")

    # Generate a partial file for each of the entries in the html list
    # Pull in the template code for each of the input types
    # html_controls = json5_to_dict('Templates/html_controls.html.ep.tem')
    html_controls = parse_xml_to_dict("Templates/html_controls.html.ep.xml")
    for i, html in enumerate(json5_html_list):
        # Generate a mojo template file, and then add in the controls
        # main file first
        try:
            partial_template = PageTemplateFile("Templates/partial.html.ep.tem")
            partial_mojo_context = {**json5_dict, **html}
            try:
                partial_mojo_template = partial_template.render(
                    version=strVersion, **partial_mojo_context
                )
                with open(partial_files[i], "w") as file:
                    file.write(partial_mojo_template)
                print(f"{partial_files[i]} mojo template generated ok - phase 1")
            except Exception as e:
                print(
                    f"A Chameleon render error on partial file {html['route']} occurred: {e}"
                )
        except Exception as e:
            print(f"A Chameleon html {html['route']} error occurred: {e}")

        # Now generate the controls from the rest of the entries in the dict.
        all_controls_html = ""
        prefix_is = hl("prefix")
        for html_control in html:
            inner_html = html[html_control]
            if isinstance(inner_html, dict):
                # Fall back to the entry key so reporting an error cannot
                # itself fail when the control has no "Name".
                control_name = inner_html.get("Name", html_control)
                try:
                    control_template = PageTemplate(html_controls[inner_html["Type"]])
                    try:
                        control_html = control_template.render(
                            version=strVersion, **inner_html, prefix=prefix_is
                        )
                        all_controls_html = all_controls_html + control_html
                    except Exception as e:
                        print(
                            f"A Chameleon render on partial file control {control_name} error occurred: {e}"
                        )
                except Exception as e:
                    print(
                        f"A Chameleon render on partial file control {control_name} error occurred: {e}"
                    )
            else:
                # just a simple entry - name less numerics is type
                html_Type = "".join(char for char in html_control if not char.isdigit())
                try:
                    simple_control_template = PageTemplate(html_controls[html_Type])
                    try:
                        simple_control_html = simple_control_template.render(
                            version=strVersion, Value=inner_html, prefix=prefix_is
                        )
                        all_controls_html = all_controls_html + simple_control_html
                    except Exception as e:
                        print(
                            f"A Chameleon render on partial file control {html_control} error occurred: {e}"
                        )
                except Exception as e:
                    print(
                        f"A Chameleon template partial file control {html_control} error occurred: {e}"
                    )

        # Now insert it into the partial file in the correct place.
        # Read in the text file and split at "%# Inputs etc in here."
        with open(partial_files[i], "r") as file:
            lines = file.readlines()
        # Position of the marker line; falls back to the end of the file.
        index = next(
            (
                line_no
                for line_no, line in enumerate(lines)
                if "%# Inputs etc in here." in line
            ),
            len(lines),
        )
        # Insert the controls on the line after the marker
        lines.insert(index + 1, all_controls_html + "\n")
        # Write the modified content back to the file
        with open(partial_files[i], "w") as file:
            file.writelines(lines)
        print(f"Content modified and saved to {partial_files[i]}")

    # Now generate the <name>.en file
    # Look through the generated files for the /l[\s|(]['|"](.*)['|"]\)/ strings.
    # create a combined list of all the files
    all_files = [controller_file, layout_file] + partial_files
    # Regular expression matching the strings to extract
    pattern = r"l[\s|(][\'|\"](.*)[\'|\"]\)"
    all_strings = []
    for filename in all_files:
        with open(filename, "r") as file:
            file_content = file.read()
        # Collect all occurrences of the pattern from the file content
        all_strings = all_strings + re.findall(pattern, file_content)
    # Take out any duplicates
    all_strings = deduplicate_array(all_strings)

    # '<prefix>_english-message' => 'English Message',
    string_lib = []  # Array of dicts
    for lex_message in all_strings:
        # If has a prefix - leave it for left hand side but delete it for the right
        # If has no prefix - add one for left hand side and leave it for the right
        # Map all spaces to "_" on left hand side
        # and truncate it to at most six "_"-separated parts (prefix included)
        original_str = lex_message
        # Check if it starts with the prefix (any case)
        if lex_message.lower().startswith(hl("prefix").lower()):
            left_str = lex_message
            right_str = lex_message[len(hl("prefix")) + 1 :]
        else:
            left_str = hl("prefix") + "_" + lex_message
            right_str = lex_message
        # And take out any "_", map to " "
        right_str = right_str.replace("_", " ")
        # print(f"Right:{right_str}")
        right_str = format_text(right_str)
        left_str = left_str.replace(" ", "_")
        words = left_str.split("_")[:6]
        left_str = "_".join(words)
        next_lex_str = {"orig": original_str, "left": left_str, "right": right_str}
        string_lib.append(next_lex_str)

    # And write it to lex file
    # Now process them one by one into the lexical file
    lex_all = "".join(
        f"'{lex_str['left']}' => '{lex_str['right']}',\n" for lex_str in string_lib
    )
    print(f"Writing {lex_file}")
    with open(lex_file, "w") as file:
        file.write(lex_all)

    # and then play the strings back into the partials and the layout file
    print("..and feed the lex string names back into other files")
    for filename in all_files:
        with open(filename, "r") as file:
            file_content = file.read()
        # Scan through
        for item in string_lib:
            original_str = item["orig"]
            left_str = item["left"]
            # Replace all occurrences of original string with left string
            # (only the l('...') single-quote form is rewritten)
            file_content = file_content.replace(
                "l('" + original_str + "')", "l('" + left_str + "')"
            )
        # and write it back
        with open(filename, "w") as file:
            file.write(file_content)
        print(f"Write out modified:{filename}")

    # Now generate all the translated lex files from a list of the languages and codes
    # if specifically requested. --noLang is a plain string that defaults to
    # "yes", so this only runs when it is given an empty value.
    translations_requested = not args.noLang
    if translations_requested and client is None:
        # get_completion needs the OpenAI client created from the ini file.
        print("Skipping translations as no OpenAI client has been configured.")
        translations_requested = False
    if translations_requested:
        languages_path = "Templates/languages.json"
        with open(languages_path, "r") as file:
            languages_str = file.read()
        lang_dict = json.loads(languages_str)
        with open(lex_file, "r") as file:
            lex_str = file.read()
        eng_lex_dict = convert_lex_to_dict(lex_str)
        for lang_item in lang_dict:
            print(f"Translating from english lex file to {lang_item['language']}")
            code = lang_item["code"]
            translated_lex_file = (
                f"{target_directory_path}{hl('PackageName').lower()}_{code}.lex"
            )
            # Only do it if the lex file is missing
            if not os.path.exists(translated_lex_file):
                translated_dict = []
                for lex_item in eng_lex_dict:
                    # Get it from ChatGPT
                    translated_text = get_translation(
                        lex_item["text"], lang_item["language"]
                    )
                    translated_dict.append(
                        {"id": lex_item["id"], "text": translated_text}
                    )
                print(f"Writing out lex file for {lang_item['code']}")
                with open(translated_lex_file, "w") as file:
                    for item in translated_dict:
                        # escape any nasties
                        translated_text = (
                            item["text"]
                            .replace("\\", r"\\")
                            .replace('"', r"\"")
                            .replace("'", r"\'")
                        )
                        line = (
                            "'" + item["id"] + "' => " + '"' + translated_text + '",\n'
                        )
                        file.write(line)
                        # print(f"{item['id']} => {item['text']}\n")
            else:
                print(
                    f"Skipping the creation of {translated_lex_file} as it exists already"
                )

    sys.exit()  # end of the program