Files
SM2Gen/sm2gen.py

557 lines
20 KiB
Python
Raw Normal View History

2024-04-09 09:30:13 +00:00
import json5
import sys
import argparse
from chameleon import PageTemplateFile,PageTemplate
import pkg_resources
import xml.etree.ElementTree as ET
2024-04-10 08:41:31 +01:00
import re
import os
import pkg_resources
from datetime import datetime
import xml.etree.ElementTree as ET
from openai import OpenAI
import configparser
import json
2024-04-09 09:30:13 +00:00
2024-05-06 10:05:43 +01:00
#
# To do:
# 1. Get routing working
# 2. Deal with ? and ! as sentence ends in text-format
# 3. Make lexical scan work with double or single quotes (if possible)
# Version of this generator; it is included in the banner string that the
# main script prints and passes to every template as `version`.
SME2Gen_version = '0.8'
2024-04-09 09:30:13 +00:00
# Parsed JSON5 definition and its 'html' list; both are (re)assigned by the
# main script and read by helpers such as hl() and get_all_routes().
json5_dict: dict = {}
json5_html_list: list = []
# Define the path to the INI file
ini_file_path = os.path.expanduser('~/.smegit/conf')
# Placeholder; the main script overwrites it with the value from the INI file.
OPENAI_API_KEY = ""
2024-04-09 09:30:13 +00:00
def parse_json(json_obj, prefix=''):
    """Flatten a nested JSON-like structure into "path: value" strings.

    Dict keys are joined to the path with "." and list positions are
    appended as "[n]"; every leaf becomes one "<path>: <value>" entry.

    :param json_obj: dict, list or leaf value to flatten.
    :param prefix: path accumulated so far (empty at the top level).
    :return: list of strings, one per leaf, in traversal order.
    """
    if isinstance(json_obj, dict):
        children = [
            (f"{prefix}.{name}" if prefix else name, child)
            for name, child in json_obj.items()
        ]
    elif isinstance(json_obj, list):
        children = [
            (f"{prefix}[{pos}]", child) for pos, child in enumerate(json_obj)
        ]
    else:
        # Leaf value: a single entry, nothing to recurse into.
        return [f"{prefix}: {json_obj}"]

    flat = []
    for path, child in children:
        flat.extend(parse_json(child, path))
    return flat
2024-04-09 09:30:13 +00:00
def json5_to_list(filename):
    """Load a JSON5 file and return its contents flattened by parse_json().

    :param filename: path of the JSON5 file to read.
    :return: list of "path: value" strings.
    """
    with open(filename, 'r') as handle:
        parsed = json5.load(handle)
    return parse_json(parsed)
2024-04-09 09:30:13 +00:00
def json5_to_pandas(filename):
    """Load a JSON5 file and return it as a flattened pandas DataFrame.

    The original called ``json_normalize`` on the loaded data itself, which
    is a plain Python object and has no such method; the function lives in
    the pandas namespace.

    :param filename: path of the JSON5 file to read.
    :return: DataFrame produced by ``pandas.json_normalize``.
    """
    # Imported locally because the file does not import pandas at the top.
    import pandas as pd

    with open(filename, 'r') as file:
        data = json5.load(file)
    print (data)
    return pd.json_normalize(data)
2024-04-09 09:30:13 +00:00
def json5_to_dict(filename):
    """Read a JSON5 file and return the parsed data unchanged.

    :param filename: path of the JSON5 file to read.
    :return: whatever ``json5.load`` produces for the file.
    """
    with open(filename, 'r') as handle:
        return json5.load(handle)
2024-04-09 09:30:13 +00:00
def rec_print(data, prefix=''):
    """Print every leaf of a nested dict/list structure as "path: value".

    :param data: dict, list or basic value to print.
    :param prefix: path built up so far; dict keys add ".key" and list
        positions add "[n]".
    """
    if isinstance(data, dict):
        labelled = [(f"{prefix}.{name}", item) for name, item in data.items()]
    elif isinstance(data, list):
        labelled = [(f"{prefix}[{pos}]", item) for pos, item in enumerate(data)]
    else:
        # Neither dict nor list: a basic value, print it and stop.
        print(f"{prefix}: {data}")
        return

    for label, item in labelled:
        rec_print(item, label)
2024-04-09 09:30:13 +00:00
def find_item(nested_dict, target_key):
    """Return the first value stored under target_key in nested dicts.

    Keys are visited in dict order and nested dicts are searched depth
    first; lists are not descended into.

    :param nested_dict: dict to search.
    :param target_key: key to look for.
    :return: the matching value, or None when nothing (non-None) is found.
    """
    for name, entry in nested_dict.items():
        if name == target_key:
            return entry
        if not isinstance(entry, dict):
            continue
        found = find_item(entry, target_key)
        if found is not None:
            return found
    return None
2024-04-09 09:30:13 +00:00
def find_dicts_with_key(data, target_key):
    """Collect every dict, at any depth, that contains target_key.

    :param data: nested structure of dicts and lists (other types yield []).
    :param target_key: key that a dict must contain to be collected.
    :return: list of the matching dict objects in depth-first order.
    """
    if isinstance(data, dict):
        matches = [data] if target_key in data else []
        children = data.values()
    elif isinstance(data, list):
        matches = []
        children = data
    else:
        return []

    for child in children:
        # Only containers can hold further matches.
        if isinstance(child, (dict, list)):
            matches.extend(find_dicts_with_key(child, target_key))
    return matches
def find_values_with_key(data, target_key):
    """Collect the value stored under target_key in every dict, at any depth.

    :param data: nested structure of dicts and lists (other types yield []).
    :param target_key: key whose values are wanted.
    :return: list of the values found, in depth-first order.
    """
    if isinstance(data, dict):
        found = [data[target_key]] if target_key in data else []
        children = data.values()
    elif isinstance(data, list):
        found = []
        children = data
    else:
        return []

    for child in children:
        # Only containers can hold further matches.
        if isinstance(child, (dict, list)):
            found.extend(find_values_with_key(child, target_key))
    return found
2024-04-09 09:30:13 +00:00
def lint_json5(filename):
    """Check that a file parses as JSON5, terminating the program if not.

    Prints a one-line verdict.  When the file cannot be read or parsed the
    error is printed and the process exits with a non-zero status (the
    original exited with status 0, which reports success to the caller).

    :param filename: path of the JSON5 file to check.
    :raises SystemExit: when the file is unreadable or not valid JSON5.
    """
    try:
        with open(filename, 'r') as file:
            data = file.read()
        json5.loads(data)
    except Exception as e:
        print(f"{filename} as JSON5 data is invalid")
        print("Error:", str(e))
        sys.exit(1)
    else:
        print(f"{filename} as JSON5 data is valid")
2024-04-09 09:30:13 +00:00
def flatten_hash_of_lists(hash_of_lists):
    """Expand list values of a dict into individually keyed entries.

    A list stored under ``key`` becomes ``key_0``, ``key_1`` and so on (the
    index keeps the new keys unique); every other value is copied as-is.

    :param hash_of_lists: dict whose values may be lists.
    :return: new dict with one level of lists flattened.
    """
    flat = {}
    for name, entry in hash_of_lists.items():
        if not isinstance(entry, list):
            flat[name] = entry
            continue
        flat.update(
            {f"{name}_{pos}": member for pos, member in enumerate(entry)}
        )
    return flat
2024-04-09 09:30:13 +00:00
def hl(keyname):
    """Return the top-level value for keyname from the loaded JSON5 dict.

    :param keyname: key to look up in the module-level ``json5_dict``.
    :return: the stored value, or the string 'None' (after printing a
        message) when the key is absent.
    """
    if keyname not in json5_dict:
        print(f"{keyname} not found in JSON5 - top level")
        return 'None'
    return json5_dict[keyname]
def get_all_routes():
    """Return the 'route' of every entry in the 'html' list of json5_dict.

    Entries without a 'route' contribute None; a missing 'html' list gives
    an empty result.
    """
    routes = []
    for block in json5_dict.get('html', []):
        routes.append(block.get('route'))
    return routes
2024-04-09 09:30:13 +00:00
def lc_get_all_routes():
    """Return every 'route' from the 'html' list of json5_dict, lower-cased.

    A missing 'html' list gives an empty result.
    """
    lowered = []
    for block in json5_dict.get('html', []):
        lowered.append(block.get('route').lower())
    return lowered
def has_file_been_modified(file_path):
    """Report whether a file's modification time is later than its ctime.

    A leftover debug ``print`` followed by ``quit()`` used to end the whole
    program here, so the comparison below was never reached; both have been
    removed.

    NOTE(review): ``os.path.getctime`` is the creation time on Windows but
    the last metadata-change time on Unix - confirm this comparison means
    what is intended on the target platform.

    :param file_path: path of the file to inspect.
    :return: True if the ctime is earlier than the last modification time,
        otherwise False.
    """
    # Both values are Unix timestamps.
    creation_time = os.path.getctime(file_path)
    last_modification_time = os.path.getmtime(file_path)
    return creation_time < last_modification_time
2024-04-09 09:30:13 +00:00
def parse_xml_to_dict(xml_file):
    """Map each child tag of an XML file's root element to its text.

    When a child has no text of its own, the text of its first descendant
    element is used instead (the original comment describes this as
    extracting CDATA text).

    :param xml_file: path or file object accepted by ``ET.parse``.
    :return: dict of tag name to text; a repeated tag keeps the last value.
    """
    contents = {}
    for child in ET.parse(xml_file).getroot():
        if child.text:
            value = child.text
        else:
            value = child.find('.//').text
        contents[child.tag] = value
    return contents
2024-04-09 09:30:13 +00:00
2024-04-10 08:41:31 +01:00
def deduplicate_array(arr):
    """Return the items of arr with duplicates removed, keeping their order.

    The original went through ``set()``, which discards ordering even though
    its comment promised to keep it.  ``dict.fromkeys`` keeps the first
    occurrence of each item in its original position.

    :param arr: iterable of hashable items.
    :return: list of the unique items in first-seen order.
    """
    return list(dict.fromkeys(arr))
2024-04-10 08:41:31 +01:00
def get_db_fields():
    """Return the list of database fields; currently always empty."""
    db_fields = []
    return db_fields
def get_table_control_data():
    """Return every 'TableControl' value found in the module's html list."""
    table_controls = find_values_with_key(json5_html_list, 'TableControl')
    return table_controls
2024-04-10 08:41:31 +01:00
def format_text(text):
    """Tidy a piece of text: spacing after "." and "," plus capitalisation.

    The text is split on "." into sentences and each sentence on "," into
    phrases.  The first word of every phrase is capitalised unless it is
    fully upper case; the pieces are then rejoined with ", " and ". ".
    A single all-upper-case word is simply returned capitalised.

    :param text: text to format.
    :return: the formatted text.
    """
    sentences = text.split(".")
    opening = sentences[0]

    # Deal with one capitalised word.
    if len(sentences) == 1 and opening.isupper() and len(opening.split(" ")) == 1:
        return opening.capitalize()

    def tidy_phrase(phrase):
        # Strip leading blanks from the phrase and from each word in it.
        pieces = [piece.lstrip() for piece in phrase.lstrip().split(' ')]
        # Only the first word changes, and only when it is not all caps.
        if not pieces[0].isupper():
            pieces[0] = pieces[0].capitalize()
        return ' '.join(pieces).lstrip()

    def tidy_sentence(sentence):
        phrases = [tidy_phrase(part) for part in sentence.split(",")]
        return ", ".join(phrases).lstrip()

    return ". ".join([tidy_sentence(item) for item in sentences]).lstrip()
def get_completion(prompt):
    """Send prompt as a single user message and return the reply text.

    Uses the module-level ``client`` with model "gpt-4o" and a 100-token
    limit on the response.

    :param prompt: text of the user message.
    :return: content of the first choice, stripped of surrounding whitespace.
    """
    chat_messages = [{'role': "user", 'content': prompt}]
    reply = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
        max_tokens=100,
    )
    # Pull the message content out of the dict form of the response.
    first_choice = reply.to_dict()["choices"][0]
    return first_choice["message"]["content"].strip()
def get_translation(message="Hello",language="french"):
    """Translate message from English into language via get_completion().

    If the translation comes back more than five times as long as the
    original, both are printed and the program is terminated with quit().

    :param message: English text to translate.
    :param language: name of the target language.
    :return: the translated text.
    """
    request = (
        "Translate the following text in triple ~~~ so that it is suitable for a server management application menu\n"
        f"and only return the actual translation; translate it from from English to {language} ~~~{message}~~~"
    )
    translation = get_completion(request)
    # Look for over-long messages.
    length_ratio = len(message) / len(translation)
    if length_ratio < 0.2:
        print(f"{message} translated to {translation}")
        quit()
    return translation
def convert_lex_to_dict(pairs_string):
    """Parse lex-file text into a list of {"id", "text"} dicts.

    The input holds entries of the form ``'id' => 'text'`` separated by a
    comma followed by a newline; blank entries are ignored.  Each entry is
    split on the first " => " only, so text that itself contains " => " is
    kept whole (the original split without a limit and dropped the rest).

    :param pairs_string: contents of a lex file.
    :return: list of dicts with keys "id" and "text"; when an id repeats,
        the last text wins.
    :raises IndexError: if a non-blank entry has no " => " separator.
    """
    entries = {}
    for pair in pairs_string.split(",\n"):
        if not pair.strip():
            continue
        parts = pair.split(" => ", 1)
        entries[parts[0].strip("'")] = parts[1].strip("'")
    return [{"id": key, "text": value} for key, value in entries.items()]
2024-04-09 09:30:13 +00:00
if __name__ == "__main__":
    # Generate the controller, layout, partial and lex files for one package
    # from its JSON5 definition, then create translated lex files.
    try:
        chameleon_version = pkg_resources.get_distribution("Chameleon").version
    except pkg_resources.DistributionNotFound:
        chameleon_version = "Version information not available"
    python_version = sys.version
    python_version = python_version[:8]
    current_datetime = datetime.now()
    formatted_datetime = current_datetime.strftime("%Y-%m-%d %H:%M")
    strVersion = "SM2Gen version:"+SME2Gen_version+" Chameleon version:"+chameleon_version+" On Python:"+python_version+" at "+formatted_datetime
    json5_dict: dict = {}
    json5_html_list: list = []
    print(f"SM2 code from JSON5 - {strVersion}")
    home_dir = "/home/brianr/clients/SM2/SM2Gen/"
    json_filename = f"{home_dir}nfsshare.json5"

    # Read the OpenAI key from the INI file.
    # `client` stays None when no key is available, so the translation step
    # below can be skipped instead of failing on an undefined name.
    client = None
    if os.path.exists(ini_file_path):
        # Create a configparser object and read the file
        config = configparser.ConfigParser()
        config.read(ini_file_path)
        # Read the value of "OPENAI_API_KEY"
        if 'OPENAI_API_KEY' in config['smegit']:
            OPENAI_API_KEY = config['smegit']['OPENAI_API_KEY']
            client = OpenAI(api_key=OPENAI_API_KEY)
        else:
            print("OPENAI_API_KEY not found in the configuration file.")
    else:
        # The original referenced an undefined name here (`file_path`),
        # which raised NameError instead of printing this message.
        print("Configuration file not found at:", ini_file_path)

    # Command line parameters - not in use
    parser = argparse.ArgumentParser(description="SM2Gen")
    parser.add_argument('-f', '--filename', help='Specify a filename for the JSON5 file', default=json_filename)
    parser.add_argument('-nco', '--noController', help='Stop it creating a controller file', default="no")
    parser.add_argument('-nh', '--noHtml', help='Stop it creating html files(s)', default="no")
    parser.add_argument('-nl', '--noLang', help='Stop it creating language localise files(s)', default="no")
    parser.add_argument('-ncu', '--noCust', help='Stop it creating Custom controller file', default="no")
    args = parser.parse_args()
    json_filename = args.filename
    print(f"JSON5 from {json_filename} with noController={args.noController}, noHtml={args.noHtml} and noLang={args.noLang}") #Not yet activated

    # Check syntax of JSON5
    lint_json5(json_filename)
    # Get dict of it all
    json5_dict = json5_to_dict(json_filename)
    # Get dict of just the html bit
    json5_html_list = json5_dict['html']

    # Identify message
    print(f"\nGenerating mojo panels for {hl('PackageName')}")
    print( "-----------------------------------")

    # Routes for each panel
    routes = get_all_routes()
    lc_routes = lc_get_all_routes()

    # File names
    controller_file = 'Targets/'+hl('PackageName')+'.pm'
    custom_controller_file = 'Targets/'+hl('PackageName')+'-Custom.pm'
    # Call it .new if one is already there (and may have been edited by the developer)
    if os.path.exists(custom_controller_file):
        custom_controller_file = custom_controller_file+'.new'
    layout_file = 'Targets/'+hl('PackageName').lower()+'.html.ep'
    partial_files = list()
    for panel in routes:
        partial_files.append('Targets/_'+hl('prefix')+"_"+panel+'.html.ep')
    print(partial_files)
    lex_file = 'Targets/'+hl('PackageName').lower()+'_en.lex'
    tablecontrols = get_table_control_data() #arrays of hashes used to drive rows in tables

    # Generate controller file
    try:
        controller_template = PageTemplateFile("Templates/controller.pm.tem")
        dbentries = get_db_fields() #Params which correspond to Db fields
        try:
            controller_perl = controller_template.render(version=strVersion,
                                    tablecontrols=tablecontrols,
                                    dbentries=dbentries,
                                    **json5_dict,
                                    panels=routes,
                                    lcPackageName=json5_dict['PackageName'].lower()
                                    )
            with open(controller_file, 'w') as file:
                file.write(controller_perl)
            print(f"{controller_file} controller generated ok")
        except Exception as e:
            print(f"A Chameleon controller render error occurred: {e}")
    except Exception as e:
        print(f"A Chameleon controller template error occurred: {e}")

    # Generate Custom controller file
    try:
        custom_controller_template = PageTemplateFile("Templates/custom.pm.tem")
        try:
            custom_controller_perl = custom_controller_template.render(version=strVersion,
                                    panels=routes,
                                    tablecontrols=tablecontrols
                                    )
            # We must be careful to not overwrite the custom file if the developer has already written to it - TBD
            with open(custom_controller_file, 'w') as file:
                file.write(custom_controller_perl)
            print(f"{custom_controller_file} custom controller generated ok")
        except Exception as e:
            print(f"A Chameleon custom controller render error occurred: {e}")
    except Exception as e:
        print(f"A Chameleon custom controller template error occurred: {e}")

    # Generate Layout file
    layout_template = PageTemplateFile("Templates/layout.html.ep.tem")
    try:
        try:
            layout_mojo = layout_template.render(version=strVersion,**json5_dict,conditions=routes)
            with open(layout_file, 'w') as file:
                file.write(layout_mojo)
            print(f"{layout_file} mojo template layout file generated ok")
        except Exception as e:
            print(f"A Chameleon render on layout file error occurred: {e}")
    except Exception as e:
        print(f"A Chameleon template layout file error occurred: {e}")

    # Generate a partial file for each of the entries in the html list
    # Pull in the template code for each of the input types
    html_controls = parse_xml_to_dict('Templates/html_controls.html.ep.xml')
    for i, html in enumerate(json5_html_list):
        # Generate a mojo template file, and then add in the controls
        # main file first
        try:
            partial_template = PageTemplateFile("Templates/partial.html.ep.tem")
            partial_mojo_context = {**json5_dict,**html}
            try:
                partial_mojo_template = partial_template.render(version=strVersion,**partial_mojo_context)
                with open( partial_files[i], 'w') as file:
                    file.write(partial_mojo_template)
                print(f"{partial_files[i]} mojo template generated ok - phase 1")
            except Exception as e:
                print(f"A Chameleon render error on partial file {html['route']} occurred: {e}")
        except Exception as e:
            print(f"A Chameleon html {html['route']} error occurred: {e}")

        # Now generate the controls from the rest of the entries in the dict.
        all_controls_html = ""
        prefix_is = hl('prefix')
        for html_control in html:
            inner_html = html[html_control]
            if isinstance(inner_html, dict):
                try:
                    control_template = PageTemplate(html_controls[inner_html['Type']])
                    try:
                        control_html = control_template.render(version=strVersion,**inner_html,prefix=prefix_is)
                        all_controls_html = all_controls_html + control_html
                    except Exception as e:
                        print(f"A Chameleon render on partial file control {inner_html['Name']} error occurred: {e}")
                except Exception as e:
                    print(f"A Chameleon render on partial file control {inner_html['Name']} error occurred: {e}")
            else:
                # Just a simple entry - name less numerics is type
                html_Type = ''.join(char for char in html_control if not char.isdigit())
                try:
                    simple_control_template = PageTemplate(html_controls[html_Type])
                    try:
                        simple_control_html = simple_control_template.render(version=strVersion,value=inner_html,prefix=prefix_is)
                        all_controls_html = all_controls_html + simple_control_html
                    except Exception as e:
                        print(f"A Chameleon render on partial file control {html_control} error occurred: {e}")
                except Exception as e:
                    print(f"A Chameleon template partial file control {html_control} error occurred: {e}")

        # Now insert it into the partial file in the correct place.
        # Read in the text file and split at "%# Inputs etc in here."
        with open(partial_files[i], 'r') as file:
            lines = file.readlines()
        # Falls back to the end of the file when the marker line is absent.
        index = next((pos for pos, line in enumerate(lines) if "%# Inputs etc in here." in line), len(lines))
        # Insert the string at the specified index
        lines.insert(index+1, all_controls_html + '\n')
        # Write the modified content back to the file
        with open(partial_files[i], 'w') as file:
            file.writelines(lines)
        print(f"Content modified and saved to {partial_files[i]}")

    # Now generate the <name>.en file
    # Look through the generated files for the /l[\s|(]['|"](.*)['|"]\)/ strings.
    # Create a combined list of all the files
    all_files = [controller_file,layout_file]+partial_files
    all_strings = []
    for filename in all_files:
        with open(filename, 'r') as file:
            file_content = file.read()
        # Define the regular expression pattern to match the strings you want to extract
        pattern = r"l[\s|(][\'|\"](.*)[\'|\"]\)"
        # Use re.findall to extract all occurrences of the pattern from the file content
        extracted_strings = re.findall(pattern, file_content)
        all_strings = all_strings + extracted_strings
    # Take out any duplicates
    all_strings = deduplicate_array(all_strings)

    # '<prefix>_english-message' => 'English Message',
    string_lib = [] #Array of dicts
    for lex_message in all_strings:
        # If has a prefix - leave it for left hand side but delete it for the right
        # If has no prefix - add one for left hand side and leave it for the right
        # Map all spaces to "_" on left hand side
        # and truncate it to the first six "_"-separated words
        original_str = lex_message
        # Check if it starts with the prefix (any case)
        if lex_message.lower().startswith(hl('prefix').lower()):
            left_str = lex_message
            right_str = lex_message[len(hl('prefix'))+1:]
        else:
            left_str = hl('prefix')+"_"+lex_message
            right_str = lex_message
        # And take out any "_", map to " "
        right_str = right_str.replace("_"," ")
        right_str = format_text(right_str)
        left_str = left_str.replace(" ","_")
        words = left_str.split('_')[:6]
        left_str = "_".join(words)
        next_lex_str = {"orig":original_str,"left":left_str,"right":right_str}
        string_lib.append(next_lex_str)

    # And write it to lex file
    # Now process them one by one into the lexical file
    lex_all = ""
    for lex_str in string_lib:
        lex_all += f"\'{lex_str['left']}\' => \'{lex_str['right']}\',\n"
    print(f"Writing {lex_file}")
    with open( lex_file, 'w') as file:
        file.write(lex_all)

    # And then play the strings back into the partials and the layout file
    print("..and feed the lex string names back into other files")
    for filename in all_files:
        with open(filename, 'r') as file:
            file_content = file.read()
        # Scan through
        for item in string_lib:
            original_str = item["orig"]
            left_str = item["left"]
            right_str = item["right"]
            # Replace all occurrences of original string with left string in 'contents'
            file_content = file_content.replace("l('"+original_str+"')", "l('"+left_str+"')")
        # And write it back
        with open(filename, 'w') as file:
            file.write(file_content)
        print(f"Write out modified:{filename}")

    # Now generate all the translated lex files from a list of the languages and codes
    home_dir = os.path.dirname(json_filename)
    languages_path =f"{home_dir}/languages.json"
    with open(languages_path,'r') as file:
        languages_str = file.read()
    lang_dict = json.loads(languages_str)
    with open(lex_file,'r') as file:
        lex_str = file.read()
    eng_lex_dict = convert_lex_to_dict(lex_str)
    for lang_item in lang_dict:
        print(f"Translating from english lex file to {lang_item['language']}")
        code = lang_item["code"]
        translated_lex_file = f"Targets/{hl('PackageName').lower()}_{code}.lex"
        # Only do it if the lex file is missing
        if os.path.exists(translated_lex_file):
            print(f"Skipping the creation of {translated_lex_file} as it exists already")
            continue
        # Without an API key there is no client to translate with.
        if client is None:
            print(f"Skipping the creation of {translated_lex_file} as no OpenAI client is configured")
            continue
        translated_dict = []
        for lex_item in eng_lex_dict:
            # Get it from ChatGPT
            translated_text = get_translation(lex_item["text"], lang_item["language"])
            translated_dict.append({"id": lex_item["id"], "text": translated_text})
        print(f"Writing out lex file for {lang_item['code']}")
        with open(translated_lex_file, "w") as file:
            for item in translated_dict:
                # Escape any nasties
                translated_text = item['text'].replace('\\', r'\\').replace('"', r'\"').replace("'", r"\'")
                line = "'"+item['id']+"' => "+'"'+translated_text+'",\n'
                file.write(line)
    quit() #end of the program