SM2Gen/sm2gen.py

import json5
import sys
import argparse
from chameleon import PageTemplateFile,PageTemplate
import pkg_resources
import xml.etree.ElementTree as ET
import re
import os
import pkg_resources
from datetime import datetime
import xml.etree.ElementTree as ET
from openai import OpenAI
import configparser
import json

#
#To Do
# 1. Get routing working
# 2. Deal with ? and ! as sentence ends in text-format
# 3. Make lexical scan work with double or single quotes (if possible)

# Version string shown in the start-up banner built by the main script.
SME2Gen_version = '0.8'
# Parsed JSON5 definition; assigned by the main script and read by hl() and the route helpers.
json5_dict: dict = {}
# The 'html' entry of the parsed definition; read by get_table_control_data().
json5_html_list: list = []
# Define the path to the INI file
ini_file_path = os.path.expanduser('~/.smegit/conf')
# Placeholder API key; the main script overwrites it with the value found in the INI file.
OPENAI_API_KEY = ""

def parse_json(json_obj, prefix=''):
	"""Flatten nested JSON-like data into a list of "path: value" strings.

	Dict keys are joined to the path with "." (no leading dot at the top
	level), list positions are appended as "[n]", and every other value
	becomes one "path: value" entry.
	"""
	if isinstance(json_obj, dict):
		branches = (
			(f"{prefix}.{name}" if prefix else name, child)
			for name, child in json_obj.items()
		)
	elif isinstance(json_obj, list):
		branches = (
			(f"{prefix}[{position}]", child)
			for position, child in enumerate(json_obj)
		)
	else:
		# Leaf value: nothing further to descend into.
		return [f"{prefix}: {json_obj}"]

	flattened = []
	for path, child in branches:
		flattened += parse_json(child, path)
	return flattened

def json5_to_list(filename):
	"""Load a JSON5 file and return its contents flattened by parse_json()."""
	with open(filename, 'r') as handle:
		loaded = json5.load(handle)
	flattened = parse_json(loaded)
	return flattened

def json5_to_pandas(filename):
	"""Load a JSON5 file and return it normalised into a pandas DataFrame.

	The loaded data is printed before conversion.  The original code called
	``json_normalize`` on the loaded data itself, which has no such method;
	the function lives in the pandas package.
	"""
	# Imported here so pandas is only required when this helper is used.
	import pandas

	with open(filename, 'r') as file:
		data = json5.load(file)
	print (data)
	return pandas.json_normalize(data)

def json5_to_dict(filename):
	"""Load a JSON5 file and return the parsed data unchanged."""
	with open(filename, 'r') as handle:
		parsed = json5.load(handle)
	return parsed

def rec_print(data, prefix=''):
	"""Recursively print every leaf of nested dicts/lists as "path: value".

	Dict keys are always appended as ".key" (so top-level keys start with a
	dot) and list positions as "[n]".
	"""
	if isinstance(data, dict):
		branches = [(f"{prefix}.{name}", child) for name, child in data.items()]
	elif isinstance(data, list):
		branches = [(f"{prefix}[{position}]", child) for position, child in enumerate(data)]
	else:
		# A basic type: print it and stop descending.
		print(f"{prefix}: {data}")
		return

	for path, child in branches:
		rec_print(child, path)
		
def find_item(nested_dict, target_key):
	"""Return the first value stored under target_key in nested dicts.

	Keys are visited in iteration order and nested dicts are searched as
	they are encountered, so a match inside an earlier nested dict wins
	over a later key at the current level.  Lists are not searched.
	Returns None when nothing is found.
	"""
	for name in nested_dict:
		entry = nested_dict[name]
		if name == target_key:
			return entry
		if not isinstance(entry, dict):
			continue
		nested_hit = find_item(entry, target_key)
		if nested_hit is not None:
			return nested_hit
	return None
   
def find_dicts_with_key(data, target_key):
	"""Collect every dict, at any depth of nested dicts/lists, that contains target_key.

	A matching dict is listed before any matches found inside it.
	"""
	if isinstance(data, dict):
		matches = [data] if target_key in data else []
		children = data.values()
	elif isinstance(data, list):
		matches = []
		children = data
	else:
		# Neither a dict nor a list: nothing to search.
		return []

	for child in children:
		if isinstance(child, (dict, list)):
			matches += find_dicts_with_key(child, target_key)
	return matches
	
def find_values_with_key(data, target_key):
	"""Collect the value stored under target_key in every dict nested in data.

	Dicts and lists are walked depth-first in their natural order; a dict's
	own value is listed before values found deeper inside it.
	"""
	found = []
	pending = [data]
	while pending:
		node = pending.pop()
		if isinstance(node, dict):
			if target_key in node:
				found.append(node[target_key])
			children = list(node.values())
		elif isinstance(node, list):
			children = node
		else:
			continue
		# Push in reverse so the first child is the next one popped.
		pending.extend(reversed(children))
	return found
	
def lint_json5(filename):
	"""Check that filename can be read and parsed as JSON5.

	Prints a confirmation when the file parses.  On any failure it prints
	the error and terminates the program with exit status 1, so that a
	calling shell can tell the run failed (the status used to be 0).
	"""
	try:
		with open(filename, 'r') as file:
			data = file.read()
		json5.loads(data)
		print(f"{filename} as JSON5 data is valid")
	except Exception as e:
		print(f"{filename} as JSON5 data is invalid")
		print("Error:", str(e))
		sys.exit(1)


def flatten_hash_of_lists(hash_of_lists):
	"""Return a copy of the dict with each list value spread over "key_n" entries.

	Non-list values are copied under their original key.  An empty list
	contributes no entries at all.
	"""
	flat = {}
	for name, payload in hash_of_lists.items():
		if not isinstance(payload, list):
			flat[name] = payload
			continue
		# The position is appended to the key to keep the entries unique.
		flat.update((f"{name}_{position}", element) for position, element in enumerate(payload))
	return flat
	
def hl(keyname):
	"""Return the top-level value for keyname from the loaded JSON5 dict.

	When the key is absent a message is printed and the string 'None'
	(not the None object) is returned.
	"""
	if keyname not in json5_dict:
		print(f"{keyname} not found in JSON5 - top level")
		return 'None'
	return json5_dict[keyname]

def get_all_routes():
	"""Return the 'route' of every entry in the 'html' list (None where an entry has none)."""
	collected = []
	for panel in json5_dict.get('html', []):
		collected.append(panel.get('route'))
	return collected

def lc_get_all_routes():
	"""Return the 'route' of every entry in the 'html' list, in lower case."""
	lowered = []
	for panel in json5_dict.get('html', []):
		route_name = panel.get('route')
		lowered.append(route_name.lower())
	return lowered
	
def has_file_been_modified(file_path):
	"""Return True when the file's modification time is later than its ctime.

	A leftover debug print followed by quit() used to terminate the program
	before the comparison could run; both have been removed.

	NOTE(review): os.path.getctime() is the creation time on Windows but the
	last metadata-change time on Unix, so on Unix this comparison may not
	mean "modified after creation" - confirm the intended platform.
	"""
	creation_time = os.path.getctime(file_path)
	last_modification_time = os.path.getmtime(file_path)
	return creation_time < last_modification_time

def parse_xml_to_dict(xml_file):
	"""Parse an XML file into a dict mapping each child tag of the root to text.

	The child's own text is used when it is non-empty; otherwise the text of
	the element returned by find('.//') on that child is used.  Later
	children with the same tag overwrite earlier ones.
	"""
	top = ET.parse(xml_file).getroot()

	collected = {}
	for node in top:
		content = node.text
		if not content:
			# No direct text on this element: fall back to what find('.//') locates.
			content = node.find('.//').text
		collected[node.tag] = content
	return collected

def deduplicate_array(arr):
	"""Return a list of the unique items of arr in first-seen order.

	The previous set round-trip dropped the original ordering even though
	its comment promised to maintain it; dict keys keep insertion order.
	Items must be hashable.
	"""
	return list(dict.fromkeys(arr))

def get_db_fields():
	"""Return the database field parameters; currently always a new empty list."""
	db_fields = []
	return db_fields
	
def get_table_control_data():
	"""Return every 'TableControl' value found anywhere in the loaded html list."""
	table_controls = find_values_with_key(json5_html_list, 'TableControl')
	return table_controls

def format_text(text):
	"""Tidy a piece of text for display.

	A single all-uppercase word (no spaces or full stops) is simply
	capitalised.  Otherwise the text is split on "." and then on ",";
	the first word of each comma-separated phrase is capitalised unless it
	is fully uppercase, phrases are rejoined with ", " and sentences with
	".  " (full stop plus two spaces).
	"""
	pieces = text.split(".")
	lead = pieces[0]
	# Deal with one capitalised word
	if len(pieces) == 1 and len(lead.split(" ")) == 1 and lead.isupper():
		return lead.capitalize()

	def tidy_phrase(raw_phrase):
		# Strip each word, then capitalise the leading word unless it is all upper case.
		tokens = [token.lstrip() for token in raw_phrase.lstrip().split(' ')]
		if not tokens[0].isupper():
			tokens[0] = tokens[0].capitalize()
		return ' '.join(tokens).lstrip()

	rebuilt = [
		", ".join(tidy_phrase(chunk) for chunk in piece.split(",")).lstrip()
		for piece in pieces
	]
	return ".  ".join(rebuilt).lstrip()

def get_completion(prompt):
	"""Send prompt as a single user message to the chat model and return the reply text.

	Uses the module-level OpenAI ``client``; the reply is the content of the
	first choice with surrounding whitespace stripped.
	"""
	chat_messages = [{'role':"user", 'content':prompt}]
	response = client.chat.completions.create(
		model="gpt-4o",
		messages=chat_messages,
		max_tokens=100,
	)
	# Extracting the content message from the JSON data
	first_choice = response.to_dict()["choices"][0]
	return first_choice["message"]["content"].strip()
	
def get_translation(message="Hello",language="french"):
	"""Translate message from English into language via get_completion().

	Returns the translated text.  If the translation is more than five times
	as long as the original, both are printed and the program is terminated.

	The length check used to divide by the translation's length and so
	raised ZeroDivisionError when the model returned an empty string; it is
	now an integer comparison, and an empty reply is reported and returned
	as-is.
	"""
	prompt = f"""Translate the following text in triple ~~~ so that it is suitable for a server management application menu 
	and only return the actual translation; translate it from from English to {language}  ~~~{message}~~~"""
	translated_message = get_completion(prompt)
	if not translated_message:
		print(f"Warning: empty translation returned for {message}")
	# Look for over long messages (same test as len(message)/len(translation) < 0.2)
	if len(translated_message) > 5 * len(message):
		print(f"{message} translated to {translated_message}")
		quit()
	return translated_message
	
def convert_lex_to_dict(pairs_string):
	"""Convert lex-file text ("'id' => 'text'," per line) into a list of dicts.

	Returns [{"id": ..., "text": ...}, ...] in first-seen order; when an id
	is repeated the last text wins.  Each entry is split on the first " => "
	only, so a text that itself contains " => " is kept whole instead of
	being truncated.  An entry without the separator raises IndexError.
	"""
	entries = {}
	# Entries are separated by comma + newline; blank fragments are ignored.
	for line in pairs_string.split(",\n"):
		if not line.strip():
			continue
		parts = line.split(" => ", 1)
		entries[parts[0].strip("'")] = parts[1].strip("'")
	return [{"id": key, "text": value} for key, value in entries.items()]


if __name__ == "__main__":
	# Script entry point.  In order it:
	#   1. builds the version banner and reads the OpenAI key from the INI file,
	#   2. loads and checks the JSON5 definition,
	#   3. renders the controller, custom controller, layout and partial files,
	#   4. extracts the l('...') strings into the English lex file and feeds the
	#      generated lex ids back into the generated files,
	#   5. creates any missing translated lex files.
	try:
		chameleon_version = pkg_resources.get_distribution("Chameleon").version
	except pkg_resources.DistributionNotFound:
		chameleon_version = "Version information not available"
	python_version = sys.version
	python_version = python_version[:8]
	current_datetime = datetime.now()
	formatted_datetime = current_datetime.strftime("%Y-%m-%d %H:%M")

	strVersion = "SM2Gen version:"+SME2Gen_version+" Chameleon version:"+chameleon_version+" On Python:"+python_version+" at "+formatted_datetime

	json5_dict: dict = {}
	json5_html_list: list = []

	print(f"SM2 code from JSON5 - {strVersion}")

	home_dir = "/home/brianr/clients/SM2/SM2Gen/"
	json_filename = f"{home_dir}nfsshare.json5"

	# read open ai key from ini file
	# client stays None when no key is available, so the translation step can
	# be skipped instead of failing on an undefined name.
	client = None
	# Check if the file exists
	if os.path.exists(ini_file_path):
		# Create a configparser object and read the file
		config = configparser.ConfigParser()
		config.read(ini_file_path)

		# Read the value of "OPENAI_API_KEY"
		# has_option() is also False when the [smegit] section itself is missing.
		if config.has_option('smegit', 'OPENAI_API_KEY'):
			OPENAI_API_KEY = config['smegit']['OPENAI_API_KEY']
			#print("API Key:", OPENAI_API_KEY)
			client = OpenAI(api_key=OPENAI_API_KEY)
		else:
			print("OPENAI_API_KEY not found in the configuration file.")
	else:
		# Was an undefined name (file_path), which raised NameError here.
		print("Configuration file not found at:", ini_file_path)

	# Command line parameters - not in use
	parser = argparse.ArgumentParser(description="SM2Gen")
	parser.add_argument('-f', '--filename', help='Specify a filename for the JSON5 file', default=json_filename)
	parser.add_argument('-nco', '--noController', help='Stop it creating a controller file', default="no")
	parser.add_argument('-nh', '--noHtml', help='Stop it creating html files(s)', default="no")
	parser.add_argument('-nl', '--noLang', help='Stop it creating language localise files(s)', default="no")
	parser.add_argument('-ncu', '--noCust', help='Stop it creating Custom controller file', default="no")
	args = parser.parse_args()
	json_filename = args.filename
	print(f"JSON5 from {json_filename} with noController={args.noController}, noHtml={args.noHtml} and noLang={args.noLang}") #Not yet activated

	# check syntax of JSON5
	lint_json5(json_filename)

	# Get dict of it all
	json5_dict = json5_to_dict(json_filename)

	# Get dict of just the html bit
	json5_html_list = json5_dict['html']

	#Identify message
	print(f"\nGenerating mojo panels for {hl('PackageName')}")
	print( "-----------------------------------")

	# Routes for each panel
	routes = get_all_routes()
	lc_routes = lc_get_all_routes()

	#File names
	controller_file = 'Targets/'+hl('PackageName')+'.pm'
	custom_controller_file = 'Targets/'+hl('PackageName')+'-Custom.pm'
	# Call it .new if one is already there (and may have been edited by the developer)
	if os.path.exists(custom_controller_file):
		custom_controller_file = custom_controller_file+'.new'
	layout_file = 'Targets/'+hl('PackageName').lower()+'.html.ep'
	partial_files = list()
	for panel in routes:
		partial_files.append('Targets/_'+hl('prefix')+"_"+panel+'.html.ep')
	print(partial_files)
	lex_file = 'Targets/'+hl('PackageName').lower()+'_en.lex'
	tablecontrols = get_table_control_data() #arrays of hashes used to drive rows in tables

	#Generate controller file
	try:
		controller_template = PageTemplateFile("Templates/controller.pm.tem")
		dbentries = get_db_fields() #Params which correspond to Db fields
		try:
			controller_perl = controller_template.render(
				version=strVersion,
				tablecontrols=tablecontrols,
				dbentries=dbentries,
				**json5_dict,
				panels=routes,
				lcPackageName=json5_dict['PackageName'].lower()
			)
			with open(controller_file, 'w') as file:
				file.write(controller_perl)
			print(f"{controller_file} controller generated ok")
		except Exception as e:
			print(f"A Chameleon controller render error occurred: {e}")
	except Exception as e:
		print(f"A Chameleon controller template error occurred: {e}")

	#Generate Custom controller file
	try:
		custom_controller_template = PageTemplateFile("Templates/custom.pm.tem")
		try:
			custom_controller_perl = custom_controller_template.render(
				version=strVersion,
				panels=routes,
				tablecontrols=tablecontrols
			)
			# We must be careful to not overwrite the custom file if the developer has already written to it - TBD
			with open(custom_controller_file, 'w') as file:
				file.write(custom_controller_perl)
			print(f"{custom_controller_file} custom controller generated ok")
		except Exception as e:
			print(f"A Chameleon custom controller render error occurred: {e}")
	except Exception as e:
		print(f"A Chameleon custom controller template error occurred: {e}")

	#generate Layout file
	layout_template = PageTemplateFile("Templates/layout.html.ep.tem")
	try:
		try:
			layout_mojo = layout_template.render(version=strVersion,**json5_dict,conditions=routes)
			with open(layout_file, 'w') as file:
				file.write(layout_mojo)
			print(f"{layout_file} mojo template layout file generated ok")
		except Exception as e:
			print(f"A Chameleon render on layout file error occurred: {e}")
	except Exception as e:
		print(f"A Chameleon template layout file error occurred: {e}")

	#Generate a partial file for each of the entries in the html list
	#Pull in the template code for each of the input types
	#html_controls = json5_to_dict('Templates/html_controls.html.ep.tem')
	html_controls = parse_xml_to_dict('Templates/html_controls.html.ep.xml')
	# partial_files was built from the routes of this same html list, so the
	# position of each entry selects its partial file.
	for i, html in enumerate(json5_html_list):
		# Generate a mojo template file, and then add in the controls
		# main file first
		try:
			partial_template = PageTemplateFile("Templates/partial.html.ep.tem")
			partial_mojo_context = {**json5_dict,**html}
			try:
				partial_mojo_template = partial_template.render(version=strVersion,**partial_mojo_context)
				with open( partial_files[i], 'w') as file:
					file.write(partial_mojo_template)
				print(f"{partial_files[i]} mojo template generated ok - phase 1")
			except Exception as e:
				print(f"A Chameleon render error on partial file {html['route']} occurred: {e}")
		except Exception as e:
			print(f"A Chameleon html  {html['route']} error occurred: {e}")

		#Now generate the controls from the rest of the entries in the dict.
		all_controls_html = ""
		prefix_is = hl('prefix')
		for html_control in html:
			inner_html = html[html_control]
			if isinstance(inner_html, dict):
				# A dict entry names its own control template through its 'Type' key.
				try:
					control_template = PageTemplate(html_controls[inner_html['Type']])
					try:
						control_html = control_template.render(version=strVersion,**inner_html,prefix=prefix_is)
						all_controls_html = all_controls_html + control_html
					except Exception as e:
						print(f"A Chameleon render on partial file control {inner_html['Name']} error occurred: {e}")
				except Exception as e:
					print(f"A Chameleon render on partial file control {inner_html['Name']} error occurred: {e}")
			else:
				#just a simple entry - name less numerics is type
				html_Type =  ''.join(char for char in html_control if not char.isdigit())
				try:
					simple_control_template  = PageTemplate(html_controls[html_Type])
					try:
						simple_control_html = simple_control_template.render(version=strVersion,value=inner_html,prefix=prefix_is)
						all_controls_html = all_controls_html + simple_control_html
					except Exception as e:
						print(f"A Chameleon render on partial file control {html_control} error occurred: {e}")
				except Exception as e:
					print(f"A Chameleon template partial file control {html_control} error occurred: {e}")

		# Now insert it into the partial file in the correct place.
		# Read in the text file and split at "%# Inputs etc in here."
		with open(partial_files[i], 'r') as file:
			lines = file.readlines()
			# Falls back to the end of the file when the marker line is absent.
			index = next((pos for pos, line in enumerate(lines) if "%# Inputs etc in here." in line), len(lines))

		# Insert the string at the specified index
		lines.insert(index+1, all_controls_html + '\n')

		# Write the modified content back to the file
		with open(partial_files[i], 'w') as file:
			file.writelines(lines)
		print(f"Content modified and saved to {partial_files[i]}")

	# Now generate the <name>.en file
	# Look through the generated files for the /l[\s|(]['|"](.*)['|"]\)/ strings.

	# create a combined list of all the files
	all_files = [controller_file,layout_file]+partial_files
	all_strings = []
	for filename in all_files:
		with open(filename, 'r') as file:
			file_content = file.read()
		# Define the regular expression pattern to match the strings you want to extract
		pattern = r"l[\s|(][\'|\"](.*)[\'|\"]\)"
		# Use re.findall to extract all occurrences of the pattern from the file content
		extracted_strings = re.findall(pattern, file_content)
		all_strings = all_strings + extracted_strings
	#Take out any duplicates
	all_strings = deduplicate_array(all_strings)
	# '<prefix>_english-message' => 'English Message',
	string_lib = []   #Array of dicts
	for lex_message in all_strings:
		# If has a prefix - leave it for left hand side but delete it for the right
		# If has no prefix - add one for left hand side and leave it for the right
		# Map all spaces to "_" on left hand side
		# and truncate it to the first six "_"-separated parts (see the [:6] slice below)
		original_str = lex_message
		# Check if it starts with the prefix (any case)
		if lex_message.lower().startswith(hl('prefix').lower()):
			left_str = lex_message
			right_str = lex_message[len(hl('prefix'))+1:]
			# And take out any "_", map to " "
		else:
			left_str = hl('prefix')+"_"+lex_message
			right_str = lex_message
		right_str = right_str.replace("_"," ")
		#print(f"Right:{right_str}")
		right_str = format_text(right_str)
		left_str = left_str.replace(" ","_")
		words = left_str.split('_')[:6]
		left_str = "_".join(words)
		next_lex_str = {"orig":original_str,"left":left_str,"right":right_str}
		string_lib.append(next_lex_str)
	#And write it to lex file
	# Now process them one by one into the lexical file
	lex_all = ""
	for lex_str in string_lib:
		lex_all += f"\'{lex_str['left']}\' => \'{lex_str['right']}\',\n"
	print(f"Writing {lex_file}")
	with open( lex_file, 'w') as file:
		file.write(lex_all)
	#and then play the strings back into the partials and the layout file
	print("..and feed the lex string names back into other files")
	for filename in all_files:
		with open(filename, 'r') as file:
			file_content = file.read()
		# Scan through
		for item in string_lib:
			original_str = item["orig"]
			left_str = item["left"]
			# Replace all occurrences of original string with left string in 'contents'
			file_content = file_content.replace("l('"+original_str+"')", "l('"+left_str+"')")
		# and write it back
		with open(filename, 'w') as file:
			file.write(file_content)
		print(f"Write out modified:{filename}")

	#  Now generate all the translated lex files from a list of the languages and codes
	home_dir = os.path.dirname(json_filename)
	languages_path =f"{home_dir}/languages.json"
	with open(languages_path,'r') as file:
		languages_str = file.read()
	lang_dict = json.loads(languages_str)
	with open(lex_file,'r') as file:
		lex_str = file.read()
	eng_lex_dict = convert_lex_to_dict(lex_str)
	for lang_item in lang_dict:
		print(f"Translating from english lex file to {lang_item['language']}")
		code = lang_item["code"]
		translated_lex_file = f"Targets/{hl('PackageName').lower()}_{code}.lex"
		# Only do it if the lex file is missing
		if not os.path.exists(translated_lex_file):
			if client is None:
				# No API key was configured, so there is nothing to translate with.
				print(f"Skipping the creation of {translated_lex_file} as no OpenAI client is configured")
				continue
			translated_dict = []
			for lex_item in eng_lex_dict:
				# Get it from ChatGPT
				translated_text = get_translation(lex_item["text"], lang_item["language"])
				translated_dict.append({"id": lex_item["id"], "text": translated_text})
			print(f"Writing out lex file for {lang_item['code']}")
			with open(translated_lex_file, "w") as file:
				for item in translated_dict:
					# escape any nasties
					translated_text = item['text'].replace('\\', r'\\').replace('"', r'\"').replace("'", r"\'")
					line = "'"+item['id']+"' => "+'"'+translated_text+'",\n'
					file.write(line)
					#print(f"{item['id']} => {item['text']}\n")
		else:
			print(f"Skipping the creation of {translated_lex_file} as it exists already")
	quit() #end of the program