SM2Gen/sm2gen.py

import json5
import sys
import argparse
from chameleon import PageTemplateFile, PageTemplate
import pkg_resources
import xml.etree.ElementTree as ET
import re
import os
from datetime import datetime
from openai import OpenAI
import configparser
import json

#
# To Do
# 1. Get routing working
# 2. Deal with ? and ! as sentence ends in text-format
# 3. Make lexical scan work with double or single quotes (if possible)

# Version of this generator; embedded in the version banner string built by the main script
SME2Gen_version = "0.8"
# The whole JSON5 definition; filled in by the main script and read by hl() and the route helpers
json5_dict: dict = {}
# The "html" entry of the JSON5 definition; read by get_table_control_data()
json5_html_list: list = []
# Define the path to the INI file
ini_file_path = os.path.expanduser("~/.smegit/conf")
# OpenAI API key; replaced by the value from the INI file in the main script when one is found
OPENAI_API_KEY = ""


def parse_json(json_obj, prefix=""):
    """Flatten nested dicts/lists into a list of "path: value" strings.

    Dict keys are joined to the path with "." (no leading dot at the top
    level), list positions are appended as "[index]", and every leaf value
    yields one "<path>: <value>" entry.
    """
    if isinstance(json_obj, dict):
        children = (
            (f"{prefix}.{name}" if prefix else name, child)
            for name, child in json_obj.items()
        )
    elif isinstance(json_obj, list):
        children = ((f"{prefix}[{pos}]", child) for pos, child in enumerate(json_obj))
    else:
        # Leaf value: a single entry, nothing to recurse into
        return [f"{prefix}: {json_obj}"]

    flat = []
    for path, child in children:
        flat.extend(parse_json(child, path))
    return flat


def json5_to_list(filename):
    """Load the JSON5 file *filename* and return it flattened by parse_json()."""
    with open(filename, "r") as handle:
        parsed = json5.load(handle)
    flattened = parse_json(parsed)
    return flattened


def json5_to_pandas(filename):
    """Load the JSON5 file *filename* and return it as a flattened pandas DataFrame.

    The parsed data is printed first (kept from the original behaviour) and
    then handed to ``pandas.json_normalize``. The previous implementation
    called ``json_normalize`` on the parsed data itself, which is a plain
    dict/list and has no such method, so it always raised AttributeError.
    """
    # Imported here so that pandas is only required when this helper is actually used
    import pandas as pd

    with open(filename, "r") as file:
        data = json5.load(file)
    print(data)
    return pd.json_normalize(data)


def json5_to_dict(filename):
    """Parse the JSON5 file *filename* and return the resulting Python object."""
    with open(filename, "r") as handle:
        return json5.load(handle)


def rec_print(data, prefix=""):
    """Print every leaf of a nested dict/list structure as "<path>: <value>".

    Dict keys extend the path with ".key" (including at the top level, so
    paths start with a dot) and list positions extend it with "[index]".
    """
    if isinstance(data, dict):
        labelled = ((f"{prefix}.{name}", child) for name, child in data.items())
    elif isinstance(data, list):
        labelled = ((f"{prefix}[{pos}]", child) for pos, child in enumerate(data))
    else:
        # A basic type: print it and stop descending
        print(f"{prefix}: {data}")
        return

    for label, child in labelled:
        rec_print(child, label)


def find_item(nested_dict, target_key):
    """Return the first value stored under *target_key* in a nested dict.

    Entries are visited in dict order; a matching key at the current level
    wins over anything nested inside it, and only dict values are descended
    into. Returns None when the key is not found (a nested value of None is
    treated as "not found" and the search continues).
    """
    for name in nested_dict:
        entry = nested_dict[name]
        if name == target_key:
            return entry
        if not isinstance(entry, dict):
            continue
        hit = find_item(entry, target_key)
        if hit is not None:
            return hit
    return None


def find_dicts_with_key(data, target_key):
    """Collect every dict in a nested dict/list structure that contains *target_key*.

    The containing dict itself is listed before any matches found among its
    values; anything that is neither a dict nor a list yields no matches.
    """
    if isinstance(data, dict):
        matches = [data] if target_key in data else []
        children = data.values()
    elif isinstance(data, list):
        matches = []
        children = data
    else:
        return []

    for child in children:
        if isinstance(child, (dict, list)):
            matches += find_dicts_with_key(child, target_key)
    return matches


def find_values_with_key(data, target_key):
    """Collect the values stored under *target_key* anywhere in a nested dict/list.

    Values are returned in depth-first, parent-before-children order, the
    same order a recursive walk over dict values and list items produces.
    """
    found = []
    pending = [data]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            if target_key in node:
                found.append(node[target_key])
            children = list(node.values())
        elif isinstance(node, list):
            children = node
        else:
            continue
        # Push in reverse so the first child is the next one popped
        pending.extend(
            child for child in reversed(children) if isinstance(child, (dict, list))
        )
    return found


def lint_json5(filename):
    """Check that *filename* can be read and parsed as JSON5.

    Prints a confirmation when the file parses. On any failure (unreadable
    file or invalid JSON5) the error is printed and the program exits with
    status 1. The previous bare ``sys.exit()`` exited with status 0, so a
    failed check looked like success to the calling shell.
    """
    try:
        with open(filename, "r") as file:
            data = file.read()
        json5.loads(data)
    except Exception as e:
        print(f"{filename} as JSON5 data is invalid")
        print("Error:", str(e))
        sys.exit(1)
    print(f"{filename} as JSON5 data is valid")


def flatten_hash_of_lists(hash_of_lists):
    """Return a copy of the mapping with every list value spread over indexed keys.

    A list stored under "key" becomes "key_0", "key_1", ... (the index keeps
    the new keys unique); non-list values are copied across unchanged.
    """
    flat = {}
    for name, entry in hash_of_lists.items():
        if not isinstance(entry, list):
            flat[name] = entry
            continue
        flat.update({f"{name}_{pos}": member for pos, member in enumerate(entry)})
    return flat


def hl(keyname):
    """Return the top-level JSON5 value for *keyname*.

    When the key is absent a message is printed and the string "None"
    (not the None object) is returned.
    """
    if keyname not in json5_dict:
        print(f"{keyname} not found in JSON5 - top level")
        return "None"
    return json5_dict[keyname]


def get_all_routes():
    """Return the "route" of every entry in the JSON5 "html" list (None where absent)."""
    routes = []
    for html_block in json5_dict.get("html", []):
        routes.append(html_block.get("route"))
    return routes


def lc_get_all_routes():
    """Return the "route" of every entry in the JSON5 "html" list, lower-cased."""
    lowered = []
    for html_block in json5_dict.get("html", []):
        route = html_block.get("route")
        lowered.append(route.lower())
    return lowered


def has_file_been_modified(file_path):
    """Return True when the file's modification time is later than its ctime.

    The previous version printed both timestamps and then called ``quit()``,
    which ended the whole program and left the comparison unreachable; that
    leftover debugging code has been removed.

    NOTE(review): ``os.path.getctime`` is the creation time on Windows but
    the time of the last metadata change on Unix, so on Unix this is not a
    strict "modified since creation" test - confirm this is acceptable.
    """
    creation_time = os.path.getctime(file_path)
    last_modification_time = os.path.getmtime(file_path)
    return creation_time < last_modification_time


def parse_xml_to_dict(xml_file):
    """Parse an XML file into a dict keyed by the tags of the root's children.

    For each direct child of the root element the value is the child's own
    text when it has any; otherwise it is the text of the child's first
    descendant element. A child with neither text nor descendants maps to
    its own (empty) text value instead of raising AttributeError as before.

    *xml_file* is passed straight to ``ET.parse``. When several children
    share a tag, the last one wins.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    xml_dict = {}
    for elem in root:
        if elem.text:
            xml_dict[elem.tag] = elem.text
            continue
        # No text of its own: fall back to the first nested element, if there is one
        first_descendant = elem.find(".//*")
        if first_descendant is None:
            xml_dict[elem.tag] = elem.text
        else:
            xml_dict[elem.tag] = first_descendant.text

    return xml_dict


def deduplicate_array(arr):
    """Return a list of the unique items of *arr*, keeping first-seen order.

    The earlier set-based version lost the input order (and for strings the
    order could change from run to run), although its comments promised to
    maintain it. ``dict.fromkeys`` keeps insertion order. Items must be
    hashable, as before.
    """
    return list(dict.fromkeys(arr))


def get_db_fields():
    """Return the list of database field entries; currently always empty."""
    db_fields = []
    return db_fields


def get_table_control_data():
    """Return every "TableControl" value found anywhere in the JSON5 html list."""
    table_controls = find_values_with_key(json5_html_list, "TableControl")
    return table_controls


def format_text(text):
    """Tidy a piece of text for display.

    A single all-uppercase word with no full stop is simply capitalised.
    Otherwise the text is split into sentences on "." and each sentence into
    phrases on ","; the first word of every phrase is capitalised unless it
    is entirely uppercase, phrases are re-joined with ", " and sentences
    with ".  " (full stop plus two spaces).
    """
    sentences = text.split(".")
    lead = sentences[0]
    # Deal with one capitalised word
    if len(sentences) == 1 and len(lead.split(" ")) == 1 and lead.isupper():
        return lead.capitalize()

    def tidy_phrase(phrase):
        # Capitalise the leading word unless it is fully uppercase; leave the rest untouched
        pieces = [piece.lstrip() for piece in phrase.lstrip().split(" ")]
        if not pieces[0].isupper():
            pieces[0] = pieces[0].capitalize()
        return " ".join(pieces).lstrip()

    def tidy_sentence(sentence):
        # Sub-phrases are separated by commas
        return ", ".join(tidy_phrase(part) for part in sentence.split(",")).lstrip()

    return ".  ".join(tidy_sentence(sentence) for sentence in sentences).lstrip()


def get_completion(prompt):
    """Send *prompt* as a single user message to the chat API and return the reply text.

    Uses the module-level ``client`` with model "gpt-4o", temperature 0 and
    at most 100 tokens. Per the original author's notes, lower temperature
    values make the output more focused and deterministic. The content of
    the first choice is returned with surrounding whitespace stripped.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
        temperature=0,
        max_tokens=100,
    )
    # Pull the message content of the first choice out of the response data
    first_choice = response.to_dict()["choices"][0]
    return first_choice["message"]["content"].strip()


def get_translation(message="Hello", language="french"):
    """Translate *message* from English into *language* via get_completion().

    Returns the translated text. If the translation is more than five times
    as long as the source, it is assumed to be something other than a plain
    translation: both strings are printed and the program exits with
    status 1 (previously ``quit()``, which exits with status 0).

    The length check used to divide by the length of the translation and so
    raised ZeroDivisionError when the reply was empty; an empty reply is now
    returned as is.
    """
    prompt = f"""Translate the following text in triple ~~~ so that it is suitable for a server management application menu
	and only return the actual translation; translate it from from English to {language}  ~~~{message}~~~"""
    translated_message = get_completion(prompt)
    # Look for over long messages. Same test as len(message) / len(translation) < 0.2,
    # written with integer multiplication so an empty translation cannot divide by zero.
    if len(message) * 5 < len(translated_message):
        print(f"{message} translated to {translated_message}")
        sys.exit(1)
    return translated_message


def convert_lex_to_dict(pairs_string):
    """Turn lex-file text into a list of {"id": ..., "text": ...} dicts.

    The text is split into entries on ",\\n" (blank entries are ignored);
    each entry is split on " => " and the surrounding single quotes are
    stripped from both sides. A repeated id keeps its first position but
    takes the last text seen.
    """
    lookup = {}
    for chunk in pairs_string.split(",\n"):
        if not chunk.strip():
            continue
        fields = chunk.split(" => ")
        lookup[fields[0].strip("'")] = fields[1].strip("'")
    return [{"id": ident, "text": wording} for ident, wording in lookup.items()]


if __name__ == "__main__":
	# Script entry point. In order it:
	#   1. builds a version banner,
	#   2. reads the OpenAI API key from the INI file,
	#   3. parses the command line and loads the JSON5 definition,
	#   4. renders the controller, custom controller, layout and partial templates into Targets/,
	#   5. extracts the l('...') strings from the generated files into the English lex file
	#      and writes the lex ids back into those files,
	#   6. creates a translated lex file for every language that does not have one yet.
	try:
		chameleon_version = pkg_resources.get_distribution("Chameleon").version
	except pkg_resources.DistributionNotFound:
		chameleon_version = "Version information not available"
	# Reduce sys.version to its leading x.y.z number for the banner
	python_version = sys.version
	version_pattern = r'(\d{1,3}\.\d{1,3}\.\d{1,3})'
	version_match = re.search(version_pattern, python_version)
	python_version = version_match.group(0) if version_match else 'Unknown'
	current_datetime = datetime.now()
	formatted_datetime = current_datetime.strftime("%Y-%m-%d %H:%M")
	strVersion = (
		"SM2Gen version:"
		+ SME2Gen_version
		+ " Chameleon version:"
		+ chameleon_version
		+ " On Python:"
		+ python_version
		+ " at "
		+ formatted_datetime
	)

	json5_dict: dict = {}
	json5_html_list: list = []

	print(f"SM2 code from JSON5 - {strVersion}")

	home_dir = "/home/brianr/clients/SM2/SM2Gen/"
	json_filename = f"{home_dir}/json5/nfsshare.json5"    #CreateStarterWebsite.json5"

	# read open ai key from ini file
	# client stays None when no key can be read, so the translation phase can skip
	# cleanly instead of failing on an undefined name inside get_completion()
	client = None
	# Check if the file exists
	if os.path.exists(ini_file_path):
		# Create a configparser object and read the file
		config = configparser.ConfigParser()
		config.read(ini_file_path)

		# Read the value of "OPENAI_API_KEY"; has_option() is also False when the
		# [smegit] section itself is missing, which used to raise KeyError
		if config.has_option("smegit", "OPENAI_API_KEY"):
			OPENAI_API_KEY = config["smegit"]["OPENAI_API_KEY"]
			client = OpenAI(api_key=OPENAI_API_KEY)
		else:
			print("OPENAI_API_KEY not found in the configuration file.")
	else:
		# Was print(..., file_path): an undefined name that raised NameError here
		print("Configuration file not found at:", ini_file_path)

	# Command line parameters - only --filename is acted on; the no* switches are parsed but not in use
	parser = argparse.ArgumentParser(description="SM2Gen")
	parser.add_argument(
		"-f",
		"--filename",
		help="Specify a filename for the JSON5 file",
		default=json_filename,
	)
	parser.add_argument(
		"-nco",
		"--noController",
		help="Stop it creating a controller file",
		default="no",
	)
	parser.add_argument(
		"-nh", "--noHtml", help="Stop it creating html files(s)", default="no"
	)
	parser.add_argument(
		"-nl",
		"--noLang",
		help="Stop it creating language localise files(s)",
		default="no",
	)
	parser.add_argument(
		"-ncu", "--noCust", help="Stop it creating Custom controller file", default="no"
	)
	args = parser.parse_args()
	json_filename = args.filename
	print(
		f"JSON5 from {json_filename} with noController={args.noController}, noHtml={args.noHtml} and noLang={args.noLang}"
	)  # Not yet activated

	# check syntax of JSON5 (exits the program when the file is invalid)
	lint_json5(json_filename)

	# Get dict of it all
	json5_dict = json5_to_dict(json_filename)

	# Get dict of just the html bit
	json5_html_list = json5_dict["html"]

	# Identify message
	print(f"\nGenerating mojo panels for {hl('PackageName')}")
	print("-----------------------------------")

	# Routes for each panel
	routes = get_all_routes()
	lc_routes = lc_get_all_routes()

	# File names
	controller_file = "Targets/" + hl("PackageName") + ".pm"
	custom_controller_file = "Targets/" + hl("PackageName") + "-Custom.pm"
	# Call it .new if one is already there (and may have been edited by the developer)
	if os.path.exists(custom_controller_file):
		custom_controller_file = custom_controller_file + ".new"
	layout_file = "Targets/" + hl("PackageName").lower() + ".html.ep"
	# One partial file per route, in the same order as the html list
	partial_files = list()
	for panel in routes:
		partial_files.append("Targets/_" + hl("prefix") + "_" + panel + ".html.ep")
	print(partial_files)
	lex_file = "Targets/" + hl("PackageName").lower() + "_en.lex"
	tablecontrols = (
		get_table_control_data()
	)  # arrays of hashes used to drive rows in tables

	# Generate controller file
	try:
		controller_template = PageTemplateFile("Templates/controller.pm.tem",CHAMELEON_DEBUG="true")
		dbentries = get_db_fields()  # Params which correspond to Db fields
		try:
			controller_perl = controller_template.render(
				version=strVersion,
				tablecontrols=tablecontrols,
				dbentries=dbentries,
				**json5_dict,
				panels=routes,
				lcPackageName=json5_dict["PackageName"].lower(),
			)
			with open(controller_file, "w") as file:
				file.write(controller_perl)
			print(f"{controller_file} controller generated ok")
		except Exception as e:
			print(f"A Chameleon controller render error occurred: {e}")
	except Exception as e:
		print(f"A Chameleon controller template error occurred: {e}")

	# Generate Custom controller file
	try:
		custom_controller_template = PageTemplateFile("Templates/custom.pm.tem")
		try:
			custom_controller_perl = custom_controller_template.render(
				version=strVersion, panels=routes, tablecontrols=tablecontrols
			)
			# We must be careful to not overwrite the custom file if the developer has already written to it - TBD
			with open(custom_controller_file, "w") as file:
				file.write(custom_controller_perl)
			print(f"{custom_controller_file} custom controller generated ok")
		except Exception as e:
			print(f"A Chameleon custom controller render error occurred: {e}")
	except Exception as e:
		print(f"A Chameleon custom controller template error occurred: {e}")

	# generate Layout file
	# The template is now created inside the try, like the two controller sections,
	# so a template problem is reported instead of escaping as an unhandled exception
	try:
		layout_template = PageTemplateFile("Templates/layout.html.ep.tem")
		try:
			layout_mojo = layout_template.render(
				version=strVersion, **json5_dict, conditions=routes
			)
			with open(layout_file, "w") as file:
				file.write(layout_mojo)
			print(f"{layout_file} mojo template layout file generated ok")
		except Exception as e:
			print(f"A Chameleon render on layout file error occurred: {e}")
	except Exception as e:
		print(f"A Chameleon template layout file error occurred: {e}")

	# Generate a partial file for each of the entries in the html list
	# Pull in the template code for each of the input types
	html_controls = parse_xml_to_dict("Templates/html_controls.html.ep.xml")
	# partial_files was built from the same html list, so the two sequences pair up one to one
	for html, partial_file in zip(json5_html_list, partial_files):
		# Generate a mojo template file, and then add in the controls
		# main file first
		try:
			partial_template = PageTemplateFile("Templates/partial.html.ep.tem")
			partial_mojo_context = {**json5_dict, **html}
			try:
				partial_mojo_template = partial_template.render(
					version=strVersion, **partial_mojo_context
				)
				with open(partial_file, "w") as file:
					file.write(partial_mojo_template)
				print(f"{partial_file} mojo template generated ok - phase 1")
			except Exception as e:
				print(
					f"A Chameleon render error on partial file {html['route']} occurred: {e}"
				)
		except Exception as e:
			print(f"A Chameleon html  {html['route']} error occurred: {e}")

		# Now generate the controls from the rest of the entries in the dict.
		all_controls_html = ""
		prefix_is = hl("prefix")
		for html_control in html:
			inner_html = html[html_control]
			if isinstance(inner_html, dict):
				# A dict entry names its own control template through its "Type" key
				try:
					control_template = PageTemplate(html_controls[inner_html["Type"]])
					try:
						control_html = control_template.render(
							version=strVersion, **inner_html, prefix=prefix_is
						)
						all_controls_html = all_controls_html + control_html
					except Exception as e:
						print(
							f"A Chameleon render on partial file control {inner_html['Name']} error occurred: {e}"
						)
				except Exception as e:
					print(
						f"A Chameleon render on partial file control {inner_html['Name']} error occurred: {e}"
					)
			else:
				# just a simple entry - name less numerics is type
				html_Type = "".join(char for char in html_control if not char.isdigit())
				try:
					simple_control_template = PageTemplate(html_controls[html_Type])
					try:
						simple_control_html = simple_control_template.render(
							version=strVersion, value=inner_html, prefix=prefix_is
						)
						all_controls_html = all_controls_html + simple_control_html
					except Exception as e:
						print(
							f"A Chameleon render on partial file control {html_control} error occurred: {e}"
						)
				except Exception as e:
					print(
						f"A Chameleon template partial file control {html_control} error occurred: {e}"
					)

		# Now insert it into the partial file in the correct place.
		# Read in the text file and find the "%# Inputs etc in here." marker line
		# (falls back to the end of the file when the marker is missing)
		with open(partial_file, "r") as file:
			lines = file.readlines()
		index = next(
			(line_no for line_no, line in enumerate(lines) if "%# Inputs etc in here." in line),
			len(lines),
		)

		# Insert the string on the line after the marker
		lines.insert(index + 1, all_controls_html + "\n")

		# Write the modified content back to the file
		with open(partial_file, "w") as file:
			file.writelines(lines)
		print(f"Content modified and saved to {partial_file}")

	# Now generate the <name>.en file
	# Look through the generated files for the /l[\s|(]['|"](.*)['|"]\)/ strings.

	# create a combined list of all the files
	all_files = [controller_file, layout_file] + partial_files
	all_strings = []
	# Define the regular expression pattern to match the strings you want to extract
	pattern = r"l[\s|(][\'|\"](.*)[\'|\"]\)"
	for filename in all_files:
		with open(filename, "r") as file:
			file_content = file.read()
		# Use re.findall to extract all occurrences of the pattern from the file content
		extracted_strings = re.findall(pattern, file_content)
		all_strings = all_strings + extracted_strings
	# Take out any duplicates
	all_strings = deduplicate_array(all_strings)
	# '<prefix>_english-message' => 'English Message',
	string_lib = []  # Array of dicts
	for lex_message in all_strings:
		# If has a prefix - leave it for left hand side but delete it for the right
		# If has no prefix - add one for left hand side and leave it for the right
		# Map all spaces to "_" on left hand side
		# and truncate the left hand side to its first six "_"-separated parts
		original_str = lex_message
		# Check if it starts with the prefix (any case)
		if lex_message.lower().startswith(hl("prefix").lower()):
			left_str = lex_message
			# Drop the prefix plus the separator character that follows it
			right_str = lex_message[len(hl("prefix")) + 1 :]
		else:
			left_str = hl("prefix") + "_" + lex_message
			right_str = lex_message
		# And take out any "_", map to " "
		right_str = right_str.replace("_", " ")
		right_str = format_text(right_str)
		left_str = left_str.replace(" ", "_")
		words = left_str.split("_")[:6]
		left_str = "_".join(words)
		next_lex_str = {"orig": original_str, "left": left_str, "right": right_str}
		string_lib.append(next_lex_str)
	# And write it to lex file, one "'id' => 'text'," line per string
	lex_all = "".join(
		f"'{lex_item['left']}' => '{lex_item['right']}',\n" for lex_item in string_lib
	)
	print(f"Writing {lex_file}")
	with open(lex_file, "w") as file:
		file.write(lex_all)
	# and then play the strings back into the partials and the layout file
	print("..and feed the lex string names back into other files")
	for filename in all_files:
		with open(filename, "r") as file:
			file_content = file.read()
		# Scan through
		for item in string_lib:
			original_str = item["orig"]
			left_str = item["left"]
			# Replace all occurrences of original string with left string in 'contents'
			file_content = file_content.replace(
				"l('" + original_str + "')", "l('" + left_str + "')"
			)
		# and write it back
		with open(filename, "w") as file:
			file.write(file_content)
		print(f"Write out modified:{filename}")

	# Now generate all the translated lex files from a list of the languages and codes
	languages_path = "Templates/languages.json"
	with open(languages_path, "r") as file:
		languages_str = file.read()
	lang_dict = json.loads(languages_str)
	with open(lex_file, "r") as file:
		lex_str = file.read()
	eng_lex_dict = convert_lex_to_dict(lex_str)
	for lang_item in lang_dict:
		print(f"Translating from english lex file to {lang_item['language']}")
		code = lang_item["code"]
		translated_lex_file = f"Targets/{hl('PackageName').lower()}_{code}.lex"
		# Only do it if the lex file is missing
		if os.path.exists(translated_lex_file):
			print(
				f"Skipping the creation of {translated_lex_file} as it exists already"
			)
			continue
		# Without an API client there is nothing to translate with
		if client is None:
			print(
				f"Skipping the creation of {translated_lex_file} as no OpenAI API key is configured"
			)
			continue
		translated_dict = []
		for lex_item in eng_lex_dict:
			# Get it from ChatGPT
			translated_text = get_translation(
				lex_item["text"], lang_item["language"]
			)
			translated_dict.append({"id": lex_item["id"], "text": translated_text})
		print(f"Writing out lex file for {lang_item['code']}")
		with open(translated_lex_file, "w") as file:
			for item in translated_dict:
				# escape any nasties: backslashes first, then both kinds of quote
				translated_text = (
					item["text"]
					.replace("\\", r"\\")
					.replace('"', r"\"")
					.replace("'", r"\'")
				)
				line = "'" + item["id"] + "' => " + '"' + translated_text + '",\n'
				file.write(line)
	sys.exit()  # end of the program