# SM2Gen/sm2gen.py

import json5
import sys
import argparse
from chameleon import PageTemplateFile, PageTemplate
import pkg_resources
import xml.etree.ElementTree as ET
import re
import os
from datetime import datetime
from openai import OpenAI
import configparser
import json
from pathlib import Path


#
# To Do
# 1. Get routing working
# 2. Deal with ? and ! as sentence ends in text-format
# 3. Make lexical scan work with double or single quotes (if possible)

# Generator version; it is reported in the start-up banner and passed to the templates
SME2Gen_version = "0.8"
# Parsed JSON5 definition and its "html" list; both are filled in by the main script
json5_dict: dict = dict()
json5_html_list: list = list()
# INI file in the user's home directory from which the OpenAI API key is read
ini_file_path = os.path.expanduser("~/.smegit/conf")
# Stays empty unless the main script finds a key in the INI file
OPENAI_API_KEY = ""


def parse_json(json_obj, prefix=""):
    """Flatten nested dict/list data into a list of ``"path: value"`` strings.

    Dict keys are joined onto the path with ``.`` and list positions are
    written as ``[i]``; every non-container value yields one string.

    Args:
        json_obj: The (possibly nested) data to flatten.
        prefix: Path accumulated so far; empty at the top level.

    Returns:
        A list of ``"<path>: <value>"`` strings in traversal order.
    """
    if isinstance(json_obj, dict):
        flattened = []
        for name, child in json_obj.items():
            # No leading dot for keys at the very top level
            child_path = name if not prefix else f"{prefix}.{name}"
            flattened += parse_json(child, child_path)
        return flattened

    if isinstance(json_obj, list):
        flattened = []
        for position, child in enumerate(json_obj):
            flattened += parse_json(child, f"{prefix}[{position}]")
        return flattened

    # Leaf value: emit a single "path: value" entry
    return [f"{prefix}: {json_obj}"]


def json5_to_list(filename):
    """Load the JSON5 file *filename* and return it flattened by ``parse_json``."""
    with open(filename, "r") as handle:
        return parse_json(json5.load(handle))


def json5_to_pandas(filename):
    """Load the JSON5 file *filename* and return it as a flattened DataFrame.

    The parsed data is also printed, as before.

    Args:
        filename: Path of the JSON5 file to read.

    Returns:
        The result of ``pandas.json_normalize`` applied to the parsed data.
    """
    # Imported here because this is the only helper that needs pandas
    import pandas as pd

    with open(filename, "r") as file:
        data = json5.load(file)
    print(data)
    # json_normalize is a pandas function, not a method of the parsed dict
    return pd.json_normalize(data)


def json5_to_dict(filename):
    """Load the JSON5 file *filename* and return the parsed data unchanged."""
    with open(filename, "r") as handle:
        return json5.load(handle)


def rec_print(data, prefix=""):
    """Recursively print every leaf of nested dict/list data as ``path: value``.

    Dict keys extend the path with ``.key`` and list positions with ``[i]``.
    """
    if isinstance(data, dict):
        children = [(f"{prefix}.{name}", value) for name, value in data.items()]
    elif isinstance(data, list):
        children = [(f"{prefix}[{pos}]", value) for pos, value in enumerate(data)]
    else:
        # A basic type: print it and stop descending
        print(f"{prefix}: {data}")
        return

    for child_prefix, child in children:
        rec_print(child, child_prefix)


def find_item(nested_dict, target_key):
    """Return the first value stored under *target_key* in nested dicts.

    The search is depth-first in key order and only descends into dict
    values (lists are not searched). A match whose value is ``None`` found
    in a nested dict is treated as "not found" and the search continues.

    Returns:
        The matching value, or ``None`` when no match is found.
    """
    for name, value in nested_dict.items():
        if name == target_key:
            return value
        if not isinstance(value, dict):
            continue
        found = find_item(value, target_key)
        if found is not None:
            return found
    return None


def find_dicts_with_key(data, target_key):
    """Collect every dict, at any depth of *data*, that contains *target_key*.

    Both dict values and list items are searched. Matching dicts are
    returned by reference (not copied), parents before their descendants.
    """
    if isinstance(data, dict):
        matches = [data] if target_key in data else []
        children = data.values()
    elif isinstance(data, list):
        matches = []
        children = data
    else:
        return []

    for child in children:
        # Only containers can hold further matches
        if isinstance(child, (dict, list)):
            matches.extend(find_dicts_with_key(child, target_key))
    return matches


def find_values_with_key(data, target_key):
    """Collect the value stored under *target_key* in every dict within *data*.

    Both dict values and list items are searched at any depth; a parent's
    value is listed before values found deeper inside it.
    """
    if isinstance(data, dict):
        values = [data[target_key]] if target_key in data else []
        children = data.values()
    elif isinstance(data, list):
        values = []
        children = data
    else:
        return []

    for child in children:
        # Only containers can hold further matches
        if isinstance(child, (dict, list)):
            values.extend(find_values_with_key(child, target_key))
    return values


def lint_json5(filename):
    """Check that *filename* can be read and parsed as JSON5.

    Prints a confirmation when the file parses. When it cannot be read or
    parsed, prints the error and terminates the program.

    Args:
        filename: Path of the JSON5 file to check.

    Raises:
        SystemExit: With exit status 1 when the file is unreadable or invalid.
    """
    try:
        with open(filename, "r") as file:
            data = file.read()
        json5.loads(data)
    except Exception as e:
        # Broad on purpose: any read or parse failure means "invalid" here
        print(f"{filename} as JSON5 data is invalid")
        print("Error:", str(e))
        # Non-zero status so calling shells/scripts can detect the failure
        sys.exit(1)
    else:
        print(f"{filename} as JSON5 data is valid")


def flatten_hash_of_lists(hash_of_lists):
    """Return a copy of the dict with every list value spread over indexed keys.

    A list stored under ``key`` becomes entries ``key_0``, ``key_1``, ...;
    non-list values are copied across under their original key.
    """
    flattened = {}
    for key, value in hash_of_lists.items():
        if not isinstance(value, list):
            flattened[key] = value
            continue
        # The index suffix keeps the generated keys unique within one list
        flattened.update({f"{key}_{i}": item for i, item in enumerate(value)})
    return flattened


def hl(keyname):
    """Return the top-level value of *keyname* from the loaded JSON5 dict.

    When the key is absent a message is printed and the string ``"None"``
    (not the ``None`` object) is returned.
    """
    try:
        return json5_dict[keyname]
    except KeyError:
        print(f"{keyname} not found in JSON5 - top level")
        return "None"


def get_all_routes():
    """Return the ``route`` of every entry in the JSON5 ``html`` list, in order.

    Entries without a ``route`` contribute ``None``.
    """
    routes = []
    for html_block in json5_dict.get("html", []):
        routes.append(html_block.get("route"))
    return routes


def lc_get_all_routes():
    """Return the lower-cased ``route`` of every entry in the JSON5 ``html`` list."""
    lowered = []
    for html_block in json5_dict.get("html", []):
        lowered.append(html_block.get("route").lower())
    return lowered


def has_file_been_modified(file_path):
    """Report whether a file's modification time is later than its ctime.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True when ``os.path.getmtime`` is strictly later than
        ``os.path.getctime`` for the file, otherwise False.

    Raises:
        OSError: If the file does not exist or cannot be accessed.
    """
    # NOTE: per the Python docs, getctime is the creation time on Windows but
    # the time of the last metadata change on Unix, so on Unix this is not a
    # true "modified since creation" test.
    creation_time = os.path.getctime(file_path)
    last_modification_time = os.path.getmtime(file_path)
    return creation_time < last_modification_time


def parse_xml_to_dict(xml_file):
    """Parse an XML file into a ``{tag: text}`` dict of the root's children.

    For each direct child of the root the element's own text is used when it
    is non-empty; otherwise the text of the first element returned by
    ``find(".//")`` on that child is used. When a tag occurs more than once
    the last occurrence wins.

    Args:
        xml_file: Filename or file object accepted by ``ElementTree.parse``.

    Returns:
        A dict mapping each child tag to its text.
    """
    root = ET.parse(xml_file).getroot()
    return {
        child.tag: child.text if child.text else child.find(".//").text
        for child in root
    }


def deduplicate_array(arr):
    """Return the items of *arr* without duplicates, in first-seen order.

    Args:
        arr: Iterable of hashable items.

    Returns:
        A new list holding each distinct item once, ordered by first
        occurrence.
    """
    # dict keys are unique and keep insertion order; a set would lose the
    # order and make the output vary between runs
    return list(dict.fromkeys(arr))


def get_db_fields():
    """Return the list of Db fields; currently always an empty list."""
    return list()


def get_table_control_data():
    """Return every ``TableControl`` value found in the JSON5 ``html`` list."""
    table_controls = find_values_with_key(json5_html_list, "TableControl")
    return table_controls


def format_text(text):
    """Tidy the spacing and capitalisation of a piece of text.

    The text is split into sentences on ``.`` and into phrases on ``,``.
    The first word of each phrase is capitalised unless it is fully upper
    case; other words are left alone. Phrases are re-joined with ``", "``
    and sentences with ``".  "``. A text that is a single, fully upper-case
    word is simply capitalised.

    Args:
        text: The text to format.

    Returns:
        The re-formatted text.
    """
    sentences = text.split(".")
    opening = sentences[0]

    # Deal with one capitalised word
    if len(sentences) == 1 and opening.isupper() and len(opening.split(" ")) == 1:
        return opening.capitalize()

    def _tidy_phrase(phrase):
        # Strip leading whitespace from the phrase and from each word in it
        words = [word.lstrip() for word in phrase.lstrip().split(" ")]
        # Only the first word is touched, and only if it is not an acronym
        if not words[0].isupper():
            words[0] = words[0].capitalize()
        return " ".join(words).lstrip()

    def _tidy_sentence(sentence):
        tidy_phrases = [_tidy_phrase(phrase) for phrase in sentence.split(",")]
        return ", ".join(tidy_phrases).lstrip()

    return ".  ".join(_tidy_sentence(sentence) for sentence in sentences).lstrip()


def get_completion(prompt):
    """Send *prompt* as a single user message and return the reply text.

    Uses the module-level ``client``, which the main script creates only
    when an API key was found in the configuration file. The request asks
    the ``gpt-4o`` model for one completion with ``temperature=0`` and
    ``max_tokens=100``.

    Args:
        prompt: The text to send as the user message.

    Returns:
        The content of the first choice's message, stripped of surrounding
        whitespace.
    """
    request = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0,
        "max_tokens": 100,
    }
    response = client.chat.completions.create(**request)
    # Pull the message content out of the response converted to a dict
    first_choice = response.to_dict()["choices"][0]
    return first_choice["message"]["content"].strip()


def get_translation(message="Hello", language="french"):
    """Translate *message* from English into *language* via ``get_completion``.

    Args:
        message: English text to translate.
        language: Name of the target language, inserted into the prompt.

    Returns:
        The translated text. If the service returns an empty reply, a warning
        is printed and the original *message* is returned instead.

    Note:
        When the translation is more than five times longer than the original
        the pair is printed and the program is terminated with ``quit()``.
    """
    prompt = f"""Translate the following text in triple ~~~ so that it is suitable for a server management application menu 
	and only return the actual translation; translate it from from English to {language}  ~~~{message}~~~"""
    translated_message = get_completion(prompt)
    # An empty reply would otherwise cause a ZeroDivisionError in the ratio below
    if not translated_message:
        print(f"Warning: empty translation returned for {message!r} ({language})")
        return message
    # Look for over long messages
    if len(message) / len(translated_message) < 0.2:
        print(f"{message} translated to {translated_message}")
        quit()
    return translated_message


def convert_lex_to_dict(pairs_string):
    """Parse lex-file text into a list of ``{"id": ..., "text": ...}`` dicts.

    Entries are separated by a comma followed by a newline and have the form
    ``'id' => 'text'``; blank entries are ignored and surrounding single
    quotes are stripped. When an id occurs more than once, the last text
    wins and the entry keeps the position of the first occurrence.

    Args:
        pairs_string: Contents of a lex file.

    Returns:
        One dict per distinct id, in order of first appearance.

    Raises:
        IndexError: If a non-blank entry does not contain ``" => "``.
    """
    lookup = {}
    for entry in pairs_string.split(",\n"):
        if not entry.strip():
            continue
        fields = entry.split(" => ")
        lookup[fields[0].strip("'")] = fields[1].strip("'")
    return [{"id": ident, "text": wording} for ident, wording in lookup.items()]


if __name__ == "__main__":
    # Find out which Chameleon is installed, for the version banner
    try:
        chameleon_version = pkg_resources.get_distribution("Chameleon").version
    except pkg_resources.DistributionNotFound:
        chameleon_version = "Version information not available"

    # Reduce sys.version to the first x.y.z number it contains
    version_match = re.search(r"(\d{1,3}\.\d{1,3}\.\d{1,3})", sys.version)
    if version_match:
        python_version = version_match.group(0)
    else:
        python_version = "Unknown"

    formatted_datetime = datetime.now().strftime("%Y-%m-%d %H:%M")

    # Banner printed below and handed to every template as "version"
    strVersion = "".join(
        [
            "SM2Gen version:",
            SME2Gen_version,
            " Chameleon version:",
            chameleon_version,
            " On Python:",
            python_version,
            " at ",
            formatted_datetime,
        ]
    )

    # Start from empty JSON5 holders
    json5_dict: dict = dict()
    json5_html_list: list = list()

    print(f"SM2 code from JSON5 - {strVersion}")

    # Default JSON5 definition file, used when -f/--filename is not given
    home_dir = "/home/brianr/clients/SM2/SM2Gen/"
    json_filename = f"{home_dir}/json5/nfsshare.json5"  # CreateStarterWebsite.json5"

    # Read the OpenAI key from the INI file and, when found, create the API client
    if os.path.exists(ini_file_path):
        # Create a configparser object and read the file
        config = configparser.ConfigParser()
        config.read(ini_file_path)

        # has_option() is also False when the [smegit] section itself is
        # missing, where indexing config["smegit"] would raise KeyError
        if config.has_option("smegit", "OPENAI_API_KEY"):
            OPENAI_API_KEY = config["smegit"]["OPENAI_API_KEY"]
            # print("API Key:", OPENAI_API_KEY)
            client = OpenAI(api_key=OPENAI_API_KEY)
        else:
            print("OPENAI_API_KEY not found in the configuration file.")
    else:
        # Report the path that was actually checked
        print("Configuration file not found at:", ini_file_path)

    # Command line parameters - not in use
    parser = argparse.ArgumentParser(description="SM2Gen")
    parser.add_argument(
        "-f",
        "--filename",
        help="Specify a filename for the JSON5 file",
        default=json_filename,
    )
    # The four "no..." options all take a value and default to the string "yes"
    for short_flag, long_flag, help_text in (
        ("-nco", "--noController", "Stop it creating a controller file"),
        ("-nh", "--noHtml", "Stop it creating html files(s)"),
        ("-nl", "--noLang", "Stop it creating language localise files(s)"),
        ("-ncu", "--noCust", "Stop it creating Custom controller file"),
    ):
        parser.add_argument(short_flag, long_flag, help=help_text, default="yes")

    args = parser.parse_args()
    json_filename = args.filename
    print(
        f"JSON5 from {json_filename} with noController={args.noController}, noHtml={args.noHtml} and noLang={args.noLang}"
    )  # Not yet activated

    # Stop straight away when the JSON5 definition file is missing
    json_file_path = Path(json_filename)
    if not json_file_path.exists():
        print(f"json5 file: {json_filename} not found")
        quit(1)

    # Check the JSON5 syntax (lint_json5 exits the program when it is invalid)
    lint_json5(json_filename)

    # The whole definition as a dict, and separately just its "html" list
    json5_dict = json5_to_dict(json_filename)
    json5_html_list = json5_dict["html"]

    # Identify message
    print(f"\nGenerating mojo panels for {hl('PackageName')}")
    print("-----------------------------------")

    # Routes for each panel
    routes = get_all_routes()
    lc_routes = lc_get_all_routes()

    # All generated files go under Targets/<PackageName>/, created if needed
    directory_path = Path("Targets/" + hl("PackageName"))
    directory_path.mkdir(parents=True, exist_ok=True)
    target_directory_path = "Targets/" + hl("PackageName") + "/"

    controller_file = target_directory_path + hl("PackageName") + ".pm"
    custom_controller_file = target_directory_path + hl("PackageName") + "-Custom.pm"
    # Call it .new if one is already there (and may have been edited by the developer)
    if os.path.exists(custom_controller_file):
        custom_controller_file += ".new"
    layout_file = target_directory_path + hl("PackageName").lower() + ".html.ep"

    # One partial template per route: _<prefix>_<route>.html.ep
    partial_files = [
        target_directory_path + "_" + hl("prefix") + "_" + panel + ".html.ep"
        for panel in routes
    ]
    print(f"Partial files to be created:{partial_files}")
    lex_file = target_directory_path + hl("PackageName").lower() + "_en.lex"

    # arrays of hashes used to drive rows in tables
    tablecontrols = get_table_control_data()

    #    print(strVersion,tablecontrols,routes)

    # Generate controller file: build the template first, then render and save it
    try:
        controller_template = PageTemplateFile(
            "Templates/controller.pm.tem", CHAMELEON_DEBUG="true"
        )
        dbentries = get_db_fields()  # Params which correspond to Db fields
    except Exception as exc:
        print(f"A Chameleon controller template error occurred: {exc}")
    else:
        try:
            controller_perl = controller_template.render(
                version=strVersion,
                tablecontrols=tablecontrols,
                dbentries=dbentries,
                **json5_dict,
                panels=routes,
                lcPackageName=json5_dict["PackageName"].lower(),
            )
            with open(controller_file, "w") as controller_out:
                controller_out.write(controller_perl)
            print(f"{controller_file} controller generated ok")
        except Exception as exc:
            print(f"A Chameleon controller render error occurred: {exc}")

    # Generate Custom controller file: build the template, then render and save it
    try:
        custom_controller_template = PageTemplateFile("Templates/custom.pm.tem")
    except Exception as exc:
        print(f"A Chameleon custom controller template error occurred: {exc}")
    else:
        try:
            custom_controller_perl = custom_controller_template.render(
                version=strVersion, panels=routes, tablecontrols=tablecontrols
            )
            # We must be careful to not overwrite the custom file if the developer has already written to it - TBD
            with open(custom_controller_file, "w") as custom_out:
                custom_out.write(custom_controller_perl)
            print(f"{custom_controller_file} custom controller generated ok")
        except Exception as exc:
            print(f"A Chameleon custom controller render error occurred: {exc}")

    # Generate the layout file: build the template, then render and save it.
    # The template is created inside the try so that a failure is reported
    # like the controller templates instead of aborting the run.
    try:
        layout_template = PageTemplateFile("Templates/layout.html.ep.tem")
    except Exception as e:
        print(f"A Chameleon template layout file error occurred: {e}")
    else:
        try:
            layout_mojo = layout_template.render(
                version=strVersion, **json5_dict, conditions=routes
            )
            with open(layout_file, "w") as file:
                file.write(layout_mojo)
            print(f"{layout_file} mojo template layout file generated ok")
        except Exception as e:
            print(f"A Chameleon render on layout file error occurred: {e}")

    # Generate a partial file for each of the entries in the html list
    # Pull in the template code for each of the input types
    # html_controls = json5_to_dict('Templates/html_controls.html.ep.tem')
    html_controls = parse_xml_to_dict("Templates/html_controls.html.ep.xml")
    for i, html in enumerate(json5_html_list):
        # partial_files was built from the same html list, so the indexes line up
        partial_file = partial_files[i]
        # .get() so that reporting an error can never raise a KeyError itself
        route_name = html.get("route")

        # Phase 1: generate the main mojo template file for this panel
        try:
            partial_template = PageTemplateFile("Templates/partial.html.ep.tem")
            partial_mojo_context = {**json5_dict, **html}
            try:
                partial_mojo_template = partial_template.render(
                    version=strVersion, **partial_mojo_context
                )
                with open(partial_file, "w") as file:
                    file.write(partial_mojo_template)
                print(f"{partial_file} mojo template generated ok - phase 1")
            except Exception as e:
                print(
                    f"A Chameleon render error on partial file {route_name} occurred: {e}"
                )
        except Exception as e:
            print(f"A Chameleon html  {route_name} error occurred: {e}")

        # Phase 2: generate the controls from the rest of the entries in the dict
        control_chunks = []
        prefix_is = hl("prefix")
        for html_control, inner_html in html.items():
            if isinstance(inner_html, dict):
                # Fall back to the entry's key when the control has no "Name"
                control_name = inner_html.get("Name", html_control)
                try:
                    control_template = PageTemplate(html_controls[inner_html["Type"]])
                except Exception as e:
                    print(
                        f"A Chameleon template partial file control {control_name} error occurred: {e}"
                    )
                    continue
                try:
                    control_chunks.append(
                        control_template.render(
                            version=strVersion, **inner_html, prefix=prefix_is
                        )
                    )
                except Exception as e:
                    print(
                        f"A Chameleon render on partial file control {control_name} error occurred: {e}"
                    )
            else:
                # just a simple entry - name less numerics is type
                html_Type = "".join(char for char in html_control if not char.isdigit())
                try:
                    simple_control_template = PageTemplate(html_controls[html_Type])
                except Exception as e:
                    print(
                        f"A Chameleon template partial file control {html_control} error occurred: {e}"
                    )
                    continue
                try:
                    control_chunks.append(
                        simple_control_template.render(
                            version=strVersion, value=inner_html, prefix=prefix_is
                        )
                    )
                except Exception as e:
                    print(
                        f"A Chameleon render on partial file control {html_control} error occurred: {e}"
                    )
        all_controls_html = "".join(control_chunks)

        # Phase 3: insert the controls into the partial file in the correct place.
        # Read in the text file and find the "%# Inputs etc in here." marker line;
        # when there is no marker the controls end up at the end of the file.
        with open(partial_file, "r") as file:
            lines = file.readlines()
        index = next(
            (pos for pos, line in enumerate(lines) if "%# Inputs etc in here." in line),
            len(lines),
        )

        # Insert the controls on the line after the marker
        lines.insert(index + 1, all_controls_html + "\n")

        # Write the modified content back to the file
        with open(partial_file, "w") as file:
            file.writelines(lines)
        print(f"Content modified and saved to {partial_file}")

    # Now generate the <name>.en file
    # Look through the generated files for the /l[\s|(]['|"](.*)['|"]\)/ strings.

    # Every generated file that is scanned for strings (and rewritten below)
    all_files = [controller_file, layout_file] + partial_files
    # Regular expression matching the strings to extract (same as quoted above)
    pattern = r"l[\s|(][\'|\"](.*)[\'|\"]\)"
    all_strings = []
    for filename in all_files:
        with open(filename, "r") as file:
            file_content = file.read()
        all_strings.extend(re.findall(pattern, file_content))
    # Take out any duplicates
    all_strings = deduplicate_array(all_strings)

    # Build one entry per string, of the form
    # '<prefix>_english-message' => 'English Message',
    string_lib = []  # Array of dicts
    for lex_message in all_strings:
        # Left hand side (the id) always carries the prefix; right hand side
        # (the English text) never does
        if lex_message.lower().startswith(hl("prefix").lower()):
            left_str = lex_message
            right_str = lex_message[len(hl("prefix")) + 1 :]
        else:
            left_str = hl("prefix") + "_" + lex_message
            right_str = lex_message
        # Right: underscores become spaces, then tidy the text
        right_str = format_text(right_str.replace("_", " "))
        # Left: spaces become underscores, and keep at most six "_"-separated parts
        left_str = "_".join(left_str.replace(" ", "_").split("_")[:6])
        string_lib.append({"orig": lex_message, "left": left_str, "right": right_str})

    # And write it to lex file, one "'id' => 'text'," line per entry
    lex_all = "".join(
        f"'{lex_str['left']}' => '{lex_str['right']}',\n" for lex_str in string_lib
    )
    print(f"Writing {lex_file}")
    with open(lex_file, "w") as file:
        file.write(lex_all)

    # and then play the strings back into the partials and the layout file
    print("..and feed the lex string names back into other files")
    for filename in all_files:
        with open(filename, "r") as file:
            file_content = file.read()
        # Replace every l('<original>') with l('<id>')
        for item in string_lib:
            file_content = file_content.replace(
                "l('" + item["orig"] + "')", "l('" + item["left"] + "')"
            )
        # and write it back
        with open(filename, "w") as file:
            file.write(file_content)
        print(f"Write out modified:{filename}")

    #  Now generate all the translated lex files from a list of the languages and codes
    # if specifically requested
    # NOTE(review): --noLang defaults to the string "yes", so this branch only
    # runs when the option is given an empty value - confirm that is intended
    if not args.noLang:
        with open("Templates/languages.json", "r") as file:
            lang_dict = json.load(file)
        with open(lex_file, "r") as file:
            eng_lex_dict = convert_lex_to_dict(file.read())

        for lang_item in lang_dict:
            print(f"Translating from english lex file to {lang_item['language']}")
            code = lang_item["code"]
            translated_lex_file = (
                f"{target_directory_path}{hl('PackageName').lower()}_{code}.lex"
            )
            # Only do it if the lex file is missing
            if os.path.exists(translated_lex_file):
                print(
                    f"Skipping the creation of {translated_lex_file} as it exists already"
                )
                continue

            # Get each text from ChatGPT, keeping the English id
            translated_dict = [
                {
                    "id": lex_item["id"],
                    "text": get_translation(lex_item["text"], lang_item["language"]),
                }
                for lex_item in eng_lex_dict
            ]
            print(f"Writing out lex file for {lang_item['code']}")
            with open(translated_lex_file, "w") as file:
                for item in translated_dict:
                    # escape any nasties: backslashes first, then both quote styles
                    translated_text = (
                        item["text"]
                        .replace("\\", r"\\")
                        .replace('"', r"\"")
                        .replace("'", r"\'")
                    )
                    # 'id' => "translated text",
                    file.write(
                        "".join(("'", item["id"], "' => ", '"', translated_text, '",\n'))
                    )
    quit()  # end of the program