"""SM2Gen: generate controller, layout, partial and lex files from a JSON5 file.

Run as a script.  The JSON5 definition named with ``-f`` is rendered through
the Chameleon templates under ``Templates/`` into ``Targets/<PackageName>/``.
Unless ``--noLang`` is given, the English lex file is then translated through
the OpenAI API into every language listed in ``Templates/languages.json``.
"""
import json5
import sys
import argparse
from chameleon import PageTemplateFile, PageTemplate
import pkg_resources
import xml.etree.ElementTree as ET
import re
from openai import OpenAI
import configparser
import json
from pathlib import Path
import traceback
import os
from datetime import datetime, timedelta

#
# To Do
# 1. Get routing working
# 2. Deal with ? and ! as sentence ends in text-format
# 3. Make lexical scan work with double or single quotes (if possible)

SME2Gen_version = "0.8"
json5_dict: dict = {}
json5_html_list: list = []

# Define the path to the INI file
ini_file_path = os.path.expanduser("~/.smegit/conf")
OPENAI_API_KEY = ""
# OpenAI client; stays None until the script finds an API key in the INI file.
client = None


def assemble_version_string():
    """Return a one-line banner with the SM2Gen, Chameleon and Python versions.

    The banner ends in `` at YYYY-MM-DD HH:MM:SS``; check_file_version() reads
    that timestamp back out of generated files.
    """
    try:
        chameleon_version = pkg_resources.get_distribution("Chameleon").version
    except pkg_resources.DistributionNotFound:
        chameleon_version = "No version information"
    # sys.version carries build details; keep only the x.y.z part.
    version_match = re.search(r"(\d{1,3}\.\d{1,3}\.\d{1,3})", sys.version)
    python_version = version_match.group(0) if version_match else "Unknown"
    formatted_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return (
        f"SM2Gen version:{SME2Gen_version}"
        f" Chameleon version:{chameleon_version}"
        f" On Python:{python_version}"
        f" at {formatted_datetime}"
    )


def parse_json(json_obj, prefix=""):
    """Flatten nested dicts/lists into a list of ``"path: value"`` strings.

    Dict keys are joined with ``.`` and list positions appear as ``[i]``.
    """
    structured_list = []
    if isinstance(json_obj, dict):
        for k, v in json_obj.items():
            new_key = f"{prefix}.{k}" if prefix else k
            structured_list.extend(parse_json(v, new_key))
    elif isinstance(json_obj, list):
        for i, v in enumerate(json_obj):
            structured_list.extend(parse_json(v, f"{prefix}[{i}]"))
    else:
        structured_list.append(f"{prefix}: {json_obj}")
    return structured_list


def json5_to_list(filename):
    """Load a JSON5 file and return it flattened by parse_json()."""
    with open(filename, "r", encoding="utf-8") as file:
        data = json5.load(file)
    return parse_json(data)


def json5_to_pandas(filename):
    """Load a JSON5 file, print it, and return it as a normalised DataFrame."""
    # Imported here so pandas is only required when this helper is used.
    import pandas

    with open(filename, "r", encoding="utf-8") as file:
        data = json5.load(file)
    print(data)
    return pandas.json_normalize(data)


def json5_to_dict(filename):
    """Load a JSON5 file and return the parsed data unchanged."""
    with open(filename, "r", encoding="utf-8") as file:
        return json5.load(file)


def rec_print(data, prefix=""):
    """Recursively print every leaf of *data* as ``path: value``."""
    if isinstance(data, dict):
        for key, val in data.items():
            rec_print(val, f"{prefix}.{key}")
    elif isinstance(data, list):
        for idx, val in enumerate(data):
            rec_print(val, f"{prefix}[{idx}]")
    else:
        # Neither dict nor list, so it is a basic type.
        print(f"{prefix}: {data}")


def find_item(nested_dict, target_key):
    """Return the first value stored under *target_key*, searching nested dicts.

    Lists are not descended into.  Returns None when the key is not found.
    """
    for key, val in nested_dict.items():
        if key == target_key:
            return val
        if isinstance(val, dict):
            result = find_item(val, target_key)
            if result is not None:
                return result
    return None


def find_dicts_with_key(data, target_key):
    """Return every dict inside *data* (at any depth) that has *target_key*."""
    results = []
    if isinstance(data, dict):
        if target_key in data:
            results.append(data)
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        children = ()
    for child in children:
        if isinstance(child, (dict, list)):
            results.extend(find_dicts_with_key(child, target_key))
    return results


def find_values_with_key(data, target_key):
    """Return every value stored under *target_key* anywhere inside *data*."""
    results = []
    if isinstance(data, dict):
        if target_key in data:
            results.append(data[target_key])
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        children = ()
    for child in children:
        if isinstance(child, (dict, list)):
            results.extend(find_values_with_key(child, target_key))
    return results


def lint_json5(filename):
    """Check that *filename* parses as JSON5; exit with status 1 if it does not."""
    try:
        with open(filename, "r", encoding="utf-8") as file:
            data = file.read()
        json5.loads(data)
        print(f"{filename} as JSON5 data is valid")
    except Exception as e:
        print(f"{filename} as JSON5 data is invalid")
        print("Error:", str(e))
        # Non-zero status so calling scripts can see the failure.
        sys.exit(1)


def flatten_hash_of_lists(hash_of_lists):
    """Expand list values of a dict into ``key_0``, ``key_1``, ... entries.

    Non-list values are copied across under their original key.
    """
    flattened = {}
    for key, value in hash_of_lists.items():
        if isinstance(value, list):
            for i, item in enumerate(value):
                # Appending the index keeps the generated keys unique.
                flattened[f"{key}_{i}"] = item
        else:
            flattened[key] = value
    return flattened


def hl(keyname):
    """Return the top-level value for *keyname* from the loaded JSON5 dict.

    Prints a message and returns the string ``"None"`` when the key is absent.
    """
    if keyname in json5_dict:
        return json5_dict[keyname]
    print(f"{keyname} not found in JSON5 - top level")
    return "None"


def get_all_routes():
    """Return the ``route`` of every entry in the JSON5 ``html`` list."""
    return [html_block.get("route") for html_block in json5_dict.get("html", [])]


def lc_get_all_routes():
    """Return all routes (see get_all_routes) in lower case."""
    return [
        html_block.get("route").lower() for html_block in json5_dict.get("html", [])
    ]


def has_file_been_modified(file_path):
    """Return True when the file's mtime is later than its ctime."""
    creation_time = os.path.getctime(file_path)
    last_modification_time = os.path.getmtime(file_path)
    return creation_time < last_modification_time


def parse_xml_to_dict(xml_file):
    """Map each child tag of the XML root to its text.

    When a child has no text of its own, the text of its first descendant
    element is used instead (None if there is no descendant).
    """
    root = ET.parse(xml_file).getroot()
    xml_dict = {}
    for elem in root:
        if elem.text:
            xml_dict[elem.tag] = elem.text
        else:
            inner = elem.find(".//*")
            xml_dict[elem.tag] = inner.text if inner is not None else None
    return xml_dict


def deduplicate_array(arr):
    """Return *arr* without duplicates, keeping first-occurrence order."""
    # dict preserves insertion order, which a set does not; this keeps the
    # generated lex file stable from run to run.
    return list(dict.fromkeys(arr))


def get_db_fields():
    """Return the parameters that correspond to DB fields (none yet)."""
    return []


def get_table_control_data():
    """Return every ``TableControl`` value found in the JSON5 html list."""
    return find_values_with_key(json5_html_list, "TableControl")


def format_text(text):
    """Tidy *text*: capitalise sentence starts and space out '.' and ','.

    The text is split on '.' and then ','.  The first word of every comma
    separated phrase is capitalised unless it is already fully upper case; the
    pieces are rejoined with ", " and ". ".  A single all-upper-case word is
    returned capitalised.
    """
    sentences = text.split(".")
    words = sentences[0].split(" ")
    # Deal with one capitalised word
    if sentences[0].isupper() and len(sentences) == 1 and len(words) == 1:
        return sentences[0].capitalize()

    formatted_sentences = []
    for sentence in sentences:
        formatted_phrases = []
        for phrase in sentence.split(","):
            formatted_words = []
            for i, word in enumerate(phrase.lstrip().split(" ")):
                word = word.lstrip()
                # Leave acronyms and every word after the first untouched.
                if word.isupper() or i != 0:
                    formatted_words.append(word)
                else:
                    formatted_words.append(word.capitalize())
            formatted_phrases.append(" ".join(formatted_words).lstrip())
        formatted_sentences.append(", ".join(formatted_phrases).lstrip())
    return ". ".join(formatted_sentences).lstrip()


def get_completion(prompt):
    """Send *prompt* to the OpenAI chat API and return the stripped reply text.

    Uses the module-level ``client``; ``temperature=0`` keeps the output
    focused and deterministic and ``max_tokens=100`` caps the reply length.

    Raises:
        RuntimeError: if no OpenAI client has been configured.
    """
    if client is None:
        raise RuntimeError("OpenAI client is not configured (no OPENAI_API_KEY)")
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
        max_tokens=100,
    )
    # Extracting the content message from the JSON data
    response_data = response.to_dict()
    content_message = response_data["choices"][0]["message"]["content"]
    return (content_message or "").strip()


def get_translation(message="Hello", language="french"):
    """Translate *message* from English into *language* via get_completion().

    The program stops if the translation is more than five times longer than
    the original, since that indicates the reply is not a plain translation.
    """
    prompt = (
        "Translate the following text in triple ~~~ so that it is suitable"
        " for a server management application menu and only return the actual"
        f" translation; translate it from from English to {language}"
        f" ~~~{message}~~~"
    )
    translated_message = get_completion(prompt)
    # Look for over long messages.  Same test as len(message) /
    # len(translation) < 0.2, but safe when the translation is empty.
    if len(translated_message) > 5 * len(message):
        print(f"{message} translated to {translated_message}")
        sys.exit(1)
    return translated_message


def check_file_version(filename, force_Files=False):
    """Return *filename*, or ``filename + ".new"`` if it was edited by hand.

    The generation timestamp (`` at YYYY-MM-DD HH:MM:SS``) is read from the
    first five lines of the file.  If the file's modification time is more
    than ThresholdSecs after that timestamp the file is treated as changed.
    The original name is returned when *force_Files* is true, when the file
    does not exist, when no timestamp is found, or on any other error.
    """
    if force_Files:
        return filename
    ThresholdSecs = 3  # allow secs since creation date before treat as changed.
    try:
        with open(filename, "r", encoding="utf-8") as file:
            header_lines = [file.readline().strip() for _ in range(5)]

        # Pull the timestamp that follows " at " in the generated header.
        timestamp_str = None
        for line in header_lines:
            match = re.search(r"\sat\s+(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})", line)
            if match:
                timestamp_str = match.group(1)
                break
        if timestamp_str is None:
            print("Warning: No timestamp found. Returning original filename.")
            return filename

        file_timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
        # Add the threshold seconds to the creation date
        file_timestamp += timedelta(seconds=ThresholdSecs)
        # Get the last modified time of the file, ignoring sub-second precision
        file_modified_time = datetime.fromtimestamp(
            os.path.getmtime(filename)
        ).replace(microsecond=0)
        if file_modified_time > file_timestamp:
            return f"{filename}.new"
        return filename
    except FileNotFoundError:
        # Nothing to protect yet - the file will be created.
        return filename
    except Exception:
        print(f"An error occurred: {traceback.format_exc()}")
        return filename


def _unquote(token):
    """Strip surrounding whitespace and one matching pair of quotes."""
    token = token.strip()
    if len(token) >= 2 and token[0] == token[-1] and token[0] in "'\"":
        return token[1:-1]
    return token


def convert_lex_to_dict(pairs_string):
    """Parse lex text (``'id' => 'text',`` per line) into id/text dicts.

    Blank lines, ``#`` comment lines and lines without `` => `` are skipped.
    A repeated id keeps its first position and its last text.
    """
    data_dict = {}
    for raw_line in pairs_string.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        key, sep, value = line.rstrip(",").partition(" => ")
        if not sep:
            continue
        data_dict[_unquote(key)] = _unquote(value)
    return [{"id": key, "text": value} for key, value in data_dict.items()]


if __name__ == "__main__":
    strVersion = assemble_version_string()
    json5_dict = {}
    json5_html_list = []
    print(f"SM2 code from JSON5 - {strVersion}")
    home_dir = "/home/brianr/clients/SM2/SM2Gen/"
    json_filename = f"{home_dir}/json5/nfsshare.json5"  # CreateStarterWebsite.json5"

    # Read the OpenAI key from the INI file
    if os.path.exists(ini_file_path):
        config = configparser.ConfigParser()
        config.read(ini_file_path)
        if config.has_option("smegit", "OPENAI_API_KEY"):
            OPENAI_API_KEY = config["smegit"]["OPENAI_API_KEY"]
            client = OpenAI(api_key=OPENAI_API_KEY)
        else:
            print("OPENAI_API_KEY not found in the configuration file.")
    else:
        print("Configuration file not found at:", ini_file_path)

    # Command line parameters
    parser = argparse.ArgumentParser(description="SM2Gen")
    parser.add_argument(
        "-f",
        "--filename",
        help="Specify a filename for the JSON5 file",
        default=json_filename,
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force an overwrite of all files",
    )
    parser.add_argument(
        "--noController",
        action="store_true",
        help="Stop it creating a controller file (not currently implemented)",
    )
    parser.add_argument(
        "--noHtml",
        action="store_true",
        help="Stop it creating html files(s) (not currently implemented)",
    )
    parser.add_argument(
        "--noLang",
        action="store_true",
        help="Stop it creating language localise files(s)",
    )
    parser.add_argument(
        "--noCust",
        action="store_true",
        help="Stop it creating Custom controller file (not currently implemented)",
    )
    args = parser.parse_args()
    json_filename = args.filename
    print(
        f"JSON5 from {json_filename} with noController={args.noController}, noHtml={args.noHtml} and noLang={args.noLang}"
    )  # Not yet activated

    # check if json5 file exists
    if not Path(json_filename).exists():
        print(f"json5 file: {json_filename} not found")
        sys.exit(1)

    # check syntax of JSON5
    lint_json5(json_filename)

    # Get dict of it all
    try:
        json5_dict = json5_to_dict(json_filename)
    except Exception as e:
        print(f"json5 file {json_filename} failed lint test ({e})")
        sys.exit(1)

    # Get dict of just the html bit
    json5_html_list = json5_dict["html"]

    # Identify message
    package_name = hl("PackageName")
    prefix = hl("prefix")
    print(f"\nGenerating mojo panels for {package_name}")
    print("-----------------------------------")

    # Routes for each panel
    routes = get_all_routes()
    lc_routes = lc_get_all_routes()

    #
    # File names
    #
    # force them to be overwritten
    force_Files = args.force
    # Create the target directory if it doesn't exist
    Path("Targets/" + package_name).mkdir(parents=True, exist_ok=True)
    target_directory_path = "Targets/" + package_name + "/"

    controller_file = check_file_version(
        target_directory_path + package_name + ".pm", force_Files
    )
    custom_controller_file = check_file_version(
        target_directory_path + package_name + "-Custom.pm", force_Files
    )
    layout_file = check_file_version(
        target_directory_path + package_name.lower() + ".html.ep", force_Files
    )
    css_file = check_file_version(
        target_directory_path + package_name.lower() + ".css", force_Files
    )
    partial_files = [
        check_file_version(
            target_directory_path + "_" + prefix + "_" + panel + ".html.ep",
            force_Files,
        )
        for panel in routes
    ]
    print(f"Partial files to be created:{partial_files}")
    lex_file = check_file_version(
        target_directory_path + package_name.lower() + "_en.lex", force_Files
    )
    print(lex_file)

    # arrays of hashes used to drive rows in tables
    tablecontrols = get_table_control_data()

    # Generate controller file
    try:
        controller_template = PageTemplateFile(
            "Templates/controller.pm.tem", CHAMELEON_DEBUG="true"
        )
        dbentries = get_db_fields()  # Params which correspond to Db fields
    except Exception as e:
        print(f"A Chameleon controller template error occurred: {e}")
    else:
        try:
            controller_perl = controller_template.render(
                version=strVersion,
                tablecontrols=tablecontrols,
                dbentries=dbentries,
                **json5_dict,
                panels=routes,
                lcPackageName=json5_dict["PackageName"].lower(),
            )
            with open(controller_file, "w", encoding="utf-8") as file:
                file.write(controller_perl)
            print(f"{controller_file} controller generated ok")
        except Exception as e:
            print(f"A Chameleon controller render error occurred: {e}")

    # Generate Custom controller file
    try:
        custom_controller_template = PageTemplateFile("Templates/custom.pm.tem")
    except Exception as e:
        print(f"A Chameleon custom controller template error occurred: {e}")
    else:
        try:
            custom_controller_perl = custom_controller_template.render(
                version=strVersion, panels=routes, tablecontrols=tablecontrols
            )
            # We must be careful to not overwrite the custom file if the
            # developer has already written to it - TBD
            with open(custom_controller_file, "w", encoding="utf-8") as file:
                file.write(custom_controller_perl)
            print(f"{custom_controller_file} custom controller generated ok")
        except Exception as e:
            print(f"A Chameleon custom controller render error occurred: {e}")

    # generate Layout file
    layout_template = PageTemplateFile("Templates/layout.html.ep.tem")
    try:
        layout_mojo = layout_template.render(
            version=strVersion,
            **json5_dict,
            conditions=routes,
            lcPackageName=json5_dict["PackageName"].lower(),
        )
        with open(layout_file, "w", encoding="utf-8") as file:
            file.write(layout_mojo)
        print(f"{layout_file} mojo template layout file generated ok")
    except Exception as e:
        print(f"A Chameleon render on layout file error occurred: {e}")

    # Create the css file (empty for now, except the header)
    # maybe it should have empty examples of all the relevant classes?
    with open(css_file, "w", encoding="utf-8") as file:
        file.write(f"/*\n Generated by SM2Gen version: {strVersion}\n*/\n")

    # Generate a partial file for each of the entries in the html list
    # Pull in the template code for each of the input types
    html_controls = parse_xml_to_dict("Templates/html_controls.html.ep.xml")
    for html, partial_file in zip(json5_html_list, partial_files):
        # Generate a mojo template file, and then add in the controls
        # main file first
        try:
            partial_template = PageTemplateFile("Templates/partial.html.ep.tem")
            partial_mojo_context = {**json5_dict, **html}
            try:
                partial_mojo_template = partial_template.render(
                    version=strVersion, **partial_mojo_context
                )
                with open(partial_file, "w", encoding="utf-8") as file:
                    file.write(partial_mojo_template)
                print(f"{partial_file} mojo template generated ok - phase 1")
            except Exception as e:
                print(
                    f"A Chameleon render error on partial file {html['route']} occurred: {e}"
                )
        except Exception as e:
            print(f"A Chameleon html {html['route']} error occurred: {e}")

        # Now generate the controls from the rest of the entries in the dict.
        all_controls_html = ""
        for html_control, inner_html in html.items():
            if isinstance(inner_html, dict):
                # .get() so a missing Name cannot raise inside the handlers.
                control_name = inner_html.get("Name", html_control)
                try:
                    control_template = PageTemplate(html_controls[inner_html["Type"]])
                    try:
                        control_html = control_template.render(
                            version=strVersion, **inner_html, prefix=prefix
                        )
                        all_controls_html = all_controls_html + control_html
                    except Exception as e:
                        print(
                            f"A Chameleon render on partial file control {control_name} error occurred: {e}"
                        )
                except Exception as e:
                    print(
                        f"A Chameleon render on partial file control {control_name} error occurred: {e}"
                    )
            else:
                # just a simple entry - name less numerics is type
                html_Type = "".join(
                    char for char in html_control if not char.isdigit()
                )
                try:
                    simple_control_template = PageTemplate(html_controls[html_Type])
                    try:
                        simple_control_html = simple_control_template.render(
                            version=strVersion, Value=inner_html, prefix=prefix
                        )
                        all_controls_html = all_controls_html + simple_control_html
                    except Exception as e:
                        print(
                            f"A Chameleon render on partial file control {html_control} error occurred: {e}"
                        )
                except Exception as e:
                    print(
                        f"A Chameleon template partial file control {html_control} error occurred: {e}"
                    )

        # Now insert it into the partial file in the correct place.
        if not os.path.exists(partial_file):
            # Phase 1 failed and there is no earlier file to insert into.
            print(f"{partial_file} not found - controls not inserted")
            continue
        # Read in the text file and split at "%# Inputs etc in here."
        with open(partial_file, "r", encoding="utf-8") as file:
            lines = file.readlines()
        index = next(
            (n for n, line in enumerate(lines) if "%# Inputs etc in here." in line),
            len(lines),
        )
        # Insert the controls on the line after the marker
        lines.insert(index + 1, all_controls_html + "\n")
        with open(partial_file, "w", encoding="utf-8") as file:
            file.writelines(lines)
        print(f"Content modified and saved to {partial_file}")

    # Now generate the .en file
    # Look through the generated files for the l('...') strings.
    # Only files that were actually generated can be scanned.
    all_files = [
        filename
        for filename in [controller_file, layout_file] + partial_files
        if os.path.exists(filename)
    ]
    # Non-greedy capture so two l('..') calls on one line stay separate.
    lex_pattern = re.compile(r"l[\s|(][\'|\"](.*?)[\'|\"]\)")
    all_strings = []
    for filename in all_files:
        with open(filename, "r", encoding="utf-8") as file:
            all_strings.extend(lex_pattern.findall(file.read()))
    # Take out any duplicates
    all_strings = deduplicate_array(all_strings)

    # '_english-message' => 'English Message',
    string_lib = []  # Array of dicts
    for lex_message in all_strings:
        # If has a prefix - leave it for left hand side but delete it for the right
        # If has no prefix - add one for left hand side and leave it for the right
        if lex_message.lower().startswith(prefix.lower()):
            left_str = lex_message
            right_str = lex_message[len(prefix) + 1 :]
        else:
            left_str = prefix + "_" + lex_message
            right_str = lex_message
        # Right hand side: "_" becomes " ", then tidy the capitalisation
        right_str = format_text(right_str.replace("_", " "))
        # Left hand side: spaces become "_", truncated to six "_" separated parts
        left_str = "_".join(left_str.replace(" ", "_").split("_")[:6])
        string_lib.append({"orig": lex_message, "left": left_str, "right": right_str})

    # And write it to lex file
    lex_all = "".join(
        f"'{lex_str['left']}' => '{lex_str['right']}',\n" for lex_str in string_lib
    )
    print(f"Writing {lex_file}")
    with open(lex_file, "w", encoding="utf-8") as file:
        file.write(f"#\n# Generated by SM2Gen version: {strVersion}\n#\n")
        file.write(lex_all)

    # and then play the strings back into the partials and the layout file
    print("..and feed the lex string names back into other files")
    for filename in all_files:
        with open(filename, "r", encoding="utf-8") as file:
            file_content = file.read()
        for item in string_lib:
            # Replace every use of the original string with its lex id
            file_content = file_content.replace(
                "l('" + item["orig"] + "')", "l('" + item["left"] + "')"
            )
        with open(filename, "w", encoding="utf-8") as file:
            file.write(file_content)
        print(f"Write out modified:{filename}")

    # Now generate all the translated lex files from a list of the languages
    # and codes, unless --noLang was given
    if args.noLang:
        pass
    elif client is None:
        print("No OpenAI client configured - skipping translated lex files")
    else:
        languages_path = "Templates/languages.json"
        with open(languages_path, "r", encoding="utf-8") as file:
            lang_dict = json.load(file)
        with open(lex_file, "r", encoding="utf-8") as file:
            eng_lex_dict = convert_lex_to_dict(file.read())
        for lang_item in lang_dict:
            print(f"Translating from english lex file to {lang_item['language']}")
            code = lang_item["code"]
            translated_lex_file = check_file_version(
                f"{target_directory_path}{package_name.lower()}_{code}.lex",
                force_Files,
            )
            # Get each text from ChatGPT
            translated_dict = [
                {
                    "id": lex_item["id"],
                    "text": get_translation(lex_item["text"], lang_item["language"]),
                }
                for lex_item in eng_lex_dict
            ]
            print(f"Writing out lex file for {lang_item['code']}")
            with open(translated_lex_file, "w", encoding="utf-8") as file:
                file.write(f"#\n# Generated by SM2Gen version: {strVersion}\n#\n")
                for item in translated_dict:
                    # escape any nasties
                    translated_text = (
                        item["text"]
                        .replace("\\", r"\\")
                        .replace('"', r"\"")
                        .replace("'", r"\'")
                    )
                    file.write("'" + item["id"] + "' => " + '"' + translated_text + '",\n')