Sort out formatting of English lex strings and add localisation to table columns

2024-05-05 09:31:10 +01:00
parent 145c45f483
commit 228a42ed60
8 changed files with 103 additions and 47 deletions
--- a/sm2gen.py
+++ b/sm2gen.py
@@ -11,7 +11,7 @@ import pkg_resources
 from datetime import datetime
 import xml.etree.ElementTree as ET

-SME2Gen_version = '0.6'
+SME2Gen_version = '0.7'
 json5_dict: dict = {}
 json5_html_list: list = []

@@ -181,6 +181,52 @@ def get_db_fields():
 def get_table_control_data():
 	return find_values_with_key(json5_html_list,'TableControl')

+def format_text(text):
+    #
+    # Tidy up a piece of text: one space after each comma, two spaces after each full stop, and capitalise the first word of every sentence and comma-separated phrase (fully upper-case words are left as they are)
+    #
+    # Create a list to hold the formatted sentences
+    formatted_sentences = []
+    # Split the text into sentences
+    sentences = text.split(".")
+    words = sentences[0].split(" ")
+    #print(len(sentences))
+    # Special case: the text is a single all-upper-case word (no spaces or full stops), e.g. "HELLO" -> "Hello"
+    if sentences[0].isupper() and len(sentences) == 1 and len(words) == 1:
+        return sentences[0].capitalize()
+    else:
+        for sentence in sentences:
+            #print(sentence)
+            # and split into sub-phrases, based on commas
+            formatted_phrases = []
+            phrases = sentence.split(",")
+
+            for phrase in phrases:
+                #print(phrase)
+                phrase = phrase.lstrip()
+                formatted_words = []
+                words = phrase.split(' ')
+                
+                for i,word in enumerate(words):
+                    #print(i,word)
+                    # Leave the word unchanged if it is fully uppercase or is not the first word of the phrase
+                    word  = word.lstrip()
+                    if word.isupper() or i != 0:
+                        formatted_words.append(word)
+                    else:
+                        # Capitalize the word
+                        formatted_words.append(word.capitalize())
+
+                # Join the formatted words back together for this phrase
+                formatted_phrase = ' '.join(formatted_words).lstrip()
+                formatted_phrases.append(formatted_phrase)            
+            # and sentence
+            formatted_sentence = ", ".join(formatted_phrases).lstrip()
+            formatted_sentences.append(formatted_sentence)
+        # Join the formatted sentences back together
+        formatted_text = ".  ".join(formatted_sentences).lstrip()
+        return formatted_text
+
 if __name__ == "__main__":
    try:
        chameleon_version = pkg_resources.get_distribution("Chameleon").version
@@ -385,18 +431,22 @@ if __name__ == "__main__":
        # Map all spaces to "_" on left hand side
        # amd truncate it to max five words
        original_str = lex_message
-        if lex_message.startswith(hl('prefix')):
+        # Check if it starts with the prefix (case-insensitive comparison)
+        if lex_message.lower().startswith(hl('prefix').lower()):
            left_str = lex_message
            right_str = lex_message[len(hl('prefix'))+1:]
+            # (any "_" in right_str is mapped to " " after this if/else, for both branches)
        else:
            left_str = hl('prefix')+"_"+lex_message
            right_str = lex_message
+        right_str = right_str.replace("_"," ")
+        right_str = format_text(right_str)
        left_str = left_str.replace(" ","_")
        words = left_str.split('_')[:6]
        left_str = "_".join(words)
        next_lex_str = {"orig":original_str,"left":left_str,"right":right_str}
        string_lib.append(next_lex_str)
-    print(string_lib)
+    #print(string_lib)
    #And write it to lex file
    # Now process them one by one into the lexical file
    lex_all = "";
@@ -406,6 +456,7 @@ if __name__ == "__main__":
    with open( lex_file, 'w') as file:
        file.write(lex_all)
    #and then play the strings back into the partials and the layout file
+    print("..and feed the lex string names back into other files")
    for filename in all_files:
        with open(filename, 'r') as file:
            file_content = file.read()
@@ -414,12 +465,12 @@ if __name__ == "__main__":
            original_str = item["orig"]
            left_str = item["left"]
            right_str = item["right"]
-
            # Replace all occurrences of original string with left string in 'contents'
            file_content = file_content.replace(original_str, left_str)
        # and write it back
        with open(filename, 'w') as file:
            file.write(file_content)
+        print(f"Write out modified:{filename}")