Add in preformat and sm1 html to json5 extractor program

2024-09-12 18:54:38 +01:00
parent 5c5a3bfba2
commit 2ee6bd3bb6
7 changed files with 718 additions and 0 deletions
--- a/Templates/html_controls.html.ep.xml
+++ b/Templates/html_controls.html.ep.xml
@@ -118,4 +118,11 @@
 		]]>
 	</Table>
 	<Preformatted><![CDATA[
 		<pre>
 			${value}
 		</pre>
 	]]>
 	</Preformatted>
 </root>
--- a/html/CreateStarterWebsite.html
+++ b/html/CreateStarterWebsite.html
@@ -0,0 +1,102 @@
 <!DOCTYPE html
    PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
 <html>
 <head>
 <title>SME Server sme11.thereadclan.me.uk</title>
  <link rev="made" href="mailto:bugs%40koozali.org">
  <meta name="copyright" content="(head.tmpl)Copyright 2003-2004 Mitel Corporation">
  <link rel="stylesheet" type="text/css" href="/server-common/css/sme_core.css">
  <style type="text/css"> 
  @import url("/server-common/css/sme_main.css");
  </style>
 </head>
 <body>
 <div class="sme-error"><h5>
      Warning: a reconfigure and reboot is required before proceeding! Failure to do so now
      may leave your system in an unknown state!</h5></div><div class="sme-error"><h5>
      URGENT NOTICE: As per June 30th 2024, SME Server 10 is obsolete, and potentially INSECURE. NO support will be offered for any issue found with this installed version.
      Please migrate IMMEDIATELY to Koozali SME Server 11 or higher version. Failure to upgrade may lead to the compromise of this server.
      </br>Please, consult <a href="https://wiki.koozali.org/SME_Server:Download" target="_blank">https://wiki.koozali.org/SME_Server:Download</a> to get last available version.</h5></div>
 <h1>Create a starter website</h1>
 <form method="POST" action="starterwebsite" enctype="application/x-www-form-urlencoded">
  <input type="hidden" name="page" value="0">
  <input type="hidden" name="page_stack" value="">
  <input type=hidden name=".id" value="53ee9f713d94b5ba86a563429440d21e">
  <input type="hidden" name="csrf_token" value="iEskwobsBAfGQ8A05yW8QPL7guJPZVEylHcHb4RlmkB">
  <table class="sme-noborders">
  <tr><td colspan="2"><p><p>
 		To create a simple web page for your company, fill 
 	  in the fields below and click on <b>Create</b>.
 	</p>
 	<p>  
 	  You can leave any field blank if you do not need it.
 	</p>
 	<p>
 	  The text that you enter below will be line wrapped 
 	  for a nicer appearance in your web page. Leave a blank line 
 	  whenever you want to start a new paragraph. If you need 
 	  to force a line break without starting a new paragraph (for 
 	  example after each line of a mailing address), then type 
 	  the four-character sequence
 	<blockquote>&lt;BR&gt;</blockquote>
 	  where you would like each line break.
 	</p>
 	<p>
 	<em>Do not use this option</em>
 	  if you have already customized your web site, since it will 
 	  overwrite the "index.htm" file in your web site directory.
 	</p></p></td>
  </tr>
    <tr>
      <td class="sme-noborders-label">Company name
      <td class="sme-noborders-content"><INPUT TYPE="text" SIZE="48" VALUE="" NAME="companyName"></td>
    </tr>
    <tr>
      <td colspan="2"><p>First header, typically used for short phrases such 
 	  as "Leader in the field of textile manufacturing"</p></td>
    </tr>    <tr>
      <td class="sme-noborders-label">
      <td class="sme-noborders-content"><INPUT TYPE="text" NAME="header1" SIZE="48" VALUE=""></td>
    </tr>
    <tr>
      <td colspan="2"><p>Text following first header, typically used for a 
 	  paragraph of marketing information.</p></td>
    </tr>    <tr>
      <td class="sme-noborders-label">
      <td class="sme-noborders-content"><TEXTAREA COLS="60" ROWS="5" NAME="text1"></TEXTAREA></td>
    </tr>
    <tr>
      <td colspan="2"><p>Second header, typically used for short phrases such 
 	  as "For more information" or "To order our products":</p></td>
    </tr>    <tr>
      <td class="sme-noborders-label">
      <td class="sme-noborders-content"><INPUT TYPE="text" NAME="header2" VALUE="" SIZE="48"></td>
    </tr>
    <tr>
      <td colspan="2"><p>Text following second header, typically used for contact 
 	  or ordering information:</p></td>
    </tr>    <tr>
      <td class="sme-noborders-label">
      <td class="sme-noborders-content"><TEXTAREA ROWS="5" NAME="text2" COLS="60"></TEXTAREA></td>
    </tr>
 <tr><td colspan=2><p>When you create this web page, the file
 	  "index.htm" will be overwritten
 	  in your web site directory.</p>
          <p>Do you wish to proceed?</p></td></tr>    </table>
      <table width=100%><tr><th class="sme-layout"><input type="submit" name="Next" value="Create"></th></tr></table>
  </table>
 </form>
 <HR class="sme-copyrightbar">
 <FONT class="sme-copyright">
  SME Server 11.0.0<BR>Copyright 1999-2006 Mitel Corporation<BR>All rights reserved.
 <BR>Copyright (c) 2013 - 2021 Koozali Foundation Inc.<BR>
 </FONT>
 </BODY>
 </HTML>
--- a/html/DiskUsage.html
+++ b/html/DiskUsage.html
@@ -0,0 +1,93 @@
 <!DOCTYPE html
    PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
 <html>
 <head>
    <title>SME Server sme10.thereadclan.me.uk</title>
    <link rev="made" href="mailto:bugs%40koozali.org">
    <meta name="copyright" content="(head.tmpl)Copyright 2003-2004 Mitel Corporation">
    <link rel="stylesheet" type="text/css" href="/server-common/css/sme_core.css">
    <style type="text/css">
        @import url("/server-common/css/sme_main.css");
    </style>
 </head>
 <body>
    <h1>Disk usage</h1>
    <form method="POST" action="diskusage" enctype="application/x-www-form-urlencoded">
        <input type="hidden" name="page" value="0">
        <input type="hidden" name="page_stack" value="">
        <input type=hidden name=".id" value="65306129a4e734ea46f31f7f8630a65d">
        <input type="hidden" name="csrf_token" value="QbCmcx81coESLmdiNA5k9GiTKg2k7sJQLiY00BxtwXS">
        <table class="sme-noborders"> 
            <p>&nbsp
            </p>
            <p>Wed Sep 11 19:01:00 BST 2024
            </p>
            <p><pre>Filesystem             Size  Used Avail Use% Mounted on
 /dev/mapper/main-root   29G  4.8G   24G  17% /
 devtmpfs               2.0G     0  2.0G   0% /dev
 tmpfs                  2.0G   12K  2.0G   1% /dev/shm
 tmpfs                  2.0G     0  2.0G   0% /sys/fs/cgroup
 tmpfs                  2.0G  648K  2.0G   1% /run
 /dev/sda1              497M  111M  386M  23% /boot
 </pre></p>
            <table class="sme-border">
                <tr>
                    <th colspan="1" class="sme-border">I-bays</th>
                    <th colspan="1" class="sme-border">Usage</th>
                    <th colspan="1" class="sme-border">Path</th>
                </tr>
                <tr>
                    <td colspan="1" class="sme-border">Primary</td>
                    <td colspan="1" class="sme-border">4.0K</td>
                    <td colspan="1" class="sme-border">/home/e-smith/files/ibays/Primary</td>
                </tr>
                <tr>
                    <td colspan="1" class="sme-border">anibay</td>
                    <td colspan="1" class="sme-border">4.0K</td>
                    <td colspan="1" class="sme-border">/home/e-smith/files/ibays/anibay</td>
                </tr>
                <tr>
                    <td colspan="1" class="sme-border">opt</td>
                    <td colspan="1" class="sme-border">410M</td>
                    <td colspan="1" class="sme-border">/opt</td>
                </tr>
            </table>
            <p>&nbsp
            </p>
            <table class="sme-border">
                <tr>
                    <th colspan="1" class="sme-border">Users</th>
                    <th colspan="1" class="sme-border">Usage</th>
                    <th colspan="1" class="sme-border">Path</th>
                </tr>
                <tr>
                    <td colspan="1" class="sme-border">root</td>
                    <td colspan="1" class="sme-border">2.1M</td>
                    <td colspan="1" class="sme-border">/root</td>
                </tr>
                <tr>
                    <td colspan="1" class="sme-border">brianr</td>
                    <td colspan="1" class="sme-border">16K</td>
                    <td colspan="1" class="sme-border">/home/e-smith/files/users/brianr</td>
                </tr>
            </table>
        </table>
    </form>
    <HR class="sme-copyrightbar">
    <FONT class="sme-copyright">
        SME Server 10.1
        <BR>Copyright 1999-2006 Mitel Corporation
        <BR>All rights reserved.
        <BR>Copyright (c) 2013 - 2021 Koozali Foundation Inc.
        <BR>
    </FONT>
 </BODY>
 </HTML>
--- a/json5/CreateStarterWebsite.json5
+++ b/json5/CreateStarterWebsite.json5
@@ -0,0 +1,61 @@
 {
    'PackageName': 'CreateStarterWebsite',
    'prefix': 'CSW',
    'MenuHeading': 'Miscellaneous',
    'MenuDescription': 'Create Starter Website',
    'MenuNavigation': '2000 400',
    'firstPanel': 'PARAMS',
    'signalEvent': 'smeserver-createstarterwebsite-update',
    'html': {
        'Name': 'params',
        'route': 'PARAMS',
        'Header': 'Create a starter website',
        'SubHeader': 'Manage CreateStarterWebsite settings:',
        'Paragraph1': 'To create a simple web page for your company, fill \n\t  in the fields below and click onCreate.',
        'Paragraph2': 'You can leave any field blank if you do not need it.',
        'Paragraph3': 'The text that you enter below will be line wrapped \n\t  for a nicer appearance in your web page. Leave a blank line \n\t  whenever you want to start a new paragraph. If you need \n\t  to force a line break without starting a new paragraph (for \n\t  example after each line of a mailing address), then type \n\t  the four-character sequence',
        'Paragraph4': 'Do not use this optionif you have already customized your web site, since it will \n\t  overwrite the \'index.htm\' file in your web site directory.',
        'Input1': {
            'Type': 'Text',
            'Value': '',
            'Name': 'companyName',
            'Label': null
        },
        'Paragraph5': 'First header, typically used for short phrases such \n\t  as \'Leader in the field of textile manufacturing\'',
        'Input2': {
            'Type': 'Text',
            'Value': '',
            'Name': 'header1',
            'Label': null
        },
        'Paragraph6': 'Text following first header, typically used for a \n\t  paragraph of marketing information.',
        'Input3': {
            'Type': 'Textarea',
            'Value': '',
            'Name': 'text1',
            'Label': null
        },
        'Paragraph7': 'Second header, typically used for short phrases such \n\t  as \'For more information\' or \'To order our products\':',
        'Input4': {
            'Type': 'Text',
            'Value': '',
            'Name': 'header2',
            'Label': null
        },
        'Paragraph8': 'Text following second header, typically used for contact \n\t  or ordering information:',
        'Input5': {
            'Type': 'Textarea',
            'Value': '',
            'Name': 'text2',
            'Label': null
        },
        'Paragraph9': 'When you create this web page, the file\n\t  \'index.htm\' will be overwritten\n\t  in your web site directory.',
        'Paragraph10': 'Do you wish to proceed?',
        'Input6': {
            'Type': 'Submit',
            'Value': 'Create',
            'Name': 'Next',
            'Label': null
        }
    }
 }
--- a/json5/DiskUsage.json5
+++ b/json5/DiskUsage.json5
@@ -0,0 +1,47 @@
 {
    'PackageName': 'DiskUsage',
    'prefix': 'DU',
    'MenuHeading': 'Miscellaneous',
    'MenuDescription': 'Disk Usage',
    'MenuNavigation': '2000 400',
    'firstPanel': 'PARAMS',
    'signalEvent': 'smeserver-diskusage-update',
    'html': {
        'Name': 'params',
        'route': 'PARAMS',
        'Header': 'Disk usage',
        'SubHeader': 'Manage DiskUsage settings:',
        'Paragraph1': '&nbsp',
        'Paragraph2': 'Wed Sep 11 19:01:00 BST 2024',
        'Preformatted1': 'Filesystem             Size  Used Avail Use% Mounted on\n/dev/mapper/main-root   29G  4.8G   24G  17% /\ndevtmpfs               2.0G     0  2.0G   0% /dev\ntmpfs                  2.0G   12K  2.0G   1% /dev/shm\ntmpfs                  2.0G     0  2.0G   0% /sys/fs/cgroup\ntmpfs                  2.0G  648K  2.0G   1% /run\n/dev/sda1              497M  111M  386M  23% /boot',
        'Table1': {
            'Type': 'Table',
            'TableControl': 'Table1',
            'TopHeadings': [
                'I-bays',
                'Usage',
                'Path'
            ],
            'Columns': [
                'Table1-I-bays',
                'Table1-Usage',
                'Table1-Path'
            ]
        },
        'Paragraph3': '&nbsp',
        'Table2': {
            'Type': 'Table',
            'TableControl': 'Table2',
            'TopHeadings': [
                'Users',
                'Usage',
                'Path'
            ],
            'Columns': [
                'Table2-Users',
                'Table2-Usage',
                'Table2-Path'
            ]
        }
    }
 }
--- a/json5/nfsshare.json5
+++ b/json5/nfsshare.json5
@@ -0,0 +1,147 @@
 {
  PackageName: 'Nfsshare',
  prefix: 'nfs',
  MenuHeading: 'Network',
  MenuDescription: 'NFS data share',
  MenuNavigation: '2000 400',
  firstPanel: 'TABLE',
  signalEvent: 'smeserver-nfsshare-update',
  html: [
    {
      Name: 'params',
      route: 'PARAMS',
      Header: 'NFS Share Contrib',
      SubHeader: 'Manage NFS Ibay settings:',
      Paragraph1: 'These parameters will be effective only if the share is enabled. The share is in /home/e-smith/files/ibays//files',
      Input1: {
        Name: 'IbayName',
        Type: 'Text',
        Label: 'Information Bay name',
        Value: 'stash("IbayName")',
      },
      Input2: {
        Name: 'ShareOwnerGrp',
        Type: 'Selection',
        Label: 'Share owner Group',
        Value: [
          'Write = admin, Read = group',
          'Write = group, Read = everyone',
          'Write = group, Read = group',
        ],
        Default: 0,
      },
      Input3: {
        Name: 'EnableNFSshare',
        Type: 'Selection',
        Label: 'Enable the NFS Share',
        Value: [
          'Disabled',
          'Enabled',
        ],
        Default: 0,
      },
      Input4: {
        Name: 'ShareOnLocalNetwork',
        Type: 'Selection',
        Label: 'EnableShare on local network',
        Value: [
          'Disabled',
          'Enabled',
        ],
        Default: 0,
      },
      Paragraph2: 'For writing permissions, allowing the root user and using insecure ports, you must configure a list of one IP per line, being part of the local network(s).',
      Input5: {
        Name: 'NFSClientsAllowed',
        Type: 'Textarea',
        Label: 'NFS Client(s) allowed',
        rows : 5
      },
      Input6: {
        Name: 'FileSystemPermissions',
        Type: 'Selection',
        Label: 'File system permissions',
        Value: [
          'Read only',
          'Read and Write',
        ],
        Default: 0,
      },
      Input7: {
        Name: 'WriteAsync',
        Type: 'Selection',
        Label: 'Write (a)synchronously',
        Value: [
          'Synchronous',
          'Asynchronous',
        ],
      },
      Input8: {
        Name: 'DelayWrite',
        Type: 'Selection',
        Label: 'Delays the disk writing',
        Value: [
          'Write delay',
          'No write delay',
        ],
        Default: 1,
      },
      Input9: {
        Name: 'Squash',
        Type: 'Selection',
        Label: 'Squash the power of users',
        Value: [
          'All users squash',
          'No root squash',
          'root squash',
        ],
        Default: 2,
      },
      Input10: {
        Name: 'BrowseParents',
        Type: 'Selection',
        Label: 'Browse the parent folders',
        Value: [
          'Hide folder',
          'Show folder',
        ],
        Default: 0,
      },
      Input11: {
        Name: 'SecurePorts',
        Type: 'Selection',
        Label: 'Requests on secure ports',
        Value: [
          'Secure',
          'Insecure',
        ],
        Default: 0,
      },
      Paragraph3: 'Set the uid and gid if you want all requests to appear to be from one user or one group, otherwise leave blank',
      Input12: {
        Name: 'SetUID',
        Type: 'Textinput',
        Label: 'Set the UID.',
      },
      Input13: {
        Name: 'SetGID',
        Type: 'Textinput',
        Label: 'Set the GID.',
      },
      Submit: 'Save',
    },
    {
      Name: 'select_ibay',
      route:'TABLE',
      Header: 'NFS Share Contrib',
      SubHeader: 'Manage NFS Ibay settings:',
      Nextpanel: 'PARAMS',
      Table1: {
 		  Type:'Table',
 		  TableControl:"ibays",
 		  TopHeadings: ['Name','Description','Nfs status', 'Action'],
 		  Columns: ['Name','Description','flag','Modify']
 	  }
    }
  ]
 }
--- a/sm1-html-2-json5.py
+++ b/sm1-html-2-json5.py
@@ -0,0 +1,261 @@
 import json
 import os
 import re
 from bs4 import BeautifulSoup
 from lxml import etree  # Import lxml for HTML validation
 def read_html_file(filename):
    """Return the full text of ``filename``, decoded as UTF-8."""
    with open(filename, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents
 def validate_html(html):
    """Check that ``html`` can be parsed as an HTML document.

    Returns nothing on success. Any exception raised while creating the
    parser or parsing the text is re-raised as
    ``ValueError("Invalid HTML document")`` with the original exception
    chained as its cause.
    """
    try:
        # The parse result is discarded; only success or failure matters.
        etree.fromstring(html, etree.HTMLParser())
    except Exception as parse_error:
        raise ValueError("Invalid HTML document") from parse_error
 def extract_data(html):
    """Extract headers, paragraphs, form controls, tables and pre blocks.

    The document is parsed with BeautifulSoup (``lxml`` backend) and the
    matching elements are visited in document order, so the returned records
    keep the order in which they appear on the page.

    Args:
        html: HTML source text.

    Returns:
        A tuple ``(records, header_text, sub_header_text)``:

        * ``records`` -- list of dicts, each with a ``Type`` key
          (``Header``, ``SubHeader``, ``Paragraph``, ``Preformatted``,
          ``Table``, ``Select``, ``Textarea``, ``Button`` or the capitalised
          ``type`` attribute of an ``<input>``).
        * ``header_text`` -- text of the last ``<h1>`` seen, or ``None``.
        * ``sub_header_text`` -- text of the last ``<h2>`` seen, or ``None``.
    """
    soup = BeautifulSoup(html, 'lxml')
    records = []
    # Inputs with these names are skipped even when not marked type=hidden.
    hidden_input_names = {'page', 'page_stack', '.id', 'csrf_token'}
    header_text = None
    sub_header_text = None
    table_counter = 0

    def text_of(node):
        # Strip only the outer whitespace. get_text(strip=True) strips every
        # fragment and joins them with '', which glues inline markup to its
        # neighbours ("click on <b>Create</b>" became "click onCreate").
        return node.get_text().strip()

    def label_text(label):
        return text_of(label) if label is not None else None

    wanted = ['h1', 'h2', 'p', 'pre', 'input', 'select', 'textarea', 'button', 'table']
    for element in soup.find_all(wanted):
        tag = element.name
        if tag == 'h1':
            header_text = text_of(element)
            records.append({'Type': 'Header', 'Text': header_text})
        elif tag == 'h2':
            sub_header_text = text_of(element)
            records.append({'Type': 'SubHeader', 'Text': sub_header_text})
        elif tag in ('p', 'pre'):
            text = text_of(element)
            if text:  # Empty paragraphs / pre blocks carry no information
                records.append({
                    'Type': 'Paragraph' if tag == 'p' else 'Preformatted',
                    'Text': text,
                })
        elif tag == 'input':
            # Attribute values keep their original case, so compare lowered.
            is_hidden = (element.get('type') or '').lower() == 'hidden'
            if is_hidden or element.get('name') in hidden_input_names:
                continue
            records.append({
                'Type': element.get('type', 'text').capitalize(),
                'Name': element.get('name'),
                'Value': element.get('value', ''),
                # NOTE(review): inputs take the *following* label while the
                # other controls take the preceding one -- confirm intent.
                'Label': label_text(element.find_next('label')),
            })
        elif tag == 'select':
            options = [
                {'Value': option.get('value'), 'Text': text_of(option)}
                for option in element.find_all('option')
            ]
            records.append({
                'Type': 'Select',
                'Name': element.get('name'),
                'Options': options,
                'Label': label_text(element.find_previous('label')),
            })
        elif tag == 'textarea':
            records.append({
                'Type': 'Textarea',
                'Name': element.get('name'),
                'Value': text_of(element),
                'Label': label_text(element.find_previous('label')),
            })
        elif tag == 'button':
            # Look the label up for this button. The earlier code tested a
            # ``label`` variable left over from a previous element (or not
            # yet bound), giving NameError/AttributeError or a wrong label.
            records.append({
                'Type': 'Button',
                'Name': element.get('name'),
                'Value': text_of(element),
                'Label': label_text(element.find_previous('label')),
            })
        elif tag == 'table' and 'sme-border' in element.get('class', []):
            # Only bordered data tables are reported; layout tables are not.
            table_counter += 1
            table_control = f"Table{table_counter}"  # e.g. "Table1", "Table2"
            rows = element.find_all('tr')
            top_headings = []
            columns = []
            if rows:
                # Headings come from the <th> cells of the first row.
                top_headings = [text_of(th) for th in rows[0].find_all('th')]
            if len(rows) > 1:
                # One column id per heading that has a matching cell in the
                # first data row.
                cell_count = len(rows[1].find_all('td'))
                columns = [
                    f"{table_control}-{heading}"
                    for heading in top_headings[:cell_count]
                ]
            records.append({
                'Type': 'Table',
                'TableControl': table_control,
                'TopHeadings': top_headings,
                'Columns': columns,
            })
    return records, header_text, sub_header_text
 def insert_spaces_before_caps(text):
    """Return ``text`` with a space placed in front of every ASCII capital
    letter except one at the very start (``"DiskUsage"`` -> ``"Disk Usage"``)."""
    capital_boundary = re.compile(r'(?<!^)(?=[A-Z])')
    spaced = capital_boundary.sub(' ', text)
    return spaced
 def save_to_json5(data, output_filename, package_name, header, sub_header):
    """Write the extracted records to ``output_filename`` as a JSON5 document.

    The document wraps a single ``html`` panel whose entries are numbered per
    kind in the order they occur in ``data`` (``Paragraph1``,
    ``Preformatted1``, ``Table1``, ``Input1``, ...). ``Header`` and
    ``SubHeader`` records are skipped because those values arrive through
    the ``header`` / ``sub_header`` arguments.

    Args:
        data: List of record dicts, each with a ``Type`` key plus ``Text``
            (paragraph / preformatted), the table fields, or input fields.
        output_filename: Path of the file to create or overwrite (UTF-8).
        package_name: Package name; its upper-case letters form ``prefix``
            and it also feeds the menu description and the signal event.
        header: Panel header; a falsy value selects ``"<package> Contrib"``.
        sub_header: Panel sub-header; a falsy value selects
            ``"Manage <package> settings:"``.
    """
    # Generate prefix from the upper-case letters in the package name.
    prefix = ''.join(re.findall(r'[A-Z]', package_name))

    panel = {
        'Name': 'params',
        'route': 'PARAMS',
        'Header': header if header else f'{package_name} Contrib',
        'SubHeader': sub_header if sub_header else f'Manage {package_name} settings:',
    }
    counters = {'Paragraph': 0, 'Preformatted': 0, 'Table': 0, 'Input': 0}
    for record in data:
        record_type = record['Type']
        if record_type in ('Header', 'SubHeader'):
            continue  # Already represented by Header / SubHeader above
        if record_type in ('Paragraph', 'Preformatted'):
            kind = record_type
            value = record['Text']
        elif record_type == 'Table':
            kind = 'Table'
            value = {
                'Type': record_type,
                'TableControl': record['TableControl'],
                'TopHeadings': record['TopHeadings'],
                'Columns': record['Columns'],
            }
        else:  # Inputs, selects, textareas and buttons share one numbering
            kind = 'Input'
            value = {
                'Type': record_type,
                'Value': record.get('Value', ''),
                'Name': record.get('Name'),
                'Label': record.get('Label'),
            }
            if 'Options' in record:
                value['Options'] = record['Options']
        counters[kind] += 1
        panel[f'{kind}{counters[kind]}'] = value

    json5_data = {
        'PackageName': package_name,
        'prefix': prefix,
        'MenuHeading': 'Miscellaneous',
        'MenuDescription': insert_spaces_before_caps(package_name),
        'MenuNavigation': '2000 400',
        'firstPanel': 'PARAMS',
        'signalEvent': f'smeserver-{package_name.lower()}-update',
        'html': panel,
    }

    json_text = json.dumps(json5_data, ensure_ascii=False, indent=4)
    # Re-quote each string literal on its own instead of replacing every '"'
    # in the file: a blanket replace leaves apostrophes in values unescaped
    # (invalid JSON5) and turns embedded double quotes into apostrophes.
    json5_text = re.sub(r'"(?:[^"\\]|\\.)*"', _json5_single_quote, json_text)

    # Single write; no need to dump, re-read and rewrite the same file.
    with open(output_filename, 'w', encoding='utf-8') as json_file:
        json_file.write(json5_text)


 def _json5_single_quote(match):
    """Convert one double-quoted JSON string literal to single-quoted JSON5."""
    inner = match.group(0)[1:-1]

    def convert(token):
        text = token.group(0)
        if text == '\\"':
            return '"'  # A double quote needs no escape inside '...'
        if text == "'":
            return "\\'"  # An apostrophe must be escaped inside '...'
        return text  # Every other escape sequence is kept unchanged

    return "'" + re.sub(r"\\.|'", convert, inner) + "'"
 def main(input_file=None):
    """Convert one HTML page into a JSON5 description next to it.

    The input file is read, validated, and its extracted records are saved
    as ``<name>.json5`` in the same directory as the input, where ``<name>``
    is the input file name without its extension (also used as the package
    name).

    Args:
        input_file: Path of the HTML file. When omitted, the first
            command-line argument is used; when that is absent too, the
            original hard-coded path is used so existing invocations keep
            working.
    """
    import sys  # Local import: only this entry point needs the argv list

    default_input = '/home/brianr/clients/SM2/SM1-JSONGen/DiskUsage.html'
    if input_file is None:
        input_file = sys.argv[1] if len(sys.argv) > 1 else default_input

    # Read, validate, then extract, so a broken page fails before any output.
    html_content = read_html_file(input_file)
    validate_html(html_content)
    data, header, sub_header = extract_data(html_content)

    # The package name is the file name without directory or extension.
    package_name = os.path.splitext(os.path.basename(input_file))[0]
    output_file = os.path.join(os.path.dirname(input_file), package_name + '.json5')

    save_to_json5(data, output_file, package_name, header, sub_header)
    print(f"Extracted data saved to '{output_file}'.")
 # Run the conversion only when this file is executed as a script, not on import.
 if __name__ == '__main__':
    main()