Add in preformat and sm1 html to json5 extractor program

This commit is contained in:
Brian Read 2024-09-12 18:54:38 +01:00
parent 5c5a3bfba2
commit 2ee6bd3bb6
7 changed files with 718 additions and 0 deletions

View File

@ -118,4 +118,11 @@
]]>
</Table>
<Preformatted><![CDATA[
<pre>
${value}
</pre>
]]>
</Preformatted>
</root>

View File

@ -0,0 +1,102 @@
<!DOCTYPE html
PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>SME Server sme11.thereadclan.me.uk</title>
<link rev="made" href="mailto:bugs%40koozali.org">
<meta name="copyright" content="(head.tmpl)Copyright 2003-2004 Mitel Corporation">
<link rel="stylesheet" type="text/css" href="/server-common/css/sme_core.css">
<style type="text/css">
@import url("/server-common/css/sme_main.css");
</style>
</head>
<body>
<div class="sme-error"><h5>
Warning: a reconfigure and reboot is required before proceeding! Failure to do so now
may leave your system in an unknown state!</h5></div><div class="sme-error"><h5>
URGENT NOTICE: As per June 30th 2024, SME Server 10 is obsolete, and potentially INSECURE. NO support will be offered for any issue found with this installed version.
Please migrate IMMEDIATELY to Koozali SME Server 11 or higher version. Failure to upgrade may lead to the compromise of this server.
</br>Please, consult <a href="https://wiki.koozali.org/SME_Server:Download" target="_blank">https://wiki.koozali.org/SME_Server:Download</a> to get last available version.</h5></div>
<h1>Create a starter website</h1>
<form method="POST" action="starterwebsite" enctype="application/x-www-form-urlencoded">
<input type="hidden" name="page" value="0">
<input type="hidden" name="page_stack" value="">
<input type=hidden name=".id" value="53ee9f713d94b5ba86a563429440d21e">
<input type="hidden" name="csrf_token" value="iEskwobsBAfGQ8A05yW8QPL7guJPZVEylHcHb4RlmkB">
<table class="sme-noborders">
<tr><td colspan="2"><p><p>
To create a simple web page for your company, fill
in the fields below and click on <b>Create</b>.
</p>
<p>
You can leave any field blank if you do not need it.
</p>
<p>
The text that you enter below will be line wrapped
for a nicer appearance in your web page. Leave a blank line
whenever you want to start a new paragraph. If you need
to force a line break without starting a new paragraph (for
example after each line of a mailing address), then type
the four-character sequence
<blockquote>&lt;BR&gt;</blockquote>
where you would like each line break.
</p>
<p>
<em>Do not use this option</em>
if you have already customized your web site, since it will
overwrite the "index.htm" file in your web site directory.
</p></p></td>
</tr>
<tr>
<td class="sme-noborders-label">Company name
<td class="sme-noborders-content"><INPUT TYPE="text" SIZE="48" VALUE="" NAME="companyName"></td>
</tr>
<tr>
<td colspan="2"><p>First header, typically used for short phrases such
as "Leader in the field of textile manufacturing"</p></td>
</tr> <tr>
<td class="sme-noborders-label">
<td class="sme-noborders-content"><INPUT TYPE="text" NAME="header1" SIZE="48" VALUE=""></td>
</tr>
<tr>
<td colspan="2"><p>Text following first header, typically used for a
paragraph of marketing information.</p></td>
</tr> <tr>
<td class="sme-noborders-label">
<td class="sme-noborders-content"><TEXTAREA COLS="60" ROWS="5" NAME="text1"></TEXTAREA></td>
</tr>
<tr>
<td colspan="2"><p>Second header, typically used for short phrases such
as "For more information" or "To order our products":</p></td>
</tr> <tr>
<td class="sme-noborders-label">
<td class="sme-noborders-content"><INPUT TYPE="text" NAME="header2" VALUE="" SIZE="48"></td>
</tr>
<tr>
<td colspan="2"><p>Text following second header, typically used for contact
or ordering information:</p></td>
</tr> <tr>
<td class="sme-noborders-label">
<td class="sme-noborders-content"><TEXTAREA ROWS="5" NAME="text2" COLS="60"></TEXTAREA></td>
</tr>
<tr><td colspan=2><p>When you create this web page, the file
"index.htm" will be overwritten
in your web site directory.</p>
<p>Do you wish to proceed?</p></td></tr> </table>
<table width=100%><tr><th class="sme-layout"><input type="submit" name="Next" value="Create"></th></tr></table>
</table>
</form>
<HR class="sme-copyrightbar">
<FONT class="sme-copyright">
SME Server 11.0.0<BR>Copyright 1999-2006 Mitel Corporation<BR>All rights reserved.
<BR>Copyright (c) 2013 - 2021 Koozali Foundation Inc.<BR>
</FONT>
</BODY>
</HTML>

93
html/DiskUsage.html Normal file
View File

@ -0,0 +1,93 @@
<!DOCTYPE html
PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>SME Server sme10.thereadclan.me.uk</title>
<link rev="made" href="mailto:bugs%40koozali.org">
<meta name="copyright" content="(head.tmpl)Copyright 2003-2004 Mitel Corporation">
<link rel="stylesheet" type="text/css" href="/server-common/css/sme_core.css">
<style type="text/css">
@import url("/server-common/css/sme_main.css");
</style>
</head>
<body>
<h1>Disk usage</h1>
<form method="POST" action="diskusage" enctype="application/x-www-form-urlencoded">
<input type="hidden" name="page" value="0">
<input type="hidden" name="page_stack" value="">
<input type=hidden name=".id" value="65306129a4e734ea46f31f7f8630a65d">
<input type="hidden" name="csrf_token" value="QbCmcx81coESLmdiNA5k9GiTKg2k7sJQLiY00BxtwXS">
<table class="sme-noborders">
<p>&nbsp
</p>
<p>Wed Sep 11 19:01:00 BST 2024
</p>
<p><pre>Filesystem Size Used Avail Use% Mounted on
/dev/mapper/main-root 29G 4.8G 24G 17% /
devtmpfs 2.0G 0 2.0G 0% /dev
tmpfs 2.0G 12K 2.0G 1% /dev/shm
tmpfs 2.0G 0 2.0G 0% /sys/fs/cgroup
tmpfs 2.0G 648K 2.0G 1% /run
/dev/sda1 497M 111M 386M 23% /boot
</pre></p>
<table class="sme-border">
<tr>
<th colspan="1" class="sme-border">I-bays</th>
<th colspan="1" class="sme-border">Usage</th>
<th colspan="1" class="sme-border">Path</th>
</tr>
<tr>
<td colspan="1" class="sme-border">Primary</td>
<td colspan="1" class="sme-border">4.0K</td>
<td colspan="1" class="sme-border">/home/e-smith/files/ibays/Primary</td>
</tr>
<tr>
<td colspan="1" class="sme-border">anibay</td>
<td colspan="1" class="sme-border">4.0K</td>
<td colspan="1" class="sme-border">/home/e-smith/files/ibays/anibay</td>
</tr>
<tr>
<td colspan="1" class="sme-border">opt</td>
<td colspan="1" class="sme-border">410M</td>
<td colspan="1" class="sme-border">/opt</td>
</tr>
</table>
<p>&nbsp
</p>
<table class="sme-border">
<tr>
<th colspan="1" class="sme-border">Users</th>
<th colspan="1" class="sme-border">Usage</th>
<th colspan="1" class="sme-border">Path</th>
</tr>
<tr>
<td colspan="1" class="sme-border">root</td>
<td colspan="1" class="sme-border">2.1M</td>
<td colspan="1" class="sme-border">/root</td>
</tr>
<tr>
<td colspan="1" class="sme-border">brianr</td>
<td colspan="1" class="sme-border">16K</td>
<td colspan="1" class="sme-border">/home/e-smith/files/users/brianr</td>
</tr>
</table>
</table>
</form>
<HR class="sme-copyrightbar">
<FONT class="sme-copyright">
SME Server 10.1
<BR>Copyright 1999-2006 Mitel Corporation
<BR>All rights reserved.
<BR>Copyright (c) 2013 - 2021 Koozali Foundation Inc.
<BR>
</FONT>
</BODY>
</HTML>

View File

@ -0,0 +1,61 @@
{
'PackageName': 'CreateStarterWebsite',
'prefix': 'CSW',
'MenuHeading': 'Miscellaneous',
'MenuDescription': 'Create Starter Website',
'MenuNavigation': '2000 400',
'firstPanel': 'PARAMS',
'signalEvent': 'smeserver-createstarterwebsite-update',
'html': {
'Name': 'params',
'route': 'PARAMS',
'Header': 'Create a starter website',
'SubHeader': 'Manage CreateStarterWebsite settings:',
'Paragraph1': 'To create a simple web page for your company, fill \n\t in the fields below and click on Create.',
'Paragraph2': 'You can leave any field blank if you do not need it.',
'Paragraph3': 'The text that you enter below will be line wrapped \n\t for a nicer appearance in your web page. Leave a blank line \n\t whenever you want to start a new paragraph. If you need \n\t to force a line break without starting a new paragraph (for \n\t example after each line of a mailing address), then type \n\t the four-character sequence',
'Paragraph4': 'Do not use this option if you have already customized your web site, since it will \n\t overwrite the \'index.htm\' file in your web site directory.',
'Input1': {
'Type': 'Text',
'Value': '',
'Name': 'companyName',
'Label': null
},
'Paragraph5': 'First header, typically used for short phrases such \n\t as \'Leader in the field of textile manufacturing\'',
'Input2': {
'Type': 'Text',
'Value': '',
'Name': 'header1',
'Label': null
},
'Paragraph6': 'Text following first header, typically used for a \n\t paragraph of marketing information.',
'Input3': {
'Type': 'Textarea',
'Value': '',
'Name': 'text1',
'Label': null
},
'Paragraph7': 'Second header, typically used for short phrases such \n\t as \'For more information\' or \'To order our products\':',
'Input4': {
'Type': 'Text',
'Value': '',
'Name': 'header2',
'Label': null
},
'Paragraph8': 'Text following second header, typically used for contact \n\t or ordering information:',
'Input5': {
'Type': 'Textarea',
'Value': '',
'Name': 'text2',
'Label': null
},
'Paragraph9': 'When you create this web page, the file\n\t \'index.htm\' will be overwritten\n\t in your web site directory.',
'Paragraph10': 'Do you wish to proceed?',
'Input6': {
'Type': 'Submit',
'Value': 'Create',
'Name': 'Next',
'Label': null
}
}
}

47
json5/DiskUsage.json5 Normal file
View File

@ -0,0 +1,47 @@
{
'PackageName': 'DiskUsage',
'prefix': 'DU',
'MenuHeading': 'Miscellaneous',
'MenuDescription': 'Disk Usage',
'MenuNavigation': '2000 400',
'firstPanel': 'PARAMS',
'signalEvent': 'smeserver-diskusage-update',
'html': {
'Name': 'params',
'route': 'PARAMS',
'Header': 'Disk usage',
'SubHeader': 'Manage DiskUsage settings:',
'Paragraph1': '&nbsp',
'Paragraph2': 'Wed Sep 11 19:01:00 BST 2024',
'Preformatted1': 'Filesystem Size Used Avail Use% Mounted on\n/dev/mapper/main-root 29G 4.8G 24G 17% /\ndevtmpfs 2.0G 0 2.0G 0% /dev\ntmpfs 2.0G 12K 2.0G 1% /dev/shm\ntmpfs 2.0G 0 2.0G 0% /sys/fs/cgroup\ntmpfs 2.0G 648K 2.0G 1% /run\n/dev/sda1 497M 111M 386M 23% /boot',
'Table1': {
'Type': 'Table',
'TableControl': 'Table1',
'TopHeadings': [
'I-bays',
'Usage',
'Path'
],
'Columns': [
'Table1-I-bays',
'Table1-Usage',
'Table1-Path'
]
},
'Paragraph3': '&nbsp',
'Table2': {
'Type': 'Table',
'TableControl': 'Table2',
'TopHeadings': [
'Users',
'Usage',
'Path'
],
'Columns': [
'Table2-Users',
'Table2-Usage',
'Table2-Path'
]
}
}
}

147
json5/nfsshare.json5 Normal file
View File

@ -0,0 +1,147 @@
{
PackageName: 'Nfsshare',
prefix: 'nfs',
MenuHeading: 'Network',
MenuDescription: 'NFS data share',
MenuNavigation: '2000 400',
firstPanel: 'TABLE',
signalEvent: 'smeserver-nfsshare-update',
html: [
{
Name: 'params',
route: 'PARAMS',
Header: 'NFS Share Contrib',
SubHeader: 'Manage NFS Ibay settings:',
Paragraph1: 'These parameters will be effective only if the share is enabled. The share is in /home/e-smith/files/ibays//files',
Input1: {
Name: 'IbayName',
Type: 'Text',
Label: 'Information Bay name',
Value: 'stash("IbayName")',
},
Input2: {
Name: 'ShareOwnerGrp',
Type: 'Selection',
Label: 'Share owner Group',
Value: [
'Write = admin, Read = group',
'Write = group, Read = everyone',
'Write = group, Read = group',
],
Default: 0,
},
Input3: {
Name: 'EnableNFSshare',
Type: 'Selection',
Label: 'Enable the NFS Share',
Value: [
'Disabled',
'Enabled',
],
Default: 0,
},
Input4: {
Name: 'ShareOnLocalNetwork',
Type: 'Selection',
Label: 'EnableShare on local network',
Value: [
'Disabled',
'Enabled',
],
Default: 0,
},
Paragraph2: 'For writing permissions, allowing the root user and using insecure ports, you must configure a list of one IP per line, being part of the local network(s).',
Input5: {
Name: 'NFSClientsAllowed',
Type: 'Textarea',
Label: 'NFS Client(s) allowed',
rows : 5
},
Input6: {
Name: 'FileSystemPermissions',
Type: 'Selection',
Label: 'File system permissions',
Value: [
'Read only',
'Read and Write',
],
Default: 0,
},
Input7: {
Name: 'WriteAsync',
Type: 'Selection',
Label: 'Write (a)synchronously',
Value: [
'Synchronous',
'Asynchronous',
],
},
Input8: {
Name: 'DelayWrite',
Type: 'Selection',
Label: 'Delays the disk writing',
Value: [
'Write delay',
'No write delay',
],
Default: 1,
},
Input9: {
Name: 'Squash',
Type: 'Selection',
Label: 'Squash the power of users',
Value: [
'All users squash',
'No root squash',
'root squash',
],
Default: 2,
},
Input10: {
Name: 'BrowseParents',
Type: 'Selection',
Label: 'Browse the parent folders',
Value: [
'Hide folder',
'Show folder',
],
Default: 0,
},
Input11: {
Name: 'SecurePorts',
Type: 'Selection',
Label: 'Requests on secure ports',
Value: [
'Secure',
'Insecure',
],
Default: 0,
},
Paragraph3: 'Set the uid and gid if you want all requests appear to be from one user or one group, otherwise leave blank',
Input12: {
Name: 'SetUID',
Type: 'Textinput',
Label: 'Set the UID.',
},
Input13: {
Name: 'SetGID',
Type: 'Textinput',
Label: 'Set the GID.',
},
Submit: 'Save',
},
{
Name: 'select_ibay',
route:'TABLE',
Header: 'NFS Share Contrib',
SubHeader: 'Manage NFS Ibay settings:',
Nextpanel: 'PARAMS',
Table1: {
Type:'Table',
TableControl:"ibays",
TopHeadings: ['Name','Description','Nfs status', 'Action'],
Columns: ['Name','Description','flag','Modify']
}
}
]
}

261
sm1-html-2-json5.py Normal file
View File

@ -0,0 +1,261 @@
import json
import os
import re
from bs4 import BeautifulSoup
from lxml import etree # Import lxml for HTML validation
def read_html_file(filename):
    """Return the complete text of *filename*, decoded as UTF-8."""
    with open(filename, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents
def validate_html(html):
    """Check that *html* can be parsed by lxml's HTML parser.

    Returns nothing on success.  Any exception raised while building the
    parser or parsing the text is re-raised as ``ValueError`` with the
    original exception attached as its cause.

    NOTE(review): lxml's HTML parser presumably recovers from most malformed
    markup, so this may only reject input it cannot parse at all -- confirm.
    """
    try:
        etree.fromstring(html, etree.HTMLParser())
    except Exception as parse_error:
        raise ValueError("Invalid HTML document") from parse_error
# Input names that are always skipped, whatever their declared type.
_HIDDEN_INPUT_NAMES = frozenset({'page', 'page_stack', '.id', 'csrf_token'})

# Tags collected by extract_data; find_all() yields them in document order.
_EXTRACTED_TAGS = ['h1', 'h2', 'p', 'pre', 'input', 'select', 'textarea', 'button', 'table']


def _label_text(label):
    """Return the stripped text of a <label> tag, or None when there is none."""
    return label.get_text(strip=True) if label is not None else None


def _table_record(table, table_control):
    """Build the 'Table' record (headings and column ids) for one <table>."""
    first_row = table.find('tr')
    header_cells = first_row.find_all('th') if first_row is not None else []
    top_headings = [th.get_text(strip=True) for th in header_cells]

    # Column ids are only produced when at least one data row exists, and
    # only for as many headings as that first data row has cells.
    columns = []
    data_rows = table.find_all('tr')[1:]  # Skip the heading row
    if data_rows:
        cell_count = len(data_rows[0].find_all('td'))
        columns = [
            f"{table_control}-{heading}"
            for heading in top_headings[:cell_count]
        ]

    return {
        'Type': 'Table',
        'TableControl': table_control,
        'TopHeadings': top_headings,
        'Columns': columns,
    }


def extract_data(html):
    """Extract headers, paragraphs, pre blocks, form controls and tables.

    The document is parsed with BeautifulSoup's 'lxml' parser and the
    elements of interest are visited in document order.

    Args:
        html: HTML source text.

    Returns:
        A tuple ``(records, header_text, sub_header_text)``:

        * ``records`` -- list of dicts, one per extracted element, each
          with a ``'Type'`` key (``'Header'``, ``'SubHeader'``,
          ``'Paragraph'``, ``'Preformatted'``, ``'Table'``, ``'Select'``,
          ``'Textarea'``, ``'Button'`` or the capitalised input type).
        * ``header_text`` -- text of the last ``<h1>`` seen, or None.
        * ``sub_header_text`` -- text of the last ``<h2>`` seen, or None.

    Hidden inputs, inputs named in ``_HIDDEN_INPUT_NAMES``, empty
    paragraphs/pre blocks and tables without the ``sme-border`` class are
    skipped.
    """
    soup = BeautifulSoup(html, 'lxml')
    records = []
    header_text = None
    sub_header_text = None
    table_counter = 0

    for element in soup.find_all(_EXTRACTED_TAGS):
        tag = element.name

        if tag == 'h1':
            header_text = element.get_text(strip=True)
            records.append({'Type': 'Header', 'Text': header_text})

        elif tag == 'h2':
            sub_header_text = element.get_text(strip=True)
            records.append({'Type': 'SubHeader', 'Text': sub_header_text})

        elif tag == 'p':
            # Strip only the ends of the paragraph: get_text(strip=True)
            # strips every text fragment and so glues words together across
            # inline tags ("click on <b>Create</b>" became "click onCreate").
            text = element.get_text().strip()
            if text:  # Ignore empty paragraphs
                records.append({'Type': 'Paragraph', 'Text': text})

        elif tag == 'pre':
            text = element.get_text(strip=True)
            if text:  # Ignore empty pre blocks
                records.append({'Type': 'Preformatted', 'Text': text})

        elif tag == 'input':
            input_type = element.get('type') or ''
            # Attribute values keep their original case, so compare lowered.
            if input_type.lower() == 'hidden' or element.get('name') in _HIDDEN_INPUT_NAMES:
                continue
            # NOTE(review): inputs look *forward* for their label while
            # select/textarea/button look backward -- confirm this is intended.
            records.append({
                'Type': element.get('type', 'text').capitalize(),
                'Name': element.get('name'),
                'Value': element.get('value', ''),
                'Label': _label_text(element.find_next('label')),
            })

        elif tag == 'select':
            options = [
                {'Value': option.get('value'), 'Text': option.get_text(strip=True)}
                for option in element.find_all('option')
            ]
            records.append({
                'Type': 'Select',
                'Name': element.get('name'),
                'Options': options,
                'Label': _label_text(element.find_previous('label')),
            })

        elif tag == 'textarea':
            records.append({
                'Type': 'Textarea',
                'Name': element.get('name'),
                'Value': element.get_text(strip=True),
                'Label': _label_text(element.find_previous('label')),
            })

        elif tag == 'button':
            # Look the label up here; previously this branch tested a
            # variable that was only bound by the input/textarea branches.
            records.append({
                'Type': 'Button',
                'Name': element.get('name'),
                'Value': element.get_text(strip=True),
                'Label': _label_text(element.find_previous('label')),
            })

        elif tag == 'table' and 'sme-border' in element.get('class', []):
            table_counter += 1
            # TableControl is "Table1", "Table2", ... in document order.
            records.append(_table_record(element, f"Table{table_counter}"))

    return records, header_text, sub_header_text
def insert_spaces_before_caps(text):
    """Return *text* with a space placed ahead of every capital A-Z.

    A capital at the very start of the string is left alone, so
    'DiskUsage' becomes 'Disk Usage'.
    """
    # Zero-width match: any position (except the start) followed by a capital.
    capital_boundary = re.compile(r'(?<!^)(?=[A-Z])')
    return capital_boundary.sub(' ', text)
def _to_json5_text(data):
    """Serialise *data* as indented JSON with single-quoted strings.

    The text is produced by ``json.dumps`` and every string token (keys and
    values) is then re-quoted individually, so that an apostrophe inside a
    string is escaped and an escaped double quote is restored to a plain
    double quote instead of being turned into an apostrophe.
    """
    json_text = json.dumps(data, ensure_ascii=False, indent=4)

    def requote(match):
        body = match.group(1)
        # Inside a JSON string every '"' is preceded by its escaping
        # backslash; single-quoted strings do not need that escape.
        body = body.replace('\\"', '"')
        # An apostrophe would otherwise terminate the single-quoted string.
        body = body.replace("'", "\\'")
        return f"'{body}'"

    # Double quotes only occur as string delimiters (or escaped inside a
    # string) in json.dumps output, so this matches exactly the string tokens.
    return re.sub(r'"((?:[^"\\]|\\.)*)"', requote, json_text)


def save_to_json5(data, output_filename, package_name, header, sub_header):
    """Save extracted records to a JSON5 file with the panel structure.

    Args:
        data: list of record dicts, each with a ``'Type'`` key.
            ``'Paragraph'`` and ``'Preformatted'`` records need ``'Text'``;
            ``'Table'`` records need ``'TableControl'``, ``'TopHeadings'``
            and ``'Columns'``; ``'Header'``/``'SubHeader'`` records are
            skipped; every other record is written as an input entry.
        output_filename: path of the file to write (UTF-8, overwritten).
        package_name: used for ``PackageName``, the menu description, the
            signal event name and the prefix (its capital letters).
        header: panel header; falls back to ``'<package_name> Contrib'``
            when empty or None.
        sub_header: panel sub-header; falls back to
            ``'Manage <package_name> settings:'`` when empty or None.

    Raises:
        KeyError: if a record lacks ``'Type'`` or a key required by its type.
        OSError: if the output file cannot be written.
    """
    # Generate prefix from uppercase letters in PackageName
    prefix = ''.join(re.findall(r'[A-Z]', package_name))

    html_section = {
        'Name': 'params',
        'route': 'PARAMS',
        'Header': header if header else f'{package_name} Contrib',
        'SubHeader': sub_header if sub_header else f'Manage {package_name} settings:',
    }

    # Each kind of entry is numbered independently, starting at 1.
    paragraph_count = 1
    preformatted_count = 1
    input_count = 1
    table_count = 1

    for record in data:
        record_type = record['Type']
        if record_type in ('Header', 'SubHeader'):
            continue  # Already represented by the Header/SubHeader keys
        if record_type == 'Paragraph':
            html_section[f'Paragraph{paragraph_count}'] = record['Text']
            paragraph_count += 1
        elif record_type == 'Preformatted':
            html_section[f'Preformatted{preformatted_count}'] = record['Text']
            preformatted_count += 1
        elif record_type == 'Table':
            html_section[f'Table{table_count}'] = {
                'Type': record_type,
                'TableControl': record['TableControl'],
                'TopHeadings': record['TopHeadings'],
                'Columns': record['Columns'],
            }
            table_count += 1
        else:  # Inputs, selects, textareas and buttons
            input_structure = {
                'Type': record_type,
                'Value': record.get('Value', ''),
                'Name': record.get('Name'),
                'Label': record.get('Label'),
            }
            if 'Options' in record:
                input_structure['Options'] = record['Options']
            html_section[f'Input{input_count}'] = input_structure
            input_count += 1

    json5_data = {
        'PackageName': package_name,
        'prefix': prefix,
        'MenuHeading': 'Miscellaneous',
        'MenuDescription': insert_spaces_before_caps(package_name),
        'MenuNavigation': '2000 400',
        'firstPanel': 'PARAMS',
        'signalEvent': f'smeserver-{package_name.lower()}-update',
        'html': html_section,
    }

    # Build the final text first and write it once, rather than writing
    # JSON and then re-reading and patching the file in place.
    with open(output_filename, 'w', encoding='utf-8') as json_file:
        json_file.write(_to_json5_text(json5_data))
def main(argv=None):
    """Convert one HTML file into a JSON5 file written next to it.

    Args:
        argv: optional list of command-line arguments (without the program
            name).  Defaults to ``sys.argv[1:]``.  The first argument, when
            present, is the input HTML path; otherwise the original
            hard-coded path is used.

    The output file has the input's base name with a ``.json5`` extension
    and is placed in the input file's directory.  The base name (without
    extension) is also used as the package name.
    """
    import sys

    default_input = '/home/brianr/clients/SM2/SM1-JSONGen/DiskUsage.html'
    if argv is None:
        argv = sys.argv[1:]
    input_file = argv[0] if argv else default_input

    # Read and validate before extracting anything.
    html_content = read_html_file(input_file)
    validate_html(html_content)
    data, header, sub_header = extract_data(html_content)

    # Package name is the input file name without its extension.
    base_name = os.path.basename(input_file)
    package_name = os.path.splitext(base_name)[0]
    json_filename = package_name + '.json5'

    # Write the result alongside the input file.
    output_directory = os.path.dirname(input_file)
    output_file = os.path.join(output_directory, json_filename)

    save_to_json5(data, output_file, package_name, header, sub_header)
    print(f"Extracted data saved to '{output_file}'.")


if __name__ == '__main__':
    main()