Add in preformat and sm1 html to json5 extractor program
This commit is contained in:
parent
5c5a3bfba2
commit
2ee6bd3bb6
@ -118,4 +118,11 @@
|
||||
]]>
|
||||
</Table>
|
||||
|
||||
<Preformatted><![CDATA[
|
||||
<pre>
|
||||
${value}'
|
||||
</pre>
|
||||
]]>
|
||||
</Preformatted>
|
||||
|
||||
</root>
|
||||
|
102
html/CreateStarterWebsite.html
Normal file
102
html/CreateStarterWebsite.html
Normal file
@ -0,0 +1,102 @@
|
||||
|
||||
<!DOCTYPE html
|
||||
PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
||||
"http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>SME Server sme11.thereadclan.me.uk</title>
|
||||
<link rev="made" href="mailto:bugs%40koozali.org">
|
||||
<meta name="copyright" content="(head.tmpl)Copyright 2003-2004 Mitel Corporation">
|
||||
<link rel="stylesheet" type="text/css" href="/server-common/css/sme_core.css">
|
||||
<style type="text/css">
|
||||
@import url("/server-common/css/sme_main.css");
|
||||
</style>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<div class="sme-error"><h5>
|
||||
Warning: a reconfigure and reboot is required before proceeding! Failure to do so now
|
||||
may leave your system in an unknown state!</h5></div><div class="sme-error"><h5>
|
||||
URGENT NOTICE: As per June 30th 2024, SME Server 10 is obsolete, and potentially INSECURE. NO support will be offered for any issue found with this installed version.
|
||||
Please migrate IMMEDIATELY to Koozali SME Server 11 or higher version. Failure to upgrade may lead to the compromise of this server.
|
||||
</br>Please, consult <a href="https://wiki.koozali.org/SME_Server:Download" target="_blank">https://wiki.koozali.org/SME_Server:Download</a> to get last available version.</h5></div>
|
||||
<h1>Create a starter website</h1>
|
||||
<form method="POST" action="starterwebsite" enctype="application/x-www-form-urlencoded">
|
||||
<input type="hidden" name="page" value="0">
|
||||
<input type="hidden" name="page_stack" value="">
|
||||
<input type=hidden name=".id" value="53ee9f713d94b5ba86a563429440d21e">
|
||||
<input type="hidden" name="csrf_token" value="iEskwobsBAfGQ8A05yW8QPL7guJPZVEylHcHb4RlmkB">
|
||||
<table class="sme-noborders">
|
||||
<tr><td colspan="2"><p><p>
|
||||
To create a simple web page for your company, fill
|
||||
in the fields below and click on <b>Create</b>.
|
||||
</p>
|
||||
<p>
|
||||
You can leave any field blank if you do not need it.
|
||||
</p>
|
||||
<p>
|
||||
The text that you enter below will be line wrapped
|
||||
for a nicer appearance in your web page. Leave a blank line
|
||||
whenever you want to start a new paragraph. If you need
|
||||
to force a line break without starting a new paragraph (for
|
||||
example after each line of a mailing address), then type
|
||||
the four-character sequence
|
||||
<blockquote><BR></blockquote>
|
||||
where you would like each line break.
|
||||
</p>
|
||||
<p>
|
||||
<em>Do not use this option</em>
|
||||
if you have already customized your web site, since it will
|
||||
overwrite the "index.htm" file in your web site directory.
|
||||
</p></p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="sme-noborders-label">Company name
|
||||
<td class="sme-noborders-content"><INPUT TYPE="text" SIZE="48" VALUE="" NAME="companyName"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2"><p>First header, typically used for short phrases such
|
||||
as "Leader in the field of textile manufacturing"</p></td>
|
||||
</tr> <tr>
|
||||
<td class="sme-noborders-label">
|
||||
<td class="sme-noborders-content"><INPUT TYPE="text" NAME="header1" SIZE="48" VALUE=""></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2"><p>Text following first header, typically used for a
|
||||
paragraph of marketing information.</p></td>
|
||||
</tr> <tr>
|
||||
<td class="sme-noborders-label">
|
||||
<td class="sme-noborders-content"><TEXTAREA COLS="60" ROWS="5" NAME="text1"></TEXTAREA></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2"><p>Second header, typically used for short phrases such
|
||||
as "For more information" or "To order our products":</p></td>
|
||||
</tr> <tr>
|
||||
<td class="sme-noborders-label">
|
||||
<td class="sme-noborders-content"><INPUT TYPE="text" NAME="header2" VALUE="" SIZE="48"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2"><p>Text following second header, typically used for contact
|
||||
or ordering information:</p></td>
|
||||
</tr> <tr>
|
||||
<td class="sme-noborders-label">
|
||||
<td class="sme-noborders-content"><TEXTAREA ROWS="5" NAME="text2" COLS="60"></TEXTAREA></td>
|
||||
</tr>
|
||||
<tr><td colspan=2><p>When you create this web page, the file
|
||||
"index.htm" will be overwritten
|
||||
in your web site directory.</p>
|
||||
<p>Do you wish to proceed?</p></td></tr> </table>
|
||||
<table width=100%><tr><th class="sme-layout"><input type="submit" name="Next" value="Create"></th></tr></table>
|
||||
</table>
|
||||
</form>
|
||||
|
||||
<HR class="sme-copyrightbar">
|
||||
<FONT class="sme-copyright">
|
||||
SME Server 11.0.0<BR>Copyright 1999-2006 Mitel Corporation<BR>All rights reserved.
|
||||
|
||||
<BR>Copyright (c) 2013 - 2021 Koozali Foundation Inc.<BR>
|
||||
</FONT>
|
||||
</BODY>
|
||||
|
||||
</HTML>
|
||||
|
93
html/DiskUsage.html
Normal file
93
html/DiskUsage.html
Normal file
@ -0,0 +1,93 @@
|
||||
<!DOCTYPE html
|
||||
PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
||||
"http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<title>SME Server sme10.thereadclan.me.uk</title>
|
||||
<link rev="made" href="mailto:bugs%40koozali.org">
|
||||
<meta name="copyright" content="(head.tmpl)Copyright 2003-2004 Mitel Corporation">
|
||||
<link rel="stylesheet" type="text/css" href="/server-common/css/sme_core.css">
|
||||
<style type="text/css">
|
||||
@import url("/server-common/css/sme_main.css");
|
||||
</style>
|
||||
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Disk usage</h1>
|
||||
<form method="POST" action="diskusage" enctype="application/x-www-form-urlencoded">
|
||||
<input type="hidden" name="page" value="0">
|
||||
<input type="hidden" name="page_stack" value="">
|
||||
<input type=hidden name=".id" value="65306129a4e734ea46f31f7f8630a65d">
|
||||
<input type="hidden" name="csrf_token" value="QbCmcx81coESLmdiNA5k9GiTKg2k7sJQLiY00BxtwXS">
|
||||
<table class="sme-noborders">
|
||||
<p> 
|
||||
</p>
|
||||
<p>Wed Sep 11 19:01:00 BST 2024
|
||||
</p>
|
||||
<p><pre>Filesystem Size Used Avail Use% Mounted on
|
||||
/dev/mapper/main-root 29G 4.8G 24G 17% /
|
||||
devtmpfs 2.0G 0 2.0G 0% /dev
|
||||
tmpfs 2.0G 12K 2.0G 1% /dev/shm
|
||||
tmpfs 2.0G 0 2.0G 0% /sys/fs/cgroup
|
||||
tmpfs 2.0G 648K 2.0G 1% /run
|
||||
/dev/sda1 497M 111M 386M 23% /boot
|
||||
</pre></p>
|
||||
<table class="sme-border">
|
||||
<tr>
|
||||
<th colspan="1" class="sme-border">I-bays</th>
|
||||
<th colspan="1" class="sme-border">Usage</th>
|
||||
<th colspan="1" class="sme-border">Path</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="1" class="sme-border">Primary</td>
|
||||
<td colspan="1" class="sme-border">4.0K</td>
|
||||
<td colspan="1" class="sme-border">/home/e-smith/files/ibays/Primary</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="1" class="sme-border">anibay</td>
|
||||
<td colspan="1" class="sme-border">4.0K</td>
|
||||
<td colspan="1" class="sme-border">/home/e-smith/files/ibays/anibay</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="1" class="sme-border">opt</td>
|
||||
<td colspan="1" class="sme-border">410M</td>
|
||||
<td colspan="1" class="sme-border">/opt</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> 
|
||||
</p>
|
||||
<table class="sme-border">
|
||||
<tr>
|
||||
<th colspan="1" class="sme-border">Users</th>
|
||||
<th colspan="1" class="sme-border">Usage</th>
|
||||
<th colspan="1" class="sme-border">Path</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="1" class="sme-border">root</td>
|
||||
<td colspan="1" class="sme-border">2.1M</td>
|
||||
<td colspan="1" class="sme-border">/root</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="1" class="sme-border">brianr</td>
|
||||
<td colspan="1" class="sme-border">16K</td>
|
||||
<td colspan="1" class="sme-border">/home/e-smith/files/users/brianr</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
</table>
|
||||
</form>
|
||||
|
||||
<HR class="sme-copyrightbar">
|
||||
<FONT class="sme-copyright">
|
||||
SME Server 10.1
|
||||
<BR>Copyright 1999-2006 Mitel Corporation
|
||||
<BR>All rights reserved.
|
||||
|
||||
<BR>Copyright (c) 2013 - 2021 Koozali Foundation Inc.
|
||||
<BR>
|
||||
</FONT>
|
||||
</BODY>
|
||||
|
||||
</HTML>
|
61
json5/CreateStarterWebsite.json5
Normal file
61
json5/CreateStarterWebsite.json5
Normal file
@ -0,0 +1,61 @@
|
||||
{
|
||||
'PackageName': 'CreateStarterWebsite',
|
||||
'prefix': 'CSW',
|
||||
'MenuHeading': 'Miscellaneous',
|
||||
'MenuDescription': 'Create Starter Website',
|
||||
'MenuNavigation': '2000 400',
|
||||
'firstPanel': 'PARAMS',
|
||||
'signalEvent': 'smeserver-createstarterwebsite-update',
|
||||
'html': {
|
||||
'Name': 'params',
|
||||
'route': 'PARAMS',
|
||||
'Header': 'Create a starter website',
|
||||
'SubHeader': 'Manage CreateStarterWebsite settings:',
|
||||
'Paragraph1': 'To create a simple web page for your company, fill \n\t in the fields below and click onCreate.',
|
||||
'Paragraph2': 'You can leave any field blank if you do not need it.',
|
||||
'Paragraph3': 'The text that you enter below will be line wrapped \n\t for a nicer appearance in your web page. Leave a blank line \n\t whenever you want to start a new paragraph. If you need \n\t to force a line break without starting a new paragraph (for \n\t example after each line of a mailing address), then type \n\t the four-character sequence',
|
||||
'Paragraph4': 'Do not use this optionif you have already customized your web site, since it will \n\t overwrite the \'index.htm\' file in your web site directory.',
|
||||
'Input1': {
|
||||
'Type': 'Text',
|
||||
'Value': '',
|
||||
'Name': 'companyName',
|
||||
'Label': null
|
||||
},
|
||||
'Paragraph5': 'First header, typically used for short phrases such \n\t as \'Leader in the field of textile manufacturing\'',
|
||||
'Input2': {
|
||||
'Type': 'Text',
|
||||
'Value': '',
|
||||
'Name': 'header1',
|
||||
'Label': null
|
||||
},
|
||||
'Paragraph6': 'Text following first header, typically used for a \n\t paragraph of marketing information.',
|
||||
'Input3': {
|
||||
'Type': 'Textarea',
|
||||
'Value': '',
|
||||
'Name': 'text1',
|
||||
'Label': null
|
||||
},
|
||||
'Paragraph7': 'Second header, typically used for short phrases such \n\t as \'For more information\' or \'To order our products\':',
|
||||
'Input4': {
|
||||
'Type': 'Text',
|
||||
'Value': '',
|
||||
'Name': 'header2',
|
||||
'Label': null
|
||||
},
|
||||
'Paragraph8': 'Text following second header, typically used for contact \n\t or ordering information:',
|
||||
'Input5': {
|
||||
'Type': 'Textarea',
|
||||
'Value': '',
|
||||
'Name': 'text2',
|
||||
'Label': null
|
||||
},
|
||||
'Paragraph9': 'When you create this web page, the file\n\t \'index.htm\' will be overwritten\n\t in your web site directory.',
|
||||
'Paragraph10': 'Do you wish to proceed?',
|
||||
'Input6': {
|
||||
'Type': 'Submit',
|
||||
'Value': 'Create',
|
||||
'Name': 'Next',
|
||||
'Label': null
|
||||
}
|
||||
}
|
||||
}
|
47
json5/DiskUsage.json5
Normal file
47
json5/DiskUsage.json5
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
'PackageName': 'DiskUsage',
|
||||
'prefix': 'DU',
|
||||
'MenuHeading': 'Miscellaneous',
|
||||
'MenuDescription': 'Disk Usage',
|
||||
'MenuNavigation': '2000 400',
|
||||
'firstPanel': 'PARAMS',
|
||||
'signalEvent': 'smeserver-diskusage-update',
|
||||
'html': {
|
||||
'Name': 'params',
|
||||
'route': 'PARAMS',
|
||||
'Header': 'Disk usage',
|
||||
'SubHeader': 'Manage DiskUsage settings:',
|
||||
'Paragraph1': ' ',
|
||||
'Paragraph2': 'Wed Sep 11 19:01:00 BST 2024',
|
||||
'Preformatted1': 'Filesystem Size Used Avail Use% Mounted on\n/dev/mapper/main-root 29G 4.8G 24G 17% /\ndevtmpfs 2.0G 0 2.0G 0% /dev\ntmpfs 2.0G 12K 2.0G 1% /dev/shm\ntmpfs 2.0G 0 2.0G 0% /sys/fs/cgroup\ntmpfs 2.0G 648K 2.0G 1% /run\n/dev/sda1 497M 111M 386M 23% /boot',
|
||||
'Table1': {
|
||||
'Type': 'Table',
|
||||
'TableControl': 'Table1',
|
||||
'TopHeadings': [
|
||||
'I-bays',
|
||||
'Usage',
|
||||
'Path'
|
||||
],
|
||||
'Columns': [
|
||||
'Table1-I-bays',
|
||||
'Table1-Usage',
|
||||
'Table1-Path'
|
||||
]
|
||||
},
|
||||
'Paragraph3': ' ',
|
||||
'Table2': {
|
||||
'Type': 'Table',
|
||||
'TableControl': 'Table2',
|
||||
'TopHeadings': [
|
||||
'Users',
|
||||
'Usage',
|
||||
'Path'
|
||||
],
|
||||
'Columns': [
|
||||
'Table2-Users',
|
||||
'Table2-Usage',
|
||||
'Table2-Path'
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
147
json5/nfsshare.json5
Normal file
147
json5/nfsshare.json5
Normal file
@ -0,0 +1,147 @@
|
||||
{
|
||||
PackageName: 'Nfsshare',
|
||||
prefix: 'nfs',
|
||||
MenuHeading: 'Network',
|
||||
MenuDescription: 'NFS data share',
|
||||
MenuNavigation: '2000 400',
|
||||
firstPanel: 'TABLE',
|
||||
signalEvent: 'smeserver-nfsshare-update',
|
||||
html: [
|
||||
{
|
||||
Name: 'params',
|
||||
route: 'PARAMS',
|
||||
Header: 'NFS Share Contrib',
|
||||
SubHeader: 'Manage NFS Ibay settings:',
|
||||
Paragraph1: 'These parameters will be effective only if the share is enabled. The share is in /home/e-smith/files/ibays//files',
|
||||
Input1: {
|
||||
Name: 'IbayName',
|
||||
Type: 'Text',
|
||||
Label: 'Information Bay name',
|
||||
Value: 'stash("IbayName")',
|
||||
},
|
||||
Input2: {
|
||||
Name: 'ShareOwnerGrp',
|
||||
Type: 'Selection',
|
||||
Label: 'Share owner Group',
|
||||
Value: [
|
||||
'Write = admin, Read = group',
|
||||
'Write = group, Read = everyone',
|
||||
'Write = group, Read = group',
|
||||
],
|
||||
Default: 0,
|
||||
},
|
||||
Input3: {
|
||||
Name: 'EnableNFSshare',
|
||||
Type: 'Selection',
|
||||
Label: 'Enable the NFS Share',
|
||||
Value: [
|
||||
'Disabled',
|
||||
'Enabled',
|
||||
],
|
||||
Default: 0,
|
||||
},
|
||||
Input4: {
|
||||
Name: 'ShareOnLocalNetwork',
|
||||
Type: 'Selection',
|
||||
Label: 'EnableShare on local network',
|
||||
Value: [
|
||||
'Disabled',
|
||||
'Enabled',
|
||||
],
|
||||
Default: 0,
|
||||
},
|
||||
Paragraph2: 'For writing permissions,allowing the root user and using insecure ports, you must configure a list of one IP per line, being part of the local network(s).',
|
||||
Input5: {
|
||||
Name: 'NFSClientsAllowed',
|
||||
Type: 'Textarea',
|
||||
Label: 'NFS Client(s) allowed',
|
||||
rows : 5
|
||||
},
|
||||
Input6: {
|
||||
Name: 'FileSystemPermissions',
|
||||
Type: 'Selection',
|
||||
Label: 'File system permissions',
|
||||
Value: [
|
||||
'Read only',
|
||||
'Read and Write',
|
||||
],
|
||||
Default: 0,
|
||||
},
|
||||
Input7: {
|
||||
Name: 'WriteAsync',
|
||||
Type: 'Selection',
|
||||
Label: 'Write (a)synchronously',
|
||||
Value: [
|
||||
'Synchronous',
|
||||
'Asynchronous',
|
||||
],
|
||||
},
|
||||
Input8: {
|
||||
Name: 'DelayWrite',
|
||||
Type: 'Selection',
|
||||
Label: 'Delays the disk writing',
|
||||
Value: [
|
||||
'Write delay',
|
||||
'No write delay',
|
||||
],
|
||||
Default: 1,
|
||||
},
|
||||
Input9: {
|
||||
Name: 'Squash',
|
||||
Type: 'Selection',
|
||||
Label: 'Squash the power of users',
|
||||
Value: [
|
||||
'All users squash',
|
||||
'No root squash',
|
||||
'root squash',
|
||||
],
|
||||
Default: 2,
|
||||
},
|
||||
Input10: {
|
||||
Name: 'BrowseParents',
|
||||
Type: 'Selection',
|
||||
Label: 'Browse the parent folders',
|
||||
Value: [
|
||||
'Hide folder',
|
||||
'Show folder',
|
||||
],
|
||||
Default: 0,
|
||||
},
|
||||
Input11: {
|
||||
Name: 'SecurePorts',
|
||||
Type: 'Selection',
|
||||
Label: 'Requests on secure ports',
|
||||
Value: [
|
||||
'Secure',
|
||||
'Insecure',
|
||||
],
|
||||
Default: 0,
|
||||
},
|
||||
Paragraph3: 'Set the uid and gid if you want all requests appear to be from one user or one group, otherwise leave blank',
|
||||
Input12: {
|
||||
Name: 'SetUID',
|
||||
Type: 'Textinput',
|
||||
Label: 'Set the UID.',
|
||||
},
|
||||
Input13: {
|
||||
Name: 'SetGID',
|
||||
Type: 'Textinput',
|
||||
Label: 'Set the GID.',
|
||||
},
|
||||
Submit: 'Save',
|
||||
},
|
||||
{
|
||||
Name: 'select_ibay',
|
||||
route:'TABLE',
|
||||
Header: 'NFS Share Contrib',
|
||||
SubHeader: 'Manage NFS Ibay settings:',
|
||||
Nextpanel: 'PARAMS',
|
||||
Table1: {
|
||||
Type:'Table',
|
||||
TableControl:"ibays",
|
||||
TopHeadings: ['Name','Description','Nfs status', 'Action'],
|
||||
Columns: ['Name','Description','flag','Modify']
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
261
sm1-html-2-json5.py
Normal file
261
sm1-html-2-json5.py
Normal file
@ -0,0 +1,261 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from lxml import etree # Import lxml for HTML validation
|
||||
|
||||
def read_html_file(filename):
    """Load an HTML document from disk.

    Args:
        filename: Path of the file to read.

    Returns:
        The complete file contents as a string, decoded as UTF-8.
    """
    with open(filename, mode='r', encoding='utf-8') as html_fh:
        contents = html_fh.read()
    return contents
|
||||
|
||||
def validate_html(html):
    """Check that ``html`` can be parsed with lxml's HTML parser.

    The parsed tree is thrown away; only success or failure matters.

    Args:
        html: The HTML document text to check.

    Raises:
        ValueError: "Invalid HTML document", chained to whatever exception
            the parse attempt raised.
    """
    # NOTE(review): any exception from the parse attempt is treated as
    # "invalid HTML"; which inputs lxml actually rejects is not visible
    # from this file -- confirm before relying on this as strict validation.
    try:
        etree.fromstring(html, etree.HTMLParser())
    except Exception as parse_error:
        raise ValueError("Invalid HTML document") from parse_error
|
||||
|
||||
def _label_text(label_tag):
    """Return the stripped text of ``label_tag``, or None when no tag was found."""
    if label_tag is None:
        return None
    return label_tag.get_text(strip=True)


def _table_record(table, table_control):
    """Build the 'Table' record for one ``sme-border`` table element."""
    rows = table.find_all('tr')

    # Headings come from the <th> cells of the first row only.
    top_headings = (
        [th.get_text(strip=True) for th in rows[0].find_all('th')] if rows else []
    )

    # Column names are derived from the headings, one per heading that has a
    # matching <td> in the first data row (the row after the heading row).
    columns = []
    if len(rows) > 1:
        cell_count = len(rows[1].find_all('td'))
        columns = [
            f"{table_control}-{heading}" for heading in top_headings[:cell_count]
        ]

    return {
        'Type': 'Table',
        'TableControl': table_control,
        'TopHeadings': top_headings,
        'Columns': columns,
    }


def extract_data(html):
    """Extract headers, paragraphs, pre blocks, form controls and tables from HTML.

    Elements are visited in document order, and one record (a dict with at
    least a 'Type' key) is appended per element of interest.

    Args:
        html: The HTML document text.

    Returns:
        A tuple ``(records, header_text, sub_header_text)`` where ``records``
        is the ordered list of record dicts, ``header_text`` is the text of
        the last ``<h1>`` seen (or None) and ``sub_header_text`` is the text
        of the last ``<h2>`` seen (or None).
    """
    soup = BeautifulSoup(html, 'lxml')
    records = []

    # Inputs with these names are skipped, as is any input of type "hidden".
    hidden_input_names = ('page', 'page_stack', '.id', 'csrf_token')

    header_text = None
    sub_header_text = None

    # Number of sme-border tables seen so far; used to name TableControl.
    table_counter = 0

    wanted_tags = ['h1', 'h2', 'p', 'pre', 'input', 'select', 'textarea', 'button', 'table']

    # find_all with a list of names yields matches in document order.
    for element in soup.find_all(wanted_tags):
        tag = element.name

        if tag == 'h1':
            header_text = element.get_text(strip=True)
            records.append({'Type': 'Header', 'Text': header_text})

        elif tag == 'h2':
            sub_header_text = element.get_text(strip=True)
            records.append({'Type': 'SubHeader', 'Text': sub_header_text})

        elif tag in ('p', 'pre'):
            # NOTE(review): get_text(strip=True) joins the stripped text
            # fragments of nested tags with no separator, so inline markup
            # such as <b> can fuse words ("click onCreate."). Consider
            # get_text(' ', strip=True) if that matters to consumers.
            text = element.get_text(strip=True)
            if text:  # Ignore empty paragraphs / pre blocks
                records.append({
                    'Type': 'Paragraph' if tag == 'p' else 'Preformatted',
                    'Text': text,
                })

        elif tag == 'input':
            if element.get('type') == 'hidden' or element.get('name') in hidden_input_names:
                continue
            records.append({
                'Type': element.get('type', 'text').capitalize(),
                'Name': element.get('name'),
                'Value': element.get('value', ''),
                # Inputs take the label that FOLLOWS them; the other
                # controls below take the label that precedes them.
                'Label': _label_text(element.find_next('label')),
            })

        elif tag == 'select':
            options = [
                {'Value': option.get('value'), 'Text': option.get_text(strip=True)}
                for option in element.find_all('option')
            ]
            records.append({
                'Type': 'Select',
                'Name': element.get('name'),
                'Options': options,
                'Label': _label_text(element.find_previous('label')),
            })

        elif tag == 'textarea':
            records.append({
                'Type': 'Textarea',
                'Name': element.get('name'),
                'Value': element.get_text(strip=True),
                'Label': _label_text(element.find_previous('label')),
            })

        elif tag == 'button':
            # Look up this button's own preceding label. The previous code
            # tested a leftover ``label`` variable from an earlier
            # input/textarea iteration, which was unbound if none had run.
            records.append({
                'Type': 'Button',
                'Name': element.get('name'),
                'Value': element.get_text(strip=True),
                'Label': _label_text(element.find_previous('label')),
            })

        elif tag == 'table' and 'sme-border' in element.get('class', []):
            # Only sme-border tables are recorded; other tables are skipped.
            table_counter += 1
            records.append(_table_record(element, f"Table{table_counter}"))

    return records, header_text, sub_header_text
|
||||
|
||||
def insert_spaces_before_caps(text):
    """Insert a space before every ASCII capital letter except a leading one.

    Example: ``'DiskUsage'`` becomes ``'Disk Usage'``.
    """
    return re.sub(r'(?<!^)(?=[A-Z])', ' ', text)


# A complete JSON string literal as emitted by json.dumps (group 1 = body).
_JSON_STRING_RE = re.compile(r'"((?:[^"\\]|\\.)*)"')
# Inside a string body: either a backslash escape pair or a bare apostrophe.
_ESCAPE_OR_APOSTROPHE_RE = re.compile(r"\\.|'")


def _requote_json_string(match):
    """Rewrite one double-quoted JSON string literal as a single-quoted JSON5 one."""
    def fix_piece(piece):
        token = piece.group(0)
        if token == '\\"':
            return '"'      # a double quote needs no escape inside '...'
        if token == "'":
            return "\\'"    # an apostrophe must be escaped inside '...'
        return token        # every other escape (\\n, \\\\, \\uXXXX) is kept
    return "'" + _ESCAPE_OR_APOSTROPHE_RE.sub(fix_piece, match.group(1)) + "'"


def _to_single_quoted_json5(json_text):
    """Convert json.dumps output to the single-quoted JSON5 style used by this tool."""
    # Outside of string literals JSON never contains a quote character, so
    # matching whole string tokens is unambiguous.
    return _JSON_STRING_RE.sub(_requote_json_string, json_text)


def save_to_json5(data, output_filename, package_name, header, sub_header):
    """Save extracted records to a JSON5 file with the package wrapper structure.

    Args:
        data: Ordered list of record dicts, each with a 'Type' key, as
            produced by the extraction step.
        output_filename: Path of the JSON5 file to write (overwritten).
        package_name: Package name; also used to derive the prefix, the menu
            description and the signal event name.
        header: Page header text, or a falsy value to use
            ``'<package_name> Contrib'``.
        sub_header: Page sub-header text, or a falsy value to use
            ``'Manage <package_name> settings:'``.

    String values are written single-quoted. Apostrophes in the text are
    escaped and embedded double quotes are preserved, so the output remains
    valid JSON5 whatever the text contains.
    """
    # Prefix is the sequence of ASCII capitals in the package name
    # (e.g. 'DiskUsage' -> 'DU').
    prefix = ''.join(re.findall(r'[A-Z]', package_name))

    html_section = {
        'Name': 'params',
        'route': 'PARAMS',
        'Header': header if header else f'{package_name} Contrib',
        'SubHeader': sub_header if sub_header else f'Manage {package_name} settings:',
    }

    # Each kind of entry is numbered independently, starting at 1.
    paragraph_count = 1
    preformatted_count = 1
    input_count = 1
    table_count = 1

    for record in data:
        record_type = record['Type']
        if record_type == 'Paragraph':
            html_section[f'Paragraph{paragraph_count}'] = record['Text']
            paragraph_count += 1
        elif record_type == 'Preformatted':
            html_section[f'Preformatted{preformatted_count}'] = record['Text']
            preformatted_count += 1
        elif record_type in ('Header', 'SubHeader'):
            continue  # Headers are emitted via 'Header'/'SubHeader' above
        elif record_type == 'Table':
            html_section[f'Table{table_count}'] = {
                'Type': record_type,
                'TableControl': record['TableControl'],
                'TopHeadings': record['TopHeadings'],
                'Columns': record['Columns'],
            }
            table_count += 1
        else:  # Inputs, selects, textareas and buttons
            input_structure = {
                'Type': record_type,
                'Value': record.get('Value', ''),
                'Name': record.get('Name', None),
                'Label': record.get('Label', None),
            }
            # Select records additionally carry their option list.
            if 'Options' in record:
                input_structure['Options'] = record['Options']
            html_section[f'Input{input_count}'] = input_structure
            input_count += 1

    json5_data = {
        'PackageName': package_name,
        'prefix': prefix,
        'MenuHeading': 'Miscellaneous',
        'MenuDescription': insert_spaces_before_caps(package_name),
        'MenuNavigation': '2000 400',
        'firstPanel': 'PARAMS',
        'signalEvent': f'smeserver-{package_name.lower()}-update',
        'html': html_section,
    }

    # Serialise once in memory, re-quote, then write the file a single time.
    json_text = json.dumps(json5_data, ensure_ascii=False, indent=4)
    with open(output_filename, 'w', encoding='utf-8') as json_file:
        json_file.write(_to_single_quoted_json5(json_text))
|
||||
|
||||
def main(argv=None):
    """Convert one HTML page into a JSON5 file written next to it.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When None, the process's own command line is used. The
            single optional positional argument is the input HTML path; if
            omitted, the original built-in default path is used.

    The output file has the input's base name with a ``.json5`` extension and
    is placed in the input file's directory.
    """
    # Local import keeps this change self-contained within the entry point.
    import argparse

    parser = argparse.ArgumentParser(
        description='Extract form/page structure from an HTML file into JSON5.'
    )
    parser.add_argument(
        'input_file',
        nargs='?',
        default='/home/brianr/clients/SM2/SM1-JSONGen/DiskUsage.html',
        help='path of the HTML file to convert (default: %(default)s)',
    )
    input_file = parser.parse_args(argv).input_file

    # Read HTML content
    html_content = read_html_file(input_file)

    # Validate the HTML before extracting data
    validate_html(html_content)

    # Extract data from HTML
    data, header, sub_header = extract_data(html_content)

    # The package name is the input file name without its extension.
    base_name = os.path.basename(input_file)
    package_name = os.path.splitext(base_name)[0]
    json_filename = package_name + '.json5'

    # Write the output alongside the input file.
    output_directory = os.path.dirname(input_file)
    output_file = os.path.join(output_directory, json_filename)

    # Save extracted data to JSON5
    save_to_json5(data, output_file, package_name, header, sub_header)
    print(f"Extracted data saved to '{output_file}'.")
|
||||
|
||||
# Run the extractor only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user