Switch graphs to matplotlib which does not require internet access

This commit is contained in:
2025-03-31 08:44:27 +01:00
parent 4d29da7f3d
commit da71021889
7 changed files with 319 additions and 388 deletions

View File

@@ -56,14 +56,15 @@
# pip3 install numpy
# pip3 install plotly
# pip3 install pandas
# pip3 install matplotlib
#
# Rocky8: (probably - not yet checked this)
#
# dnf install python3-chameleon --enablerepo=epel
# dnf install html2text --enablerepo=epel
# dnf install python3-matplotlib
# pip3 install numpy
# pip3 install pymysql
# pip3 install plotly
# pip3 install pandas
#
#
@@ -89,6 +90,12 @@ import plotly.express as px
import colorsys
import pymysql
import json
enable_graphs = True;
try:
import matplotlib.pyplot as plt
except ImportError:
print("Matplotlib is not installed - no graphs")
enable_graphs = False;
Mailstats_version = '1.2'
build_date_time = "2024-06-18 12:03:40"
@@ -129,251 +136,145 @@ PERCENT = TOTALS + 1
ColTotals = 24
ColPercent = 25
def sanitize_and_filter_data_for_stacked_bar(data2d, xLabels, yLabels, exclude_columns_labels, exclude_rows_labels):
def transform_to_dict(data, keys, iso_date):
"""
Sanitize data by removing unwanted columns and rows, and converting to numeric values.
Parameters:
- data2d (list of lists): A 2D list containing the data.
- xLabels (list): Current labels for the x-axis.
- yLabels (list): Current labels for the y-axis.
- exclude_columns_labels (list): Labels of columns to exclude from the data and x-axis.
- exclude_rows_labels (list): Labels of rows to exclude from the y-axis.
Transforms a 26x17 list of lists into a list of dictionaries with specified keys.
Args:
data (list): A 26x17 list of lists.
keys (list): A 1D array specifying the keys for the dictionaries.
iso_date (str): A date in ISO format to prepend to each row number.
Returns:
- numpy.ndarray: Sanitized 2D numpy array with numeric data.
- list: Filtered x-axis labels.
- list: Filtered y-axis labels.
list: A list of dictionaries with transformed data.
"""
def to_numeric(value):
    """Coerce a table cell to a float, defaulting to 0.0 on failure.

    Strings may carry a '%' suffix and surrounding whitespace
    (e.g. ' 12.5% '); both are stripped before conversion so the
    sanitized grid contains only numeric data.
    """
    try:
        if isinstance(value, str):
            # Remove any extra characters like '%' and convert to float
            return float(value.replace('%', '').strip())
        else:
            return float(value)
    except (ValueError, TypeError):
        # TypeError added: float(None) previously escaped the
        # ValueError-only handler and crashed the caller.
        return 0.0  # Default to 0 if conversion fails
# Filter out columns based on their labels
exclude_columns_indices = [xLabels.index(label) for label in exclude_columns_labels if label in xLabels]
# Validate input dimensions
if len(data) != 26:
raise ValueError("Input data must have 26 rows.")
if len(keys) != len(data[0]): # Account for the new column
raise ValueError(f"Keys must match the number of columns after transformation {len(keys)} {len(data[0])}")
filtered_data2d = [
[to_numeric(value) for idx, value in enumerate(row) if idx not in exclude_columns_indices]
for row in data2d
]
# Remove rows 25 and 26
filtered_data = data[:24]
filtered_xLabels = [label for idx, label in enumerate(xLabels) if idx not in exclude_columns_indices]
# Filter out rows based on their labels
filtered_data2d = [row for label, row in zip(yLabels, filtered_data2d) if label not in exclude_rows_labels]
filtered_yLabels = [label for label in yLabels if label not in exclude_rows_labels]
# Convert filtered data to numpy array
return np.array(filtered_data2d), filtered_xLabels, filtered_yLabels
def generate_distinct_colors(num_colors):
    """Return ``num_colors`` visually distinct ``'rgb(r,g,b)'`` strings.

    Hues are spaced evenly around the HSV wheel at a fixed
    saturation (0.7) and value (0.9), then converted to 0-255 RGB.
    """
    def hue_to_rgb_string(hue):
        red, green, blue = colorsys.hsv_to_rgb(hue, 0.7, 0.9)
        return f'rgb({int(red * 255)},{int(green * 255)},{int(blue * 255)})'

    return [hue_to_rgb_string(index / num_colors) for index in range(num_colors)]
# and same for keys
modified_keys = keys[1:-2]
def create_stacked_bar_graph(data2d, xLabels, yLabels, save_path='stacked_bar_graph.html'):
"""
Creates and saves a stacked bar graph from given 2D numpy array data using Plotly.
Parameters:
- data2d (list of lists or numpy.ndarray): A 2D list or numpy array containing the data.
- xLabels (list): A list of category labels for the x-axis.
- yLabels (list): A list of labels for the y-axis (e.g., hours).
- save_path (str): The path where the plot image will be saved.
"""
# Identify columns to be removed based on their headers (label names) and indices (hours 24 and 25)
exclude_columns_labels = ["Count", "PERCENT","TOTALS"]
exclude_rows_labels = ["24:00", "25:00"]
# Add new column with ISO date and row number
transformed_data = []
for i, row in enumerate(filtered_data):
new_column_value = f"{i}" #f"{iso_date},{i}"
transformed_row = [new_column_value] + row[1:-2] # Remove first and last two columns
transformed_data.append(transformed_row)
# Ensure input yLabels correspond to the data
if len(yLabels) != len(data2d):
raise ValueError(f"The length of yLabels {len(yLabels)} must match the number of rows in the data {len(data2d)}.")
# Sanitize and filter the data
sanitized_data, filtered_xLabels, filtered_yLabels = sanitize_and_filter_data_for_stacked_bar(data2d, xLabels, yLabels, exclude_columns_labels, exclude_rows_labels)
# Ensure that the length of yLabels matches the number of rows (0 to n should be n+1 rows)
if len(filtered_yLabels) != sanitized_data.shape[0]:
raise ValueError(f"The length of filtered_yLabels {len(filtered_yLabels)} must match the number of rows in the data {sanitized_data.shape[0]}.")
# Transpose the data so that hours are on the x-axis and categories are stacked in the y-axis
transposed_data = sanitized_data.T
fig = go.Figure()
# Get unique colors for each category
extended_colors = generate_distinct_colors(len(filtered_xLabels))
for i, category in enumerate(filtered_xLabels):
fig.add_trace(go.Bar(
name=category,
x=filtered_yLabels,
y=transposed_data[i],
marker_color=extended_colors[i % len(extended_colors)] # Cycle through the colors if there are more categories than colors
# Convert each row into a dictionary using supplied keys
result = [dict(zip(["Time"] + modified_keys, row)) for row in transformed_data]
))
fig.update_layout(
barmode='stack',
title='Stacked Bar Graph by Hour',
xaxis=dict(title='Hour'),
yaxis=dict(title='Values'),
legend_title_text='Categories',
margin = {
'l': 50, #left margin
'r': 120, #right margin
't': 50, #top margin
'b': 50 #bottom margin
}
)
# Save the graph to an HTML file
fig.write_html(save_path)
# Write it to a var and return the string
graph_html = fig.to_html(full_html=False,include_plotlyjs='https://cdn.plot.ly/plotly-latest.min.js')
return graph_html
def sanitize_and_filter_data(data2d, exclude_labels, xLabels):
"""
Sanitize data by removing unwanted columns and converting to numeric values.
Parameters:
- data2d (list of lists): A 2D list containing the data.
- exclude_labels (list): Labels to exclude from the data and x-axis.
- xLabels (list): Current labels for the x-axis.
Returns:
- numpy.ndarray: Sanitized 2D numpy array with numeric data.
- list: Filtered x-axis labels.
"""
def to_numeric(value):
    """Convert a cell value to float, treating any failure as 0.0.

    String inputs may include a '%' suffix and stray whitespace;
    both are removed before parsing.
    """
    try:
        if isinstance(value, str):
            # Remove any extra characters like '%' and convert to float
            return float(value.replace('%', '').strip())
        else:
            return float(value)
    except (ValueError, TypeError):
        # TypeError added so non-castable objects (e.g. None) fall
        # back to 0.0 instead of raising out of the sanitizer.
        return 0.0  # Default to 0 if conversion fails
return result
# Create a boolean array for columns to keep (not in exclude_labels)
columns_to_keep = [label not in exclude_labels for label in xLabels]
# Filter out the columns both from the data and xLabels
filtered_data2d = []
for row in data2d:
filtered_row = [to_numeric(value) for keep, value in zip(columns_to_keep, row) if keep]
filtered_data2d.append(filtered_row)
def create_graph(data_dict, graph_type="line", output_file="graph.png",iso_date='1970-01-01'):
"""
Creates a graph from nested list data with hours as x-axis.
filtered_xLabels = [label for label, keep in zip(xLabels, columns_to_keep) if keep]
Args:
data_dict (list): List structure where:
- Each element is a list representing hour data
- First element is the hour (0-23)
- Remaining elements are counts for different types/categories
graph_type (str): Type of graph to create ("line", "bar", "scatter", "pie").
output_file (str): Path to save the image file.
"""
# Check if data is empty
if not data_dict:
raise ValueError("Input data cannot be empty")
return np.array(filtered_data2d), filtered_xLabels
# Extract hours (from the "NewColumn" key)
hours = [row["Time"] for row in data_dict] # First column is the ISO date + row number
def create_heatmap(data2d, xLabels, yLabels, save_path='heatmap.html'):
"""
Creates and saves a heatmap from given 2D numpy array data using Plotly.
Parameters:
- data2d (list of lists or numpy.ndarray): A 2D list or numpy array containing the data.
- xLabels (list): A list of category labels for the x-axis.
- yLabels (list): A list of labels for the y-axis (e.g., hours).
- save_path (str): The path where the plot image will be saved.
"""
excluded_columns = ["Count", "PERCENT", "TOTALS"]
# Remove rows 24 and 25 by slicing the data and labels
data2d = data2d[:24]
yLabels = yLabels[:24] # Ensure yLabels also excludes those rows
# Extract types (keys excluding "NewColumn")
types = [key for key in data_dict[0].keys() if key != "Time"] # Dynamically get keys except "NewColumn"
# Sanitize and filter the data
sanitized_data, filtered_xLabels = sanitize_and_filter_data(data2d, excluded_columns, xLabels)
# Extract counts for each type
counts = {typ: [row[typ] for row in data_dict] for typ in types}
# Ensure that the length of yLabels matches the number of rows (0 to n should be n+1 rows)
if len(yLabels) != sanitized_data.shape[0]:
raise ValueError("The length of yLabels must match the number of rows in the data.")
plt.figure(figsize=(10, 6)) # Create a figure
# Create the heatmap
# Define a custom color scale where 0 is white
color_scale = [
[0, "lightgrey"],
[0.3, "blue"],
[0.6, 'green'],
[0.75,'yellow'],
[1,'red']
]
fig = px.imshow(sanitized_data,
labels=dict(x="Category", y="Hour", color="Count"),
x=filtered_xLabels,
y=yLabels,
color_continuous_scale=color_scale)
fig.update_layout(
title='Heatmap of Counts by Category per Hour',
xaxis_nticks=len(filtered_xLabels),
yaxis_nticks=len(yLabels),
margin=dict(l=0, r=0, t=30, b=0)
# Generate different types of graphs based on the input parameter
if graph_type == "line":
for typ in types:
plt.plot(hours, counts[typ], label=typ, marker='o')
plt.title(f"Line Graph for {iso_date}")
plt.xlabel("Hours")
plt.ylabel("Counts")
)
fig.update_xaxes(showticklabels=True, side='bottom', showline=True, linewidth=2, linecolor='black', mirror=True)
fig.update_yaxes(showticklabels=True, showline=True, linewidth=2, linecolor='black', mirror=True)
fig.write_html(save_path)
# Write it to a var and return the string
graph_html = fig.to_html(full_html=False,include_plotlyjs='https://cdn.plot.ly/plotly-latest.min.js')
return graph_html
def create_line_chart(data2d, xLabels, yLabels, save_path='line_chart.html'):
fig = go.Figure()
excluded_columns = ["Count", "PERCENT", "TOTALS"]
# Remove rows 24 and 25 by slicing the data and labels
data2d = data2d[:24]
yLabels = yLabels[:24] # Ensure yLabels also excludes those rows
elif graph_type == "bar":
bottom = [0] * len(hours)
for typ in types:
plt.bar(hours, counts[typ], bottom=bottom, label=typ)
bottom = [b + y for b, y in zip(bottom, counts[typ])]
plt.title(f"Bar Graph for {iso_date}")
plt.xlabel("Hours")
plt.ylabel("Counts")
# Sanitize and filter the data
sanitized_data, filtered_xLabels = sanitize_and_filter_data(data2d, excluded_columns, xLabels)
elif graph_type == "scatter":
for typ in types:
plt.scatter(hours, counts[typ], label=typ)
plt.title(f"Scatter Plot for {iso_date}")
plt.xlabel("Hours")
plt.ylabel("Counts")
# Ensure that the length of yLabels matches the number of rows (0 to n should be n+1 rows)
if len(yLabels) != sanitized_data.shape[0]:
raise ValueError("The length of yLabels must match the number of rows in the data.")
elif graph_type == "pie":
total_counts = {typ: sum(counts[typ]) for typ in types}
total_sum = sum(total_counts.values())
threshold_percent = 0.01 * total_sum
# Remove rows with all zero elements and the corresponding categories
nonzero_rows_indices = np.where(~np.all(sanitized_data == 0, axis=0))[0] # find rows with non-zero elements
sanitized_data = sanitized_data[:, nonzero_rows_indices]
filtered_xLabels = [filtered_xLabels[i] for i in nonzero_rows_indices] # update filtered_xLabels
# Separate filtered counts and "Other" counts
filtered_counts = {}
other_total = 0
for i, category in enumerate(filtered_xLabels):
fig.add_trace(go.Scatter(
mode='lines+markers',
name=category,
x= [f'{j:02d}:00' for j in range(sanitized_data.shape[0])],
y=sanitized_data[:, i]
))
for typ, value in total_counts.items():
if value > 0 and value >= threshold_percent:
filtered_counts[typ] = value
else:
other_total += value
fig.update_layout(
title='Line Chart of Counts by Category per Hour',
xaxis=dict(title='Hour'),
yaxis=dict(title='Count'),
legend_title_text='Category'
)
fig.write_html(save_path)
# Write it to a var and return the string
graph_html = fig.to_html(full_html=False,include_plotlyjs='https://cdn.plot.ly/plotly-latest.min.js')
return graph_html
# Add "Other" category if there are values below the threshold
if other_total > 0:
filtered_counts["Other"] = other_total
# Prepare data for the pie chart
labels = filtered_counts.keys()
sizes = filtered_counts.values()
# Plot the pie chart
plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
plt.title(f"Pie Chart for {iso_date}")
else:
raise ValueError(f"Unsupported graph type: {graph_type}")
if graph_type != "pie":
plt.xticks(hours)
plt.grid(alpha=0.3)
plt.legend()
# Save the graph to a file
plt.tight_layout()
plt.savefig(output_file)
plt.close()
# def convert_to_numeric(data):
# """
# Converts all values in a nested list or dictionary to numeric types (int or float).
# """
# for i in range(len(data)):
# for j in range(1, len(data[i])): # Skip the first column (hour)
# try:
# data[i][j] = float(data[i][j]) # Convert to float
# except ValueError:
# raise ValueError(f"Non-numeric value found: {data[i][j]}")
# return data
def save_summaries_to_db(cursor, conn, date_str, hour, parsed_data):
# Convert parsed_data to JSON string
@@ -529,6 +430,8 @@ def read_in_relevant_log_file(file_path,analysis_date=yesterday):
timestamp = timestamp.replace(year=yesterday_year)
except (ValueError, TypeError) as e:
print(f"Error {e} line {line_count} on timestamp extract {timestamp_str}:{entry[1]}")
ignore_record_count += 1
continue
#print(f"Stamps: {timestamp.date()} {analysis_date.date()}")
if timestamp.date() == analysis_date.date():
log_entries.append((timestamp, entry[1]))
@@ -571,7 +474,7 @@ def parse_data(data):
# for part in fields:
# print(f"{i}: {part}")
# i = i +1
# quit()
# (quit)()
# and mapping:
try:
return_dict = {
@@ -861,7 +764,7 @@ def read_html_from_file(filepath):
# Read in CSS
with open(css_path, 'r', encoding='utf-8') as file:
css_contents = file.read()
html_contents = insert_string_after(html_contents,"\n"+css_contents,"<!--css here-->")
html_contents = insert_string_after(html_contents,"\n<style>"+css_contents+"</style>","<!--css here-->")
return html_contents
def read_text_from_file(filepath):
@@ -1643,7 +1546,7 @@ if __name__ == "__main__":
connection_type_counts[connection_type] += 1
#print(f"Count:{connection_type_counts[connection_type]}")
continue
#Compute next and previous dates
day_format = "%Y-%m-%d"
@@ -1658,10 +1561,27 @@ if __name__ == "__main__":
previous_date_str = previous_date.strftime(day_format)
# Create graphs of data
yLabels = [f'{i:02d}:00' for i in range(len(columnCounts_2d))]
stacked_Bar_html = create_stacked_bar_graph(columnCounts_2d,columnHeaders,yLabels,html_page_dir+'stacked_bar_'+analysis_date+'.html')
heatmap_html = create_heatmap(columnCounts_2d,columnHeaders,yLabels,html_page_dir+'heatmap_'+analysis_date+'.html')
line_graph_html = create_line_chart(columnCounts_2d,columnHeaders,yLabels,html_page_dir+'line_graph_'+analysis_date+'.html')
# yLabels = [f'{i:02d}:00' for i in range(len(columnCounts_2d))]
# stacked_Bar_html = create_stacked_bar_graph(columnCounts_2d,columnHeaders,yLabels,html_page_dir+'stacked_bar_'+analysis_date+'.html')
# heatmap_html = create_heatmap(columnCounts_2d,columnHeaders,yLabels,html_page_dir+'heatmap_'+analysis_date+'.html')
# line_graph_html = create_line_chart(columnCounts_2d,columnHeaders,yLabels,html_page_dir+'line_graph_'+analysis_date+'.html')
columnCounts_2d_dict = transform_to_dict(columnCounts_2d,columnHeaders,analysis_date)
#Export as json for testing
# with open("/opt/mailstats/html/colCounts_2d.json", "w") as json_file:
# json.dump(columnCounts_2d, json_file)
# with open("/opt/mailstats/html/colCounts_2d-dict", "w") as json_file:
# json.dump(columnCounts_2d_dict, json_file)
# with open("/opt/mailstats/html/keys.json", "w") as json_file:
# json.dump(columnHeaders, json_file)
if enable_graphs:
create_graph(columnCounts_2d_dict, "line", html_page_dir+"line_graph_"+analysis_date+".png",analysis_date)
create_graph(columnCounts_2d_dict, "bar", html_page_dir+"bar_graph_"+analysis_date+".png",analysis_date)
create_graph(columnCounts_2d_dict, "scatter", html_page_dir+"scatter_graph_"+analysis_date+".png",analysis_date)
create_graph(columnCounts_2d_dict, "pie", html_page_dir+"pie_chart_"+analysis_date+".png",analysis_date)
#Now apply the results to the chameleon template - main table
# Path to the template file
@@ -1682,13 +1602,11 @@ if __name__ == "__main__":
reporting_date=analysis_date, title=html_title,
version=version_string,
nolinks=nolinks,
stacked_bar_graph=stacked_Bar_html,
heatmap=heatmap_html,
line_graph=line_graph_html,
PreviousDate=previous_date_str,
NextDate=next_date_str,
DomainName=DomainName,
SystemName=SystemName
SystemName=SystemName,
enable_graphs=enable_graphs
)
except Exception as e:
print(f"Chameleon template Exception {e}")
@@ -1785,7 +1703,7 @@ if __name__ == "__main__":
filepath = html_page_dir+"mailstats_for_"+analysis_date+".html"
html_content = read_html_from_file(filepath)
# Replace the Navigation by a "See in browser" prompt
replace_str = f"<div class='divseeinbrowser' style='text-align:center;'><a class='seeinbrowser' href='http://{SystemName}.{DomainName}/mailstats/mailstats_for_{analysis_date}.html'>See in browser</a></div>"
replace_str = f"<div class='divseeinbrowser'><a class='seeinbrowser' href='http://{SystemName}.{DomainName}/mailstats/mailstats_for_{analysis_date}.html'>See in browser</a></div>"
html_content = replace_between(html_content, "<div class='linksattop'>", ">Next</a></div>", replace_str)
if not noemailfile:
# Write out the email html to a web page