From a9be56deae941913c4649caef09cd2c78808f019 Mon Sep 17 00:00:00 2001
From: Brian Read <brianr@koozali.org>
Date: Sat, 13 Jul 2024 11:12:18 +0100
Subject: [PATCH] Refine the stacked bar graph

---
 root/usr/bin/mailstats.py | 134 ++++++++++++++++++++++++--------------
 1 file changed, 84 insertions(+), 50 deletions(-)

diff --git a/root/usr/bin/mailstats.py b/root/usr/bin/mailstats.py
index d0d2e63..c488b22 100644
--- a/root/usr/bin/mailstats.py
+++ b/root/usr/bin/mailstats.py
@@ -82,6 +82,7 @@ import mysql.connector
 import numpy as np
 import plotly.graph_objects as go
 import plotly.express as px
+import colorsys
 
 Mailstats_version = '1.2'
 build_date_time = "2024-06-18 12:03:40OURCE" 
@@ -125,15 +126,21 @@ ColPercent = 25
 import mysql.connector
 import json
 
-def sanitize_data(data2d):
+def sanitize_and_filter_data_for_stacked_bar(data2d, xLabels, yLabels, exclude_columns_labels, exclude_rows_labels):
     """
-    Convert data to numeric values, stripping out non-numeric characters.
+    Sanitize data by removing unwanted columns and rows, and converting to numeric values.
     
     Parameters:
     - data2d (list of lists): A 2D list containing the data.
+    - xLabels (list): Current labels for the x-axis.
+    - yLabels (list): Current labels for the y-axis.
+    - exclude_columns_labels (list): Labels of columns to exclude from the data and x-axis.
+    - exclude_rows_labels (list): Labels of rows to exclude from the data and the y-axis labels.
     
     Returns:
     - numpy.ndarray: Sanitized 2D numpy array with numeric data.
+    - list: Filtered x-axis labels.
+    - list: Filtered y-axis labels.
     """
     def to_numeric(value):
         try:
@@ -145,67 +152,92 @@ def sanitize_data(data2d):
         except ValueError:
             return 0.0  # Default to 0 if conversion fails
 
-    sanitized_data = []
-    for row in data2d:
-        sanitized_row = [to_numeric(value) for value in row]
-        sanitized_data.append(sanitized_row)
+    # Filter out columns based on their labels
+    exclude_columns_indices = [xLabels.index(label) for label in exclude_columns_labels if label in xLabels]
     
-    return np.array(sanitized_data)
+    filtered_data2d = [
+        [to_numeric(value) for idx, value in enumerate(row) if idx not in exclude_columns_indices]
+        for row in data2d
+    ]
+    
+    filtered_xLabels = [label for idx, label in enumerate(xLabels) if idx not in exclude_columns_indices]
+    
+    # Filter out rows based on their labels
+    filtered_data2d = [row for label, row in zip(yLabels, filtered_data2d) if label not in exclude_rows_labels]
+    filtered_yLabels = [label for label in yLabels if label not in exclude_rows_labels]
+    
+    # Convert filtered data to numpy array
+    return np.array(filtered_data2d), filtered_xLabels, filtered_yLabels
+      
+def generate_distinct_colors(num_colors):
+    """Generate num_colors distinct 'rgb(r,g,b)' color strings with evenly spaced HSV hues."""
+    colors = []
+    for i in range(num_colors):
+        hue = i / num_colors
+        saturation = 0.7
+        value = 0.9
+        r, g, b = colorsys.hsv_to_rgb(hue, saturation, value)
+        colors.append(f'rgb({int(r * 255)},{int(g * 255)},{int(b * 255)})')
+    return colors
 
-def create_stacked_bar_graph(data2d, xLabels, save_path='stacked_bar_graph.html'):
+def create_stacked_bar_graph(data2d, xLabels, yLabels, save_path='stacked_bar_graph.html'):
     """
     Creates and saves a stacked bar graph from given 2D numpy array data using Plotly.
-
+    
     Parameters:
     - data2d (list of lists or numpy.ndarray): A 2D list or numpy array containing the data.
     - xLabels (list): A list of category labels for the x-axis.
+    - yLabels (list): Row labels, one per row of data2d (e.g., hours); plotted along the x-axis.
     - save_path (str): The path where the plot image will be saved.
     """
-    # Identify columns to be removed based on their headers
-    excluded_columns = ["Count", "PERCENT"]
-
-    # Create a boolean array for columns to keep (not in excluded_columns)
-    columns_to_keep = [label not in excluded_columns for label in xLabels]
-
-    # Filter out the columns both from the data and xLabels
-    filtered_data2d = []
-    for row in data2d:
-        filtered_row = [value for keep, value in zip(columns_to_keep, row) if keep]
-        filtered_data2d.append(filtered_row)
-
-    filtered_xLabels = [label for label, keep in zip(xLabels, columns_to_keep) if keep]
-
-    # Sanitize data and convert it to a numpy array
-    data = sanitize_data(filtered_data2d)
-
-    # Find columns that are not fully zero
-    non_zero_columns = np.any(data != 0, axis=0)
-
-    # Filter out fully zero columns from both the data and x_labels
-    filtered_data = data[:, non_zero_columns]
-    filtered_x_labels = np.array(filtered_xLabels)[non_zero_columns]
+    # Columns are excluded by header label; rows are excluded by y-axis label (the "24:00" and "25:00" rows)
+    exclude_columns_labels = ["Count", "PERCENT","TOTALS"]
+    exclude_rows_labels = ["24:00", "25:00"]
 
+    # Ensure input yLabels correspond to the data
+    if len(yLabels) != len(data2d):
+        raise ValueError(f"The length of yLabels {len(yLabels)} must match the number of rows in the data {len(data2d)}.")
+    
+    # Sanitize and filter the data
+    sanitized_data, filtered_xLabels, filtered_yLabels = sanitize_and_filter_data_for_stacked_bar(data2d, xLabels, yLabels, exclude_columns_labels, exclude_rows_labels)
+    
+    # Ensure the filtered y-axis labels still match the number of rows remaining after filtering
+    if len(filtered_yLabels) != sanitized_data.shape[0]:
+        raise ValueError(f"The length of filtered_yLabels {len(filtered_yLabels)} must match the number of rows in the data {sanitized_data.shape[0]}.")
+    
+    # Transpose the data so that hours are on the x-axis and categories are stacked in the y-axis
+    transposed_data = sanitized_data.T
+    
     fig = go.Figure()
+    
+    # Get unique colors for each category
+    extended_colors = generate_distinct_colors(len(filtered_xLabels))
+    
+    #print(len(filtered_xLabels))
+    #print(extended_colors)
+    #quit()
+    
+    for i, category in enumerate(filtered_xLabels):
+        fig.add_trace(go.Bar(
+            name=category,
+            x=filtered_yLabels,
+            y=transposed_data[i],
+            marker_color=extended_colors[i % len(extended_colors)]  # One color is generated per category, so the modulo is only a safety guard
 
-    for i in range(filtered_data.shape[0]):
-        if i <= 23:  # Ensure to annotate rows with proper names (e.g., Hours)
-            fig.add_trace(go.Bar(
-                name=f'Hour {i}',
-                x=filtered_x_labels,
-                y=filtered_data[i]
-            ))
-
+        ))
+    
     fig.update_layout(
         barmode='stack',
-        title='Stacked Bar Graph Example',
-        xaxis=dict(title='Category'),
+        title='Stacked Bar Graph by Hour',
+        xaxis=dict(title='Hour'),
         yaxis=dict(title='Values'),
-        legend_title_text='Rows'
+        legend_title_text='Categories'
     )
-
+    
     # Save the graph to an HTML file
     fig.write_html(save_path)
-  
+
+ 
 def sanitize_and_filter_data(data2d, exclude_labels, xLabels):
     """
     Sanitize data by removing unwanted columns and converting to numeric values.
@@ -1419,11 +1451,13 @@ if __name__ == "__main__":
 	else:
 		text_file_path = ""
 	
-	# Create graph of data
-	create_stacked_bar_graph(columnCounts_2d,columnHeaders)
-	yLabels = [f'Hour {i}' for i in range(26)]
-	create_heatmap(columnCounts_2d,columnHeaders,yLabels)
-	create_line_chart(columnCounts_2d,columnHeaders,yLabels)
+	# Create graphs of data
+	#yLabels = [f'Hour {i}' for i in range(len(columnCounts_2d))]
+	yLabels = [f'{i:02d}:00' for i in range(len(columnCounts_2d))]
+	create_stacked_bar_graph(columnCounts_2d,columnHeaders,yLabels,html_page_dir+'stacked_bar_'+analysis_date+'.html')
+	#yLabels = [f'Hour {i}' for i in range(26)]
+	create_heatmap(columnCounts_2d,columnHeaders,yLabels,html_page_dir+'heatmap_'+analysis_date+'.html')
+	create_line_chart(columnCounts_2d,columnHeaders,yLabels,html_page_dir+'line_graph_'+analysis_date+'.html')
 	
 	html_content = None
 	text_content = None