* Fri Sep 12 2025 Brian Read <brianr@koozali.org> 11.1-7.sme

- Truncate Geoip table and add other category [SME: 13121]
- Cope with blank data in action1 [SME: 13121]
This commit is contained in:
2025-09-12 11:26:35 +01:00
parent 1b757b1336
commit 55cb7a6f05
3 changed files with 162 additions and 30 deletions

View File

@@ -1,7 +1,7 @@
<div class="${classname}">
<h2>${title}</h2>
<tal:block condition="threshold != 0">
<span class='greyed-out'>Display threshold set to ${threshold}%</span>
<span class='greyed-out'>${threshold}</span>
</tal:block>
<tal:block condition="threshold == 0">
<br>

View File

@@ -834,7 +834,123 @@ def split_timestamp_and_data(log_entry: str) -> list:
rest_of_line = log_entry # If no match, return the whole line
return [timestamp, rest_of_line]
def render_sub_table(table_title, table_headers, found_values, get_character=None, suppress_threshold=False):
# Default tuning values for select_rows_just_below().
MIN_COUNT = 3             # Entries with a count below this are never listed individually
MAX_TOTAL_ROWS = 10       # Row budget for a truncated table, counting the "Other" row
OTHER_TARGET_FRAC = 0.01  # Aim for "Other" to be strictly below this fraction of the total
OTHER_LABEL = 'Other'     # Label prefix used for the aggregate row
SHOW_ALL = True           # True: list every entry >= MIN_COUNT and emit no "Other" row


def select_rows_just_below(items, min_count=MIN_COUNT,
                           max_total_rows=MAX_TOTAL_ROWS,
                           other_target_frac=OTHER_TARGET_FRAC,
                           other_label=OTHER_LABEL, show_all=SHOW_ALL):
    """
    Select the rows to display and stamp each with its share of the total.

    Parameters:
        items: either a dict mapping key -> count, or an iterable of
            (key, count, ...) tuples/lists. Extra fields are carried through.
        min_count: rows whose count is below this are never listed individually.
        max_total_rows: maximum rows in truncated mode, including 'Other'.
        other_target_frac: fraction (0..1) that 'Other' should fall strictly below.
        other_label: prefix of the aggregate row key, rendered as 'label(n)'.
        show_all: when True, return every row with count >= min_count, no
            'Other' row, and ignore max_total_rows / other_target_frac.

    Returns:
        (rows, total, note) where
        - rows is a list of tuples sorted by count descending; index 2 of each
          tuple holds the percentage of the total (0..100, rounded to 2 places),
          overwriting an existing third field or appended when there is none;
        - total is the sum of all counts (including rows that were filtered out);
        - note is "No data." when the total is 0, otherwise None.

        In truncated mode (show_all False) the list always ends with an
        'Other(n)' row of exactly three fields, where n is the number of input
        rows that were not listed individually.

    Raises:
        TypeError: if a non-dict input contains an element that is not a
            tuple/list of at least two fields.
    """
    # Normalise the input to a list of tuples, keeping any extra fields.
    if isinstance(items, dict):
        rows_in = list(items.items())
    else:
        rows_in = []
        for entry in items:
            if not isinstance(entry, (tuple, list)) or len(entry) < 2:
                raise TypeError("Each item must be a (key, count, ...) tuple/list or a dict mapping key->count.")
            rows_in.append(tuple(entry))

    def with_percent(row, share):
        # Copy of `row` with the rounded percentage placed at index 2.
        pct = round(share, 2)
        if len(row) >= 3:
            return row[:2] + (pct,) + row[3:]
        return row + (pct,)

    total = sum(row[1] for row in rows_in)
    if total == 0:
        # Nothing to apportion: truncated mode still gets its 'Other' placeholder.
        placeholder = [] if show_all else [(f"{other_label}(0)", 0, 0.0)]
        return placeholder, 0, "No data."

    # Candidates for individual display, largest first (ties keep input order).
    ranked = sorted((row for row in rows_in if row[1] >= min_count),
                    key=lambda row: row[1], reverse=True)

    if show_all:
        listed = [with_percent(row, (row[1] / total) * 100.0) for row in ranked]
        return listed, total, None

    # One slot of the row budget is reserved for the 'Other' row.
    top_budget = max(0, max_total_rows - 1)

    # Smallest prefix of `ranked` that pushes 'Other' strictly below the target;
    # if no prefix achieves that, fall back to every candidate.
    rows_needed = len(ranked)
    running = 0
    for position, row in enumerate(ranked, start=1):
        running += row[1]
        if (total - running) / total < other_target_frac:
            rows_needed = position
            break

    # The row budget wins over the target when the two conflict.
    shown = ranked[:min(rows_needed, top_budget)]

    other_count = total - sum(row[1] for row in shown)
    other_percent = (other_count / total) * 100.0
    # Everything not listed individually is folded into 'Other', including
    # rows that were below min_count.
    hidden_rows = len(rows_in) - len(shown)

    result = [with_percent(row, (row[1] / total) * 100.0) for row in shown]
    result.append((f"{other_label}({hidden_rows})", other_count, round(other_percent, 2)))
    return result, total, None
def render_sub_table(table_title, table_headers, found_values, get_character=None, show_all=True):
#Check if any data provided
if len(found_values) != 0:
# Get the total
@@ -878,30 +994,36 @@ def render_sub_table(table_title, table_headers, found_values, get_character=Non
raise ValueError("found_values must be either a list of numbers or a list of dictionaries.")
else:
raise TypeError("found_values must be a dictionary or a list.")
# # Dynamic threshold calculation
# if not suppress_threshold:
# dynamic_threshold = max(1, 100 / (original_total**0.65)) if original_total > 0 else 0
# dynamic_threshold = round(dynamic_threshold,1)
# logging.debug(f"Threshold for {table_title} set to {dynamic_threshold}% ")
# else:
# dynamic_threshold=0
# absolute_floor = 10 # Minimum absolute value threshold
# # Filter results using early termination
# filtered_sub_result = []
# for row in sub_result:
# value = row[1]
# percentage = (value / original_total * 100) if original_total else 0
# # Exit condition: below both thresholds
# if percentage < dynamic_threshold or value < absolute_floor:
# break
# filtered_sub_result.append(row)
# sub_result = filtered_sub_result # Keep only significant rows
sub_result.sort(key=lambda x: float(x[1]), reverse=True) # Sort by percentage in descending order
# Dynamic threshold calculation
if not suppress_threshold:
dynamic_threshold = max(1, 100 / (original_total**0.5)) if original_total > 0 else 0
dynamic_threshold = round(dynamic_threshold,1)
logging.debug(f"Threshold for {table_title} set to {dynamic_threshold}% ")
if not show_all:
sub_result, total, note = select_rows_just_below(sub_result,show_all=False)
else:
dynamic_threshold=0
absolute_floor = 50 # Minimum absolute value threshold
# Filter results using early termination
filtered_sub_result = []
for row in sub_result:
value = row[1]
percentage = (value / original_total * 100) if original_total else 0
# Exit condition: below both thresholds
if percentage < dynamic_threshold and value < absolute_floor:
break
filtered_sub_result.append(row)
sub_result = filtered_sub_result # Keep only significant rows
note = "" #no threshold applied
total = original_total
sub_template_path = template_dir+'mailstats-sub-table.html.pt'
# Load the template
@@ -914,7 +1036,7 @@ def render_sub_table(table_title, table_headers, found_values, get_character=Non
try:
rendered_html = template(array_2d=sub_result, column_headers=table_headers,
title=table_title, classname=get_first_word(table_title),
threshold=dynamic_threshold)
threshold=note)
except Exception as e:
raise ValueError(f"{table_title}: A chameleon controller render error occurred: {e}")
except Exception as e:
@@ -1672,6 +1794,9 @@ if __name__ == "__main__":
if match:
rejReason = match.group(1)
found_qpcodes[parsed_data['error-plugin']+"-"+rejReason] += 1
else:
if parsed_data['action1'] == "":
logging.warning(f"Found blank action1 {timestamp} {parsed_data['id']} {parsed_data['ip']} {parsed_data['sendurl']}")
else:
found_qpcodes[parsed_data['action1']] += 1
@@ -1709,6 +1834,8 @@ if __name__ == "__main__":
else:
email = None
if email:
if '@' in email:
email = email.lower()
record = next((item for item in recipients_found if item['email'] == email), None)
if not record:
# If email is not in the array, we add it
@@ -1821,6 +1948,7 @@ if __name__ == "__main__":
try:
match = geoip_pattern.match(data['MESSAGE'])
if match:
logging.debug(f"Found bad country message {data['MESSAGE']} {match.group(1)} ")
j += 1
country = match.group(1)
found_countries[country] += 1
@@ -1928,7 +2056,7 @@ if __name__ == "__main__":
#virus codes
virus_headers = ["Virus",'Count','Percent']
virus_title = 'Viruses found'
virus_rendered_html = render_sub_table(virus_title,virus_headers,found_viruses,suppress_threshold=True)
virus_rendered_html = render_sub_table(virus_title,virus_headers,found_viruses)
# Add it to the total
total_html = insert_string_after(total_html,virus_rendered_html, "<!---Add in sub tables here -->")
@@ -1944,7 +2072,7 @@ if __name__ == "__main__":
junk_mail_count_headers = ['Username','Count', 'Percent']
junk_mail_counts = scan_mail_users()
junk_mail_count_title = 'Junk mail counts'
junk_rendered_html = render_sub_table(junk_mail_count_title,junk_mail_count_headers,junk_mail_counts,suppress_threshold=True)
junk_rendered_html = render_sub_table(junk_mail_count_title,junk_mail_count_headers,junk_mail_counts)
# Add it to the total
total_html = insert_string_after(total_html,junk_rendered_html, "<!---Add in sub tables here -->")
@@ -1952,21 +2080,21 @@ if __name__ == "__main__":
#Recipient counts
recipient_count_headers = ["Email",'Queued','Rejected','Spam tagged','Accepted Percent']
recipient_count_title = 'Incoming email recipients'
recipient_rendered_html = render_sub_table(recipient_count_title,recipient_count_headers,recipients_found,suppress_threshold=True)
recipient_rendered_html = render_sub_table(recipient_count_title,recipient_count_headers,recipients_found)
# Add it to the total
total_html = insert_string_after(total_html,recipient_rendered_html, "<!---Add in sub tables here -->")
#Geoip Country codes
geoip_headers = ['Country','Count','Percent','Rejected?']
geoip_title = 'Geoip results'
geoip_rendered_html = render_sub_table(geoip_title,geoip_headers,found_countries,get_character_in_reject_list)
geoip_rendered_html = render_sub_table(geoip_title,geoip_headers,found_countries,get_character_in_reject_list,show_all=False)
# Add it to the total
total_html = insert_string_after(total_html,geoip_rendered_html, "<!---Add in sub tables here -->")
#Blacklist counts
blacklist_headers = ['URL','Count','Percent']
blacklist_title = 'Blacklist used'
blacklist_rendered_html = render_sub_table(blacklist_title,blacklist_headers,blacklist_found,suppress_threshold=True)
blacklist_rendered_html = render_sub_table(blacklist_title,blacklist_headers,blacklist_found)
# Add it to the total
total_html = insert_string_after(total_html,blacklist_rendered_html, "<!---Add in sub tables here -->")

View File

@@ -6,7 +6,7 @@ Summary: Daily mail statistics for SME Server
%define name smeserver-mailstats
Name: %{name}
%define version 11.1
%define release 6
%define release 7
Version: %{version}
Release: %{release}%{?dist}
License: GPL
@@ -90,6 +90,10 @@ usermod -aG systemd-journal www
/sbin/ldconfig
%changelog
* Fri Sep 12 2025 Brian Read <brianr@koozali.org> 11.1-7.sme
- Truncate Geoip table and add other category [SME: 13121]
- Cope with blank data in action1 [SME: 13121]
* Thu Sep 04 2025 Brian Read <brianr@koozali.org> 11.1-6.sme
- Add favicon to mailstats table, summary and detailed pages [SME: 13121]
- Bring DB config reading for mailstats itself in line with php summary and detailed logs - using /etc/mailstats/db.php [SME: 13121]