Refactor import and categorise for re-use of data in second scan

Brian Read 2024-06-03 16:15:27 +01:00
parent ad1962753b
commit b2440be6d0


@@ -10,7 +10,7 @@
 # Todo
 # 1. Make "yesterday" parameterised
 #
-import datetime
+from datetime import datetime, timedelta
 import sys
 from chameleon import PageTemplateFile,PageTemplate
 import pkg_resources
@@ -25,6 +25,9 @@ Mailstats_version = '1.2'
 script_dir = os.path.dirname(os.path.abspath(__file__))
 data_file_path = script_dir+'/../../../'
+now = datetime.now()
+yesterday = now - timedelta(days=1)
+formatted_yesterday = yesterday.strftime("%Y-%m-%d")
 
 # Column numbering
 Hour = 0
@@ -67,64 +70,114 @@ def is_private_ip(ip):
     return False
 
 def truncate_microseconds(timestamp):
     # Split timestamp into main part and microseconds
-    main_part, microseconds = timestamp.split('.')
-    # Truncate the last three digits of the microseconds
-    truncated_microseconds = microseconds[:-3]
-    # Combine the main part and truncated microseconds
-    truncated_timestamp = f"{main_part}.{truncated_microseconds}"
+    try:
+        main_part, microseconds = timestamp.split('.')
+        # Truncate the last three digits of the microseconds
+        truncated_microseconds = microseconds[:-3]
+        # Combine the main part and truncated microseconds
+        truncated_timestamp = f"{main_part}.{truncated_microseconds}"
+    except Exception as e:
+        print(f"{e} {timestamp}")
+        raise ValueError
     # Remove the microseconds completely if they exist
     return truncated_timestamp.split('.')[0]
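
For reference, a minimal sketch of the intended behaviour (timestamp values invented):

    truncate_microseconds("2024-06-02 14:03:21.123456")   # -> "2024-06-02 14:03:21"
    truncate_microseconds("14:03:21")   # no '.' to split on: prints the error, then raises ValueError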
-def filter_yesterdays_entries(log_entries):
-    # Determine yesterday's date
-    yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).date()
-    # Filter entries for yesterday's date
-    yesterday_entries = []
-    for timestamp, data in log_entries:
-        truncated_timestamp = truncate_microseconds(timestamp)
-        entry_date = datetime.datetime.strptime(truncated_timestamp, '%Y-%m-%d %H:%M:%S').date()
-        if entry_date == yesterday:
-            parsed_data = parse_data(data)
-            yesterday_entries.append((truncated_timestamp, parsed_data))
-    return yesterday_entries
+# def filter_yesterdays_entries(log_entries):
+#     # Determine yesterday's date
+#     yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).date()
+#     # Filter entries for yesterday's date
+#     yesterday_entries = []
+#     for timestamp, data in log_entries:
+#         truncated_timestamp = truncate_microseconds(timestamp)
+#         entry_date = datetime.datetime.strptime(truncated_timestamp, '%Y-%m-%d %H:%M:%S').date()
+#         if entry_date == yesterday:
+#             parsed_data = parse_data(data)
+#             yesterday_entries.append((truncated_timestamp, parsed_data))
+#     return yesterday_entries
+
+def read_in_yesterday_log_file(file_path):
+    # Read the file and split each line into a list - timestamp and the rest
+    log_entries = []
+    skip_record_count = 0
+    with open(file_path, 'r') as file:
+        for line in file:
+            # extract the timestamp
+            try:
+                entry = split_timestamp_and_data(line)
+                # compare with yesterday
+                timestamp_str = truncate_microseconds(entry[0])
+            except ValueError as e:
+                #print(f"ValueError {e} on timestamp create {timestamp_str}:{entry[0]} {entry[1]}")
+                skip_record_count += 1
+                continue
+            # Parse the timestamp string into a datetime object
+            # Ignoring extra microseconds
+            try:
+                timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
+            except ValueError as e:
+                print(f"ValueError {e} on timestamp extract {timestamp_str}:{entry[1]}")
+            if timestamp.date() == yesterday.date():
+                log_entries.append((timestamp, entry[1]))
+    return [log_entries, skip_record_count]
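
A usage sketch (the file name mirrors the data_file assignment made later in __main__):

    entries, skipped = read_in_yesterday_log_file(data_file_path + 'current.log')
    # each entry is (datetime, rest_of_line); skipped counts unparseable lines
    print(f"kept {len(entries)} entries for {formatted_yesterday}, skipped {skipped}")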
-def read_and_filter_yesterday_log(file_path):
-    # Read the file and split each line into a dictionary
-    log_entries = []
-    with open(file_path, 'r') as file:
-        for line in file:
-            if '`' in line:
-                parts = line.split(' ')
-                if parts:
-                    # Combine parts to form the complete timestamp
-                    timestamp = ' '.join(parts[:2])
-                    data = ' '.join(parts[2:])  # The rest of the line after date and time
-                    log_entries.append((timestamp, data))
-    # Filter the entries to keep only those from yesterday
-    filtered_entries = filter_yesterdays_entries(log_entries)
-    # Sort the filtered log entries based on the truncated timestamp
-    sorted_entries = sorted(filtered_entries, key=lambda x: datetime.datetime.strptime(x[0], '%Y-%m-%d %H:%M:%S'))
-    # Create a dictionary
-    sorted_dict = {entry[0]: entry[1] for entry in sorted_entries}
-    return sorted_dict
+def filter_summary_records(log_entries):
+    # Return just the summary records
+    filtered_log_entries = []
+    skipped_entry_count = 0
+    for line in log_entries:
+        #print(line)
+        #quit()
+        if '`' in line[1]:
+            filtered_log_entries.append(line)
+        else:
+            skipped_entry_count += 1
+    return [filtered_log_entries, skipped_entry_count]
+
+def sort_log_entries(log_entries):
+    # Sort the records, based on the timestamp
+    sorted_entries = sorted(log_entries, key=lambda x: x[0])
+    # and return a dictionary
+    sorted_dict = {entry[0]: entry[1] for entry in sorted_entries}
+    return sorted_dict
+
+# def read_and_filter_yesterday_log(file_path):
+#     # Read the file and split each line into a dictionary
+#     log_entries = []
+#     with open(file_path, 'r') as file:
+#         for line in file:
+#             if '`' in line:
+#                 parts = line.split(' ')
+#                 if parts:
+#                     # Combine parts to form the complete timestamp
+#                     timestamp = ' '.join(parts[:2])
+#                     data = ' '.join(parts[2:]) # The rest of the line after date and time
+#                     log_entries.append((timestamp, data))
+#     # Filter the entries to keep only those from yesterday
+#     filtered_entries = filter_yesterdays_entries(log_entries)
+#     # Sort the filtered log entries based on the truncated timestamp
+#     sorted_entries = sorted(filtered_entries, key=lambda x: datetime.datetime.strptime(x[0], '%Y-%m-%d %H:%M:%S'))
+#     # Create a dictionary
+#     sorted_dict = {entry[0]: entry[1] for entry in sorted_entries}
+#     return sorted_dict
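
Taken together, the new helpers behave roughly like this (records invented; only the backtick-bearing summary line survives the filter):

    entries = [
        (datetime(2024, 6, 2, 9, 0), "m1 (queue) logterse `203.0.113.9\t..."),
        (datetime(2024, 6, 2, 8, 0), "cleaning up after 3 days"),
    ]
    summary, skipped = filter_summary_records(entries)   # 1 kept, 1 skipped
    by_time = sort_log_entries(summary)                  # dict keyed by timestamp, oldest first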
 def parse_data(data):
     # Split data string into parts and map to named fields.
     # Adjust the field names and parsing logic according to your data format.
     # Split at the backtick - before it fields split at space, after, fields split at tab
     parts = data.split('`')
-    #print(parts[0],parts[1])
+    #print(f"{parts[0]}:{parts[1]}")
     fields1 = parts[0].strip().split() if len(parts) > 0 else []
     fields2 = parts[1].split('\t') if len(parts) > 1 else []
     # then merge them
     fields = fields1 + fields2
 #    if fields[8] != 'queued':
 #        i = 0
 #        print(f"len:{len(fields)}")
@@ -132,81 +185,81 @@ def parse_data(data):
 #        print(f"{i}: {part}")
 #        i = i +1
 #    quit()
     # and mapping:
     try:
         return_dict = {
             'id': fields[0].strip() if len(fields) > 0 else None,
             'action': fields[1].strip() if len(fields) > 1 else None,
             'logterse': fields[2].strip() if len(fields) > 2 else None,
             'ip': fields[3].strip() if len(fields) > 3 else None,
             'sendurl': fields[4].strip() if len(fields) > 4 else None,        #1
             'sendurl1': fields[5].strip() if len(fields) > 5 else None,       #2
             'from-email': fields[6].strip() if len(fields) > 6 else None,     #3
             'error-reason': fields[6].strip() if len(fields) > 6 else None,   #3
             'to-email': fields[7].strip() if len(fields) > 7 else None,       #4
             'error-plugin': fields[8].strip() if len(fields) > 8 else None,   #5
             'action1': fields[8].strip() if len(fields) > 8 else None,        #5
             'error-number': fields[9].strip() if len(fields) > 9 else None,   #6
             'sender': fields[10].strip() if len(fields) > 10 else None,       #7
             'error-msg': fields[10].strip() if len(fields) > 10 else None,    #7
             'spam-status': fields[11].strip() if len(fields) > 11 else None,  #8
             'error-result': fields[11].strip() if len(fields) > 11 else None, #8
             # Add more fields as necessary
         }
     except:
         #print(f"error:len:{len(fields)}")
         return_dict = {}
     return return_dict
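
A sketch of the field mapping on an invented logterse record (space-separated before the backtick, tab-separated after):

    line = ("m1 (queue) logterse`" + "\t".join(
        ["203.0.113.9", "mail.example.net", "192.168.1.10",
         "alice@example.com", "bob@example.com", "queued",
         "250", "2.0.0", "No, score=1.2 required=4.0"]))
    parsed = parse_data(line)
    parsed['action']       # '(queue)'
    parsed['spam-status']  # 'No, score=1.2 required=4.0'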
 def count_entries_by_hour(log_entries):
     hourly_counts = defaultdict(int)
     for entry in log_entries:
         # Extract hour from the timestamp
         timestamp = entry['timestamp']
         hour = datetime.datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H')
         hourly_counts[hour] += 1
     return hourly_counts
 
 def initialize_2d_array(num_hours, column_headers_len, reporting_date):
     num_hours += 1  # Adjust for the zeroth hour
     # Initialize the 2D list with zeroes
     return [[0] * column_headers_len for _ in range(num_hours)]
 
 def search_2d_list(target, data):
     """
     Search for a target string in a 2D list of variable-length lists of strings.
 
     :param target: str, the string to search for
     :param data: list of lists of str, the 2D list to search
     :return: int, the row number where the target string is found, or -1 if not found
     """
     for row_idx, row in enumerate(data):
         if target in row:
             return row_idx
     return -1  # Return -1 if not found
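
Against the columnPlugin table built in __main__ below, a hypothetical lookup looks like:

    search_2d_list('loadcheck', columnPlugin)        # -> the RejLoad row index
    search_2d_list('no_such_plugin', columnPlugin)   # -> -1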
 def check_html2text_installed():
     try:
         # Check if html2text is installed by running 'which html2text'
         result = subprocess.run(
             ['which', 'html2text'],
             check=True,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE
         )
         # If the command finds html2text, it will output the path
         html2text_path = result.stdout.decode('utf-8').strip()
         if not html2text_path:
             raise FileNotFoundError
         print(f"html2text is installed at: {html2text_path}")
         return True
     except subprocess.CalledProcessError:
         print("html2text is not installed. Please install it using your package manager.", file=sys.stderr)
         return False
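
For comparison only (not part of this commit), the stdlib offers the same check without spawning a subprocess:

    import shutil
    shutil.which('html2text')   # absolute path if installed, else None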
 def html_to_text(input_file, output_file):
     if not check_html2text_installed():
@@ -230,55 +283,76 @@ def html_to_text(input_file, output_file):
         sys.exit(e.returncode)
 
 def get_html2text_version():
     try:
         result = subprocess.run(['html2text', '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
         # Ensure the result is treated as a string in Python 3.6+
         return result.stdout.strip()
     except subprocess.CalledProcessError as e:
         print(f"Error occurred while checking html2text version: {e}", file=sys.stderr)
         return None
 def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=50, fill='█', print_end="\r"):
     """
     Call in a loop to create a terminal progress bar
     @params:
         iteration   - Required : current iteration (Int)
         total       - Required : total iterations (Int)
         prefix      - Optional : prefix string (Str)
         suffix      - Optional : suffix string (Str)
         decimals    - Optional : positive number of decimals in percent complete (Int)
         length      - Optional : character length of bar (Int)
         fill        - Optional : bar fill character (Str)
         print_end   - Optional : end character (e.g. "\r", "\r\n") (Str)
     """
+    if total == 0:
+        raise ValueError("Progress total is zero")
     percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
     filled_length = int(length * iteration // total)
     bar = fill * filled_length + '-' * (length - filled_length)
     print(f'\r{prefix} |{bar}| {percent}% {suffix}', end=print_end)
     # Print New Line on Complete
     if iteration == total:
         print()
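
A minimal usage sketch over an invented 200-step loop, mirroring the calls made in __main__:

    total = 200
    print_progress_bar(0, total, prefix='Progress:', suffix='Complete', length=50)
    for i in range(1, total + 1):
        print_progress_bar(i, total, prefix='Progress:', suffix='Complete', length=50)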
 def insert_string_after(original:str, to_insert:str, after:str) -> str:
     """
     Insert to_insert into original after the first occurrence of after.
 
     :param original: The original string.
     :param to_insert: The string to be inserted.
     :param after: The set of characters after which the string will be inserted.
     :return: The new string with to_insert inserted after after.
     """
     position = original.find(after)
-    print(position)
+    #print(position)
     if position == -1:
         # 'after' string is not found in 'original'
         return original
-    print(f"{len(after)}")
+    #print(f"{len(after)}")
     # Position of the insertion point
     insert_pos = position + len(after)
     return original[:insert_pos] + to_insert + original[insert_pos:]
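
Behaviour sketch with invented strings:

    insert_string_after("Mailstats report", " v1.2", "Mailstats")   # -> "Mailstats v1.2 report"
    insert_string_after("Mailstats report", " v1.2", "zzz")         # -> "Mailstats report" (unchanged)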
+def split_timestamp_and_data(log_entry: str) -> list:
+    """
+    Split a log entry into timestamp and the rest of the data.
+
+    :param log_entry: The log entry as a string.
+    :return: A list with two entries: [timestamp, rest_of_data].
+    """
+    # The timestamp is always the first part, up to the first space after the milliseconds
+    parts = log_entry.split(' ', 2)
+    if len(parts) < 3:
+        raise ValueError(f"The log entry format is incorrect {parts}")
+    timestamp = ' '.join(parts[:2])
+    rest_of_data = parts[2]
+    #print(f"{timestamp} {rest_of_data}")
+    return [timestamp, rest_of_data]
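
A sketch on an invented log line; the first two space-separated tokens form the timestamp:

    split_timestamp_and_data("2024-06-02 09:15:01.123456 m1 (queue) logterse `...")
    # -> ["2024-06-02 09:15:01.123456", "m1 (queue) logterse `..."]
    split_timestamp_and_data("too short")   # raises ValueError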
if __name__ == "__main__": if __name__ == "__main__":
try: try:
@@ -287,10 +361,8 @@ if __name__ == "__main__":
         chameleon_version = "Version information not available"
     python_version = sys.version
     python_version = python_version[:8]
-    current_datetime = datetime.datetime.now()
+    current_datetime = datetime.now()
     formatted_datetime = current_datetime.strftime("%Y-%m-%d %H:%M")
-    yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).date()
-    formatted_yesterday = yesterday.strftime("%Y-%m-%d")
 
     #From SMEServer DB
     DomainName = 'bjsystems.co.uk'   # $cdb->get('DomainName')->value;
@@ -306,14 +378,25 @@ if __name__ == "__main__":
     MAILMAN = "bounces"; #sender when mailman sending when orig is localhost
     DMARCDomain="dmarc"; #Pattern to recognise DMARC sent emails (this is not very reliable, as the email address could be anything)
     DMARCOkPattern="dmarc: pass"; #Pattern to use to detect DMARC approval
 
-    hello_string = "Mailstats:"+Mailstats_version+' for '+DomainName+" at "+formatted_datetime
+    hello_string = "Mailstats:"+Mailstats_version+' for '+DomainName+" at "+formatted_datetime+" for "+formatted_yesterday
     print(hello_string)
     version_string = "Chameleon:"+chameleon_version+" Python:"+python_version
     print(version_string)
 
     num_hours = 25  # Represents hours from 0 to 23 - adds extra one for column totals and another for percentages
     data_file = data_file_path+'current.log'
-    sorted_log_dict = read_and_filter_yesterday_log(data_file)
+    log_entries,skip_count = read_in_yesterday_log_file(data_file)
+    if len(log_entries) == 0:
+        print(f"No records found in {data_file}")
+        quit()
+    else:
+        print(f"Found {len(log_entries)} entries in log for {formatted_yesterday}, skipped {skip_count}")
+    summary_log_entries,skip_count = filter_summary_records(log_entries)
+    print(f"Found {len(summary_log_entries)} summary entries and skipped {skip_count} entries")
+    sorted_log_dict = sort_log_entries(summary_log_entries)
+    print(f"Sorted {len(sorted_log_dict)} entries")
 
     columnHeaders = ['Count','WebMail','Local','MailMan','Relay','DMARC','Virus','RBL/DNS','Geoip.','Non.Conf.','Karma','Rej.Load','Del.Spam','Qued.Spam?',' Ham','TOTALS','PERCENT']
     # dict for each column identifying the plugin that increments its count
     columnPlugin = [''] * 17
@@ -326,11 +409,11 @@ if __name__ == "__main__":
     columnPlugin[RBLDNS] = ['rhsbl', 'dnsbl','uribl']
     columnPlugin[Geoip] = ['check_badcountries']
     columnPlugin[NonConf] = ['check_earlytalker','check_relay','check_norelay', 'require_resolvable_fromhost'
                 ,'check_basicheaders','check_badmailfrom','check_badrcptto_patterns'
                 ,'check_badrcptto','check_spamhelo','check_goodrcptto extn','rcpt_ok'
                 ,'check_goodrcptto','check_smtp_forward','count_unrecognized_commands','tls','auth::auth_cvm_unix_local'
                 ,'auth::auth_imap', 'earlytalker','resolvable_fromhost','relay','headers','mailfrom','badrcptto','helo'
                 ,'check_smtp_forward','sender_permitted_from']
     columnPlugin[RejLoad] = ['loadcheck']
     columnPlugin[DelSpam] = []
     columnPlugin[QuedSpam] = []
@@ -353,13 +436,17 @@ if __name__ == "__main__":
     print_progress_bar(0, sorted_len, prefix='Progress:', suffix='Complete', length=50)
     for timestamp, data in sorted_log_dict.items():
         i += 1
-        print_progress_bar(i, sorted_len, prefix='Progress:', suffix='Complete', length=50)
+        print_progress_bar(i, sorted_len, prefix='Scanning for main table:', suffix='Complete', length=50)
         #print(f"{i*100/len}%")
         # Count in which hour it falls
         #hour = datetime.datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H')
         # Parse the timestamp string into a datetime object
-        dt = datetime.datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
+        dt = timestamp
         hour = dt.hour
+        # parse the data
+        #print(data)
+        parsed_data = parse_data(data)
+        #print(f"parsed_data['action']:{parsed_data['action']}\n")
 
         # Increment Count in which headings it falls
         #Hourly count and column total
@@ -370,19 +457,19 @@ if __name__ == "__main__":
         #Total totals
         columnCounts_2d[ColTotals][TOTALS] += 1
 
         #Queued email
-        if data['action'] == '(queue)':
+        if parsed_data['action'] == '(queue)':
             columnCounts_2d[hour][Ham] += 1
             columnCounts_2d[ColTotals][Ham] += 1
 
         #spamassassin
-        if data['spam-status'].lower().startswith('yes'):
+        if parsed_data['spam-status'].lower().startswith('yes'):
             #Extract other parameters from this string
             # example: Yes, score=10.3 required=4.0 autolearn=disable
             spam_pattern = r'score=([\d.]+)\s+required=([\d.]+)'
-            match = re.search(spam_pattern, data['spam-status'])
+            match = re.search(spam_pattern, parsed_data['spam-status'])
             if match:
                 score = float(match.group(1))
                 required = float(match.group(2))
-                #print(f"{data['spam-status']} / {score} {required}")
+                #print(f"{parsed_data['spam-status']} / {score} {required}")
                 if score >= SARejectLevel:
                     columnCounts_2d[hour][DelSpam] += 1
                     columnCounts_2d[ColTotals][DelSpam] += 1
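
The score/required extraction, applied to the example string quoted in the comment above:

    m = re.search(r'score=([\d.]+)\s+required=([\d.]+)',
                  "Yes, score=10.3 required=4.0 autolearn=disable")
    float(m.group(1)), float(m.group(2))   # (10.3, 4.0)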
@@ -390,26 +477,26 @@ if __name__ == "__main__":
                 else:
                     columnCounts_2d[hour][QuedSpam] += 1
                     columnCounts_2d[ColTotals][QuedSpam] += 1
 
         #Local send
-        elif DomainName in data['sendurl']:
+        elif DomainName in parsed_data['sendurl']:
             columnCounts_2d[hour][Local] += 1
             columnCounts_2d[ColTotals][Local] += 1
 
         #Relay or webmail
-        elif not is_private_ip(data['ip']) and is_private_ip(data['sendurl1']) and data['action1'] == 'queued':
+        elif not is_private_ip(parsed_data['ip']) and is_private_ip(parsed_data['sendurl1']) and parsed_data['action1'] == 'queued':
             #Relay
-            if data['action1'] == 'queued':
+            if parsed_data['action1'] == 'queued':
                 columnCounts_2d[hour][Relay] += 1
                 columnCounts_2d[ColTotals][Relay] += 1
 
-        elif WebmailIP in data['sendurl1'] and not is_private_ip(data['ip']):
+        elif WebmailIP in parsed_data['sendurl1'] and not is_private_ip(parsed_data['ip']):
             #webmail
             columnCounts_2d[hour][WebMail] += 1
             columnCounts_2d[ColTotals][WebMail] += 1
 
-        elif localhost in data['sendurl']:
+        elif localhost in parsed_data['sendurl']:
             # but not if it comes from fetchmail
-            if not FETCHMAIL in data['sendurl1']:
+            if not FETCHMAIL in parsed_data['sendurl1']:
                 # might still be from mailman here
-                if MAILMAN in data['sendurl1']:
+                if MAILMAN in parsed_data['sendurl1']:
                     #$mailmansendcount++;
                     #$localsendtotal++;
                     columnCounts_2d[hour][MailMan] += 1
@@ -420,13 +507,13 @@ if __name__ == "__main__":
                 #Or sent to the DMARC server
                 #check for email address in $DMARC_Report_emails string
                 #my $logemail = $log_items[4];
-                if DMARCDomain in data['from-email']: #(index($DMARC_Report_emails,$logemail)>=0) or
+                if DMARCDomain in parsed_data['from-email']: #(index($DMARC_Report_emails,$logemail)>=0) or
                     #$localsendtotal++;
                     #$DMARCSendCount++;
                     localflag = 1;
                 else:
                     # ignore incoming localhost spoofs
-                    if not 'msg denied before queued' in data['error-msg']:
+                    if not 'msg denied before queued' in parsed_data['error-msg']:
                         #Webmail
                         #$localflag = 1;
                         #$WebMailsendtotal++;
@@ -441,54 +528,61 @@ if __name__ == "__main__":
                         columnCounts_2d[ColTotals][WebMail] += 1
 
         #Now increment the column which the plugin name indicates
-        if data['action'] == '(deny)' and data['error-plugin']:
-            #print(f"Found plugin {data['error-plugin']}")
-            if data['error-plugin']:
-                row = search_2d_list(data['error-plugin'],columnPlugin)
+        if parsed_data['action'] == '(deny)' and parsed_data['error-plugin']:
+            #print(f"Found plugin {parsed_data['error-plugin']}")
+            if parsed_data['error-plugin']:
+                row = search_2d_list(parsed_data['error-plugin'],columnPlugin)
                 if not row == -1:
                     #print(f"Found row: {row}")
                     columnCounts_2d[hour][row] += 1
                     columnCounts_2d[ColTotals][row] += 1
                     # a few ad hoc extra extractions of data
                     if row == Virus:
-                        match = virus_pattern.match(data['action1'])
+                        match = virus_pattern.match(parsed_data['action1'])
                         if match:
                             found_viruses[match.group(1)] += 1
                         else:
-                            found_viruses[data['action1']] += 1
-                    elif data['error-plugin'] == 'naughty':
-                        match = qpcodes_pattern.match(data['action1'])
+                            found_viruses[parsed_data['action1']] += 1
+                    elif parsed_data['error-plugin'] == 'naughty':
+                        match = qpcodes_pattern.match(parsed_data['action1'])
                         if match:
                             rejReason = match.group(1)
-                            found_qpcodes[data['error-plugin']+"-"+rejReason] += 1
+                            found_qpcodes[parsed_data['error-plugin']+"-"+rejReason] += 1
                         else:
                             found_qpcodes['Unknown'] += 1
                     else:
-                        found_qpcodes[data['action1']] += 1
+                        found_qpcodes[parsed_data['action1']] += 1
     print()
 
     # Now scan for the other lines in the log of interest
     found_countries = defaultdict(int)
     geoip_pattern = re.compile(r"check_badcountries: GeoIP Country: (.*)")
     dmarc_pattern = re.compile(r"dmarc: pass")
     total_countries = 0
     DMARCOkCount = 0
 
-    with open(data_file, 'r') as file:
-        i = 0
-        for line in file:
-            i += 1
-            #Pull out Geoip countries for analysis table
-            match = geoip_pattern.match(line)
-            if match:
-                country = match.group(1)
-                found_countries[country] += 1
-                total_countries += 1
-                break
-            #Pull out DMARC approvals
-            match = dmarc_pattern.match(line)
-            if match:
-                DMARCOkCount += 1
-                break
+    # Pick up all the log entries again and sort for the sub-table scan
+    log_entries,skip_count = read_in_yesterday_log_file(data_file)
+    sorted_log_dict = sort_log_entries(log_entries)
+
+    i = 0
+    sorted_len = len(sorted_log_dict)
+    print_progress_bar(0, sorted_len, prefix='Progress:', suffix='Complete', length=50)
+    for timestamp, data in sorted_log_dict.items():
+        i += 1
+        print_progress_bar(i, sorted_len, prefix='Scanning for sub tables:', suffix='Complete', length=50)
+        #Pull out Geoip countries for analysis table
+        match = geoip_pattern.match(data)
+        if match:
+            country = match.group(1)
+            found_countries[country] += 1
+            total_countries += 1
+            break
+        #Pull out DMARC approvals
+        match = dmarc_pattern.match(data)
+        if match:
+            DMARCOkCount += 1
+            break
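
A sketch of the two compiled patterns against invented record data (both use match, so the text must start at the beginning of the record):

    geoip_pattern.match("check_badcountries: GeoIP Country: US").group(1)   # 'US'
    bool(dmarc_pattern.match("dmarc: pass action=none"))                    # True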
     #Now apply the results to the chameleon template - main table
     # Path to the template file
@@ -539,3 +633,5 @@ if __name__ == "__main__":
     html_to_text(output_path+'.html',output_path+'.txt')
     print(f"Rendered HTML saved to {output_path}.html/txt")