diff --git a/root/usr/bin/mailstats.py b/root/usr/bin/mailstats.py
index 915170b..c6e22f8 100644
--- a/root/usr/bin/mailstats.py
+++ b/root/usr/bin/mailstats.py
@@ -10,7 +10,7 @@
 # Todo
 # 1. Make "yesterday" parameterised
 #
-import datetime
+from datetime import datetime, timedelta
 import sys
 from chameleon import PageTemplateFile,PageTemplate
 import pkg_resources
@@ -25,6 +25,9 @@
 Mailstats_version = '1.2'
 script_dir = os.path.dirname(os.path.abspath(__file__))
 data_file_path = script_dir+'/../../../'
 
+now = datetime.now()
+yesterday = now - timedelta(days=1)
+formatted_yesterday = yesterday.strftime("%Y-%m-%d")
 # Column numbering
 Hour = 0
@@ -67,64 +70,116 @@ def is_private_ip(ip):
     return False
 
 def truncate_microseconds(timestamp):
-    # Split timestamp into main part and microseconds
-    main_part, microseconds = timestamp.split('.')
-    # Truncate the last three digits of the microseconds
-    truncated_microseconds = microseconds[:-3]
-    # Combine the main part and truncated microseconds
-    truncated_timestamp = f"{main_part}.{truncated_microseconds}"
-    # Remove the microseconds completely if they exist
-    return truncated_timestamp.split('.')[0]
+    # Split timestamp into main part and microseconds
+    try:
+        main_part, microseconds = timestamp.split('.')
+        # Truncate the last three digits of the microseconds
+        truncated_microseconds = microseconds[:-3]
+        # Combine the main part and truncated microseconds
+        truncated_timestamp = f"{main_part}.{truncated_microseconds}"
+    except Exception as e:
+        print(f"{e} {timestamp}")
+        raise ValueError
+    # Remove the microseconds completely if they exist
+    return truncated_timestamp.split('.')[0]
 
-def filter_yesterdays_entries(log_entries):
-    # Determine yesterday's date
-    yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).date()
-
-    # Filter entries for yesterday's date
-    yesterday_entries = []
-    for timestamp, data in log_entries:
-        truncated_timestamp = truncate_microseconds(timestamp)
-        entry_date = datetime.datetime.strptime(truncated_timestamp, '%Y-%m-%d %H:%M:%S').date()
-        if entry_date == yesterday:
-            parsed_data = parse_data(data)
-            yesterday_entries.append((truncated_timestamp, parsed_data))
-
-    return yesterday_entries
+# def filter_yesterdays_entries(log_entries):
+    # # Determine yesterday's date
+    # yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).date()
+    # # Filter entries for yesterday's date
+    # yesterday_entries = []
+    # for timestamp, data in log_entries:
+        # truncated_timestamp = truncate_microseconds(timestamp)
+        # entry_date = datetime.datetime.strptime(truncated_timestamp, '%Y-%m-%d %H:%M:%S').date()
+        # if entry_date == yesterday:
+            # parsed_data = parse_data(data)
+            # yesterday_entries.append((truncated_timestamp, parsed_data))
+
+    # return yesterday_entries
+
+def read_in_yesterday_log_file(file_path):
+    # Read the file and split each line into a list - timestamp and the rest
+    # Get current date and calculate yesterday's date
+    log_entries = []
+    skip_record_count = 0
+    with open(file_path, 'r') as file:
+        for line in file:
+            # extract timestamp
+            try:
+                entry = split_timestamp_and_data(line)
+                # compare with yesterday
+                timestamp_str = truncate_microseconds(entry[0])
+            except ValueError as e:
+                #print(f"ValueError {e} on timestamp create {timestamp_str}:{entry[0]} {entry[1]}")
+                skip_record_count += 1
+                continue
+            # Parse the timestamp string into a datetime object
+            # Ignoring extra microseconds
+            try:
+                timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
+            except ValueError as e:
+                print(f"ValueError {e} on timestamp extract {timestamp_str}:{entry[1]}")
+                skip_record_count += 1
+                continue
+            if timestamp.date() == yesterday.date():
+                log_entries.append((timestamp, entry[1]))
+    return [log_entries,skip_record_count]
+
+def filter_summary_records(log_entries):
+    # Return just the summary records
+    filtered_log_entries = []
+    skipped_entry_count = 0
+    for line in log_entries:
+        #print(line)
+        #quit()
+        if '`' in line[1]:
+            filtered_log_entries.append(line)
+        else:
+            skipped_entry_count += 1
+    return [filtered_log_entries,skipped_entry_count]
+
+def sort_log_entries(log_entries):
+    # Sort the records, based on the timestamp
+    sorted_entries = sorted(log_entries, key=lambda x: x[0])
+    # and return a dictionary
+    sorted_dict = {entry[0]: entry[1] for entry in sorted_entries}
+    return sorted_dict
+
-def read_and_filter_yesterday_log(file_path):
-    # Read the file and split each line into a dictionary
-    log_entries = []
-    with open(file_path, 'r') as file:
-        for line in file:
-            if '`' in line:
-                parts = line.split(' ')
-                if parts:
-                    # Combine parts to form the complete timestamp
-                    timestamp = ' '.join(parts[:2])
-                    data = ' '.join(parts[2:])  # The rest of the line after date and time
-                    log_entries.append((timestamp, data))
-
-    # Filter the entries to keep only those from yesterday
-    filtered_entries = filter_yesterdays_entries(log_entries)
-
-    # Sort the filtered log entries based on the truncated timestamp
-    sorted_entries = sorted(filtered_entries, key=lambda x: datetime.datetime.strptime(x[0], '%Y-%m-%d %H:%M:%S'))
-
-    # Create a dictionary
-    sorted_dict = {entry[0]: entry[1] for entry in sorted_entries}
-
-    return sorted_dict
+# def read_and_filter_yesterday_log(file_path):
+    # # Read the file and split each line into a dictionary
+    # log_entries = []
+    # with open(file_path, 'r') as file:
+        # for line in file:
+            # if '`' in line:
+                # parts = line.split(' ')
+                # if parts:
+                    # # Combine parts to form the complete timestamp
+                    # timestamp = ' '.join(parts[:2])
+                    # data = ' '.join(parts[2:])  # The rest of the line after date and time
+                    # log_entries.append((timestamp, data))
+
+    # # Filter the entries to keep only those from yesterday
+    # filtered_entries = filter_yesterdays_entries(log_entries)
+
+    # # Sort the filtered log entries based on the truncated timestamp
+    # sorted_entries = sorted(filtered_entries, key=lambda x: datetime.datetime.strptime(x[0], '%Y-%m-%d %H:%M:%S'))
+
+    # # Create a dictionary
+    # sorted_dict = {entry[0]: entry[1] for entry in sorted_entries}
+
+    # return sorted_dict
 
 def parse_data(data):
-    # Split data string into parts and map to named fields.
-    # Adjust the field names and parsing logic according to your data format.
-    # Split at the backtick - before it fields split at space, after, fields split at tab
-    parts = data.split('`')
-    #print(parts[0],parts[1])
-    fields1 = parts[0].strip().split() if len(parts) > 0 else []
-    fields2 = parts[1].split('\t') if len(parts) > 1 else []
-    # then merge them
-    fields = fields1 + fields2
+    # Split data string into parts and map to named fields.
+    # Adjust the field names and parsing logic according to your data format.
+    # Split at the backtick - before it fields split at space, after, fields split at tab
+    parts = data.split('`')
+    #print(f"{parts[0]}:{parts[1]}")
+    fields1 = parts[0].strip().split() if len(parts) > 0 else []
+    fields2 = parts[1].split('\t') if len(parts) > 1 else []
+    # then merge them
+    fields = fields1 + fields2
 #    if fields[8] != 'queued':
 #        i = 0
 #        print(f"len:{len(fields)}")
@@ -132,81 +185,81 @@
 #            print(f"{i}: {part}")
 #            i = i +1
 #        quit()
-    # and mapping:
-    try:
-        return_dict = {
-            'id': fields[0].strip() if len(fields) > 0 else None,
-            'action': fields[1].strip() if len(fields) > 1 else None,
-            'logterse': fields[2].strip() if len(fields) > 2 else None,
-            'ip': fields[3].strip() if len(fields) > 3 else None,
-            'sendurl': fields[4].strip() if len(fields) > 4 else None, #1
-            'sendurl1': fields[5].strip() if len(fields) > 5 else None, #2
-            'from-email': fields[6].strip() if len(fields) > 6 else None, #3
-            'error-reason': fields[6].strip() if len(fields) > 6 else None, #3
-            'to-email': fields[7].strip() if len(fields) > 7 else None, #4
-            'error-plugin': fields[8].strip() if len(fields) > 8 else None, #5
-            'action1': fields[8].strip() if len(fields) > 8 else None, #5
-            'error-number' : fields[9].strip() if len(fields) > 9 else None, #6
-            'sender': fields[10].strip() if len(fields) > 10 else None, #7
-            'error-msg' :fields[10].strip() if len(fields) > 10 else None, #7
-            'spam-status': fields[11].strip() if len(fields) > 11 else None, #8
-            'error-result': fields[11].strip() if len(fields) > 11 else None,#8
-            # Add more fields as necessary
-        }
-    except:
-        #print(f"error:len:{len(fields)}")
-        return_dict = {}
-    return return_dict
+    # and mapping:
+    try:
+        return_dict = {
+            'id': fields[0].strip() if len(fields) > 0 else None,
+            'action': fields[1].strip() if len(fields) > 1 else None,
+            'logterse': fields[2].strip() if len(fields) > 2 else None,
+            'ip': fields[3].strip() if len(fields) > 3 else None,
+            'sendurl': fields[4].strip() if len(fields) > 4 else None, #1
+            'sendurl1': fields[5].strip() if len(fields) > 5 else None, #2
+            'from-email': fields[6].strip() if len(fields) > 6 else None, #3
+            'error-reason': fields[6].strip() if len(fields) > 6 else None, #3
+            'to-email': fields[7].strip() if len(fields) > 7 else None, #4
+            'error-plugin': fields[8].strip() if len(fields) > 8 else None, #5
+            'action1': fields[8].strip() if len(fields) > 8 else None, #5
+            'error-number' : fields[9].strip() if len(fields) > 9 else None, #6
+            'sender': fields[10].strip() if len(fields) > 10 else None, #7
+            'error-msg' :fields[10].strip() if len(fields) > 10 else None, #7
+            'spam-status': fields[11].strip() if len(fields) > 11 else None, #8
+            'error-result': fields[11].strip() if len(fields) > 11 else None,#8
+            # Add more fields as necessary
+        }
+    except:
+        #print(f"error:len:{len(fields)}")
+        return_dict = {}
+    return return_dict
 
 def count_entries_by_hour(log_entries):
-    hourly_counts = defaultdict(int)
-    for entry in log_entries:
-        # Extract hour from the timestamp
-        timestamp = entry['timestamp']
-        hour = datetime.datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H')
-        hourly_counts[hour] += 1
-    return hourly_counts
+    hourly_counts = defaultdict(int)
+    for entry in log_entries:
+        # Extract hour from the timestamp
+        timestamp = entry['timestamp']
+        hour = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H')
+        hourly_counts[hour] += 1
+    return hourly_counts
 
 def initialize_2d_array(num_hours, column_headers_len,reporting_date):
-    num_hours += 1  # Adjust for the zeroth hour
-    # Initialize the 2D list with zeroes
-    return [[0] * column_headers_len for _ in range(num_hours)]
+    num_hours += 1  # Adjust for the zeroth hour
+    # Initialize the 2D list with zeroes
+    return [[0] * column_headers_len for _ in range(num_hours)]
 
 def search_2d_list(target, data):
-    """
-    Search for a target string in a 2D list of variable-length lists of strings.
+    """
+    Search for a target string in a 2D list of variable-length lists of strings.
 
-    :param target: str, the string to search for
-    :param data: list of lists of str, the 2D list to search
-    :return: int, the row number where the target string is found, or -1 if not found
-    """
-    for row_idx, row in enumerate(data):
-        if target in row:
-            return row_idx
-    return -1  # Return -1 if not found
-
+    :param target: str, the string to search for
+    :param data: list of lists of str, the 2D list to search
+    :return: int, the row number where the target string is found, or -1 if not found
+    """
+    for row_idx, row in enumerate(data):
+        if target in row:
+            return row_idx
+    return -1  # Return -1 if not found
+
 def check_html2text_installed():
-    try:
-        # Check if html2text is installed by running 'which html2text'
-        result = subprocess.run(
-            ['which', 'html2text'],
-            check=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE
-        )
+    try:
+        # Check if html2text is installed by running 'which html2text'
+        result = subprocess.run(
+            ['which', 'html2text'],
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE
+        )
 
-        # If the command finds html2text, it will output the path
-        html2text_path = result.stdout.decode('utf-8').strip()
-
-        if not html2text_path:
-            raise FileNotFoundError
-
-        print(f"html2text is installed at: {html2text_path}")
-        return True
+        # If the command finds html2text, it will output the path
+        html2text_path = result.stdout.decode('utf-8').strip()
+
+        if not html2text_path:
+            raise FileNotFoundError
+
+        print(f"html2text is installed at: {html2text_path}")
+        return True
 
-    except subprocess.CalledProcessError:
-        print("html2text is not installed. Please install it using your package manager.", file=sys.stderr)
-        return False
+    except subprocess.CalledProcessError:
+        print("html2text is not installed. Please install it using your package manager.", file=sys.stderr)
+        return False
 
 def html_to_text(input_file, output_file):
     if not check_html2text_installed():
@@ -230,56 +283,77 @@ def html_to_text(input_file, output_file):
         sys.exit(e.returncode)
 
 def get_html2text_version():
-    try:
-        result = subprocess.run(['html2text', '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
-        # Ensure the result is treated as a string in Python 3.6+
-        return result.stdout.strip()
-    except subprocess.CalledProcessError as e:
-        print(f"Error occurred while checking html2text version: {e}", file=sys.stderr)
-        return None
+    try:
+        result = subprocess.run(['html2text', '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+        # Ensure the result is treated as a string in Python 3.6+
+        return result.stdout.strip()
+    except subprocess.CalledProcessError as e:
+        print(f"Error occurred while checking html2text version: {e}", file=sys.stderr)
+        return None
 
 def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=50, fill='█', print_end="\r"):
-    """
-    Call in a loop to create a terminal progress bar
-    @params:
-        iteration   - Required  : current iteration (Int)
-        total       - Required  : total iterations (Int)
-        prefix      - Optional  : prefix string (Str)
-        suffix      - Optional  : suffix string (Str)
-        decimals    - Optional  : positive number of decimals in percent complete (Int)
-        length      - Optional  : character length of bar (Int)
-        fill        - Optional  : bar fill character (Str)
-        print_end   - Optional  : end character (e.g. "\r", "\r\n") (Str)
-    """
-    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
-    filled_length = int(length * iteration // total)
-    bar = fill * filled_length + '-' * (length - filled_length)
-    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end=print_end)
-    # Print New Line on Complete
-    if iteration == total:
-        print()
+    """
+    Call in a loop to create a terminal progress bar
+    @params:
+        iteration   - Required  : current iteration (Int)
+        total       - Required  : total iterations (Int)
+        prefix      - Optional  : prefix string (Str)
+        suffix      - Optional  : suffix string (Str)
+        decimals    - Optional  : positive number of decimals in percent complete (Int)
+        length      - Optional  : character length of bar (Int)
+        fill        - Optional  : bar fill character (Str)
+        print_end   - Optional  : end character (e.g. "\r", "\r\n") (Str)
+    """
+    if total == 0:
+        raise ValueError("Progress total is zero")
+    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
+    filled_length = int(length * iteration // total)
+    bar = fill * filled_length + '-' * (length - filled_length)
+    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end=print_end)
+    # Print New Line on Complete
+    if iteration == total:
+        print()
 
 def insert_string_after(original:str, to_insert:str, after:str) -> str:
-    """
-    Insert to_insert into original after the first occurrence of after.
-
-    :param original: The original string.
-    :param to_insert: The string to be inserted.
-    :param after: The set of characters after which the string will be inserted.
-    :return: The new string with to_insert inserted after after.
-    """
-    position = original.find(after)
-    print(position)
-
-    if position == -1:
-        # 'after' string is not found in 'original'
-        return original
-    print(f"{len(after)}")
-    # Position of the insertion point
-    insert_pos = position + len(after)
-
-    return original[:insert_pos] + to_insert + original[insert_pos:]
-
+    """
+    Insert to_insert into original after the first occurrence of after.
+
+    :param original: The original string.
+    :param to_insert: The string to be inserted.
+    :param after: The set of characters after which the string will be inserted.
+    :return: The new string with to_insert inserted after after.
+    """
+    position = original.find(after)
+    #print(position)
+
+    if position == -1:
+        # 'after' string is not found in 'original'
+        return original
+    #print(f"{len(after)}")
+    # Position of the insertion point
+    insert_pos = position + len(after)
+
+    return original[:insert_pos] + to_insert + original[insert_pos:]
+
+def split_timestamp_and_data(log_entry: str) -> list:
+    """
+    Split a log entry into timestamp and the rest of the data.
+
+    :param log_entry: The log entry as a string.
+    :return: A list with two entries: [timestamp, rest_of_data].
+    """
+    # The timestamp is always the first part, up to the first space after the milliseconds
+    parts = log_entry.split(' ', 2)
+
+    if len(parts) < 3:
+        raise ValueError(f"The log entry format is incorrect {parts}")
+
+    timestamp = ' '.join(parts[:2])
+    rest_of_data = parts[2]
+    #print(f"{timestamp} {rest_of_data}")
+
+    return [timestamp, rest_of_data]
+
 if __name__ == "__main__":
     try:
         chameleon_version = pkg_resources.get_distribution("Chameleon").version
@@ -287,10 +361,8 @@
         chameleon_version = "Version information not available"
     python_version = sys.version
     python_version = python_version[:8]
-    current_datetime = datetime.datetime.now()
+    current_datetime = datetime.now()
    formatted_datetime = current_datetime.strftime("%Y-%m-%d %H:%M")
-    yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).date()
-    formatted_yesterday = yesterday.strftime("%Y-%m-%d")
 
     #From SMEServer DB
     DomainName = 'bjsystems.co.uk' # $cdb->get('DomainName')->value;
@@ -306,14 +378,25 @@
     MAILMAN = "bounces"; #sender when mailman sending when orig is localhost
     DMARCDomain="dmarc"; #Pattern to recognised DMARC sent emails (this not very reliable, as the email address could be anything)
     DMARCOkPattern="dmarc: pass"; #Pattern to use to detect DMARC approval
-    hello_string = "Mailstats:"+Mailstats_version+' for '+DomainName+" at "+formatted_datetime
+    hello_string = "Mailstats:"+Mailstats_version+' for '+DomainName+" at "+formatted_datetime+" for "+formatted_yesterday
     print(hello_string)
     version_string = "Chameleon:"+chameleon_version+" Python:"+python_version
     print(version_string)
 
     num_hours = 25 # Represents hours from 0 to 23 - adds extra one for column totals and another for percentages
+
     data_file = data_file_path+'current.log'
-    sorted_log_dict = read_and_filter_yesterday_log(data_file)
+    log_entries,skip_count = read_in_yesterday_log_file(data_file)
+    if len(log_entries) == 0:
+        print(f"No records found in {data_file}")
+        quit()
+    else:
+        print(f"Found {len(log_entries)} entries in log for {formatted_yesterday} skipped {skip_count}")
+    summary_log_entries,skip_count = filter_summary_records(log_entries)
+    print(f"Found {len(summary_log_entries)} summary entries and skipped {skip_count} entries")
+    sorted_log_dict = sort_log_entries(summary_log_entries)
+    print(f"Sorted {len(sorted_log_dict)} entries")
+
     columnHeaders = ['Count','WebMail','Local','MailMan','Relay','DMARC','Virus','RBL/DNS','Geoip.','Non.Conf.','Karma','Rej.Load','Del.Spam','Qued.Spam?',' Ham','TOTALS','PERCENT']
 
     # dict for each colum identifying plugin that increments count
     columnPlugin = [''] * 17
@@ -326,11 +409,11 @@
     columnPlugin[RBLDNS] = ['rhsbl', 'dnsbl','uribl']
     columnPlugin[Geoip] = ['check_badcountries']
     columnPlugin[NonConf] = ['check_earlytalker','check_relay','check_norelay', 'require_resolvable_fromhost'
-    ,'check_basicheaders','check_badmailfrom','check_badrcptto_patterns'
-    ,'check_badrcptto','check_spamhelo','check_goodrcptto extn','rcpt_ok'
-    ,'check_goodrcptto','check_smtp_forward','count_unrecognized_commands','tls','auth::auth_cvm_unix_local'
-    ,'auth::auth_imap', 'earlytalker','resolvable_fromhost','relay','headers','mailfrom','badrcptto','helo'
-    ,'check_smtp_forward','sender_permitted_from']
+        ,'check_basicheaders','check_badmailfrom','check_badrcptto_patterns'
+        ,'check_badrcptto','check_spamhelo','check_goodrcptto extn','rcpt_ok'
+        ,'check_goodrcptto','check_smtp_forward','count_unrecognized_commands','tls','auth::auth_cvm_unix_local'
+        ,'auth::auth_imap', 'earlytalker','resolvable_fromhost','relay','headers','mailfrom','badrcptto','helo'
+        ,'check_smtp_forward','sender_permitted_from']
     columnPlugin[RejLoad] = ['loadcheck']
     columnPlugin[DelSpam] = []
     columnPlugin[QuedSpam] = []
@@ -338,7 +421,7 @@
     columnPlugin[TOTALS] = []
     columnPlugin[PERCENT] = []
     columnPlugin[Karma] = ['karma']
-    
+
     columnHeaders_len = len(columnHeaders)
     columnCounts_2d = initialize_2d_array(num_hours, columnHeaders_len,formatted_yesterday)
@@ -353,14 +436,18 @@
     print_progress_bar(0, sorted_len, prefix='Progress:', suffix='Complete', length=50)
     for timestamp, data in sorted_log_dict.items():
         i += 1
-        print_progress_bar(i, sorted_len, prefix='Progress:', suffix='Complete', length=50)
+        print_progress_bar(i, sorted_len, prefix='Scanning for main table:', suffix='Complete', length=50)
         #print(f"{i*100/len}%")
         # Count of in which hour it falls
         #hour = datetime.datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H')
         # Parse the timestamp string into a datetime object
-        dt = datetime.datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
+        dt = timestamp
         hour = dt.hour
-        
+        # parse the data
+        #print(data)
+        parsed_data = parse_data(data)
+        #print(f"parsed_data['action']:{parsed_data['action']}\n")
+
         # Increment Count in which headings it falls
         #Hourly count and column total
         columnCounts_2d[hour][Hour] += 1
@@ -370,19 +457,19 @@
         #Total totals
         columnCounts_2d[ColTotals][TOTALS] += 1
         #Queued email
-        if data['action'] == '(queue)':
+        if parsed_data['action'] == '(queue)':
             columnCounts_2d[hour][Ham] += 1
             columnCounts_2d[ColTotals][Ham] += 1
         #spamassasin
-        if data['spam-status'].lower().startswith('yes'):
+        if parsed_data['spam-status'].lower().startswith('yes'):
             #Extract other parameters from this string
             # example: Yes, score=10.3 required=4.0 autolearn=disable
             spam_pattern = r'score=([\d.]+)\s+required=([\d.]+)'
-            match = re.search(spam_pattern, data['spam-status'])
+            match = re.search(spam_pattern, parsed_data['spam-status'])
             if match:
                 score = float(match.group(1))
                 required = float(match.group(2))
-                #print(f"{data['spam-status']} / {score} {required}")
+                #print(f"{parsed_data['spam-status']} / {score} {required}")
                 if score >= SARejectLevel:
                     columnCounts_2d[hour][DelSpam] += 1
                    columnCounts_2d[ColTotals][DelSpam] += 1
@@ -390,26 +477,26 @@
                else:
                     columnCounts_2d[hour][QuedSpam] += 1
                     columnCounts_2d[ColTotals][QuedSpam] += 1
         #Local send
-        elif DomainName in data['sendurl']:
+        elif DomainName in parsed_data['sendurl']:
             columnCounts_2d[hour][Local] += 1
             columnCounts_2d[ColTotals][Local] += 1
         #Relay or webmail
-        elif not is_private_ip(data['ip']) and is_private_ip(data['sendurl1']) and data['action1'] == 'queued':
+        elif not is_private_ip(parsed_data['ip']) and is_private_ip(parsed_data['sendurl1']) and parsed_data['action1'] == 'queued':
             #Relay
-            if data['action1'] == 'queued':
+            if parsed_data['action1'] == 'queued':
                 columnCounts_2d[hour][Relay] += 1
                 columnCounts_2d[ColTotals][Relay] += 1
-        elif WebmailIP in data['sendurl1'] and not is_private_ip(data['ip']):
+        elif WebmailIP in parsed_data['sendurl1'] and not is_private_ip(parsed_data['ip']):
             #webmail
             columnCounts_2d[hour][WebMail] += 1
             columnCounts_2d[ColTotals][WebMail] += 1
-        elif localhost in data['sendurl']:
+        elif localhost in parsed_data['sendurl']:
             # but not if it comes from fetchmail
-            if not FETCHMAIL in data['sendurl1']:
+            if not FETCHMAIL in parsed_data['sendurl1']:
                 # might still be from mailman here
-                if MAILMAN in data['sendurl1']:
+                if MAILMAN in parsed_data['sendurl1']:
                     #$mailmansendcount++;
                     #$localsendtotal++;
                     columnCounts_2d[hour][MailMan] += 1
@@ -420,13 +507,13 @@
                     #Or sent to the DMARC server
                     #check for email address in $DMARC_Report_emails string
                     #my $logemail = $log_items[4];
-                    if DMARCDomain in data['from-email']: #(index($DMARC_Report_emails,$logemail)>=0) or
+                    if DMARCDomain in parsed_data['from-email']: #(index($DMARC_Report_emails,$logemail)>=0) or
                         #$localsendtotal++;
                         #$DMARCSendCount++;
                         localflag = 1;
                     else:
                         # ignore incoming localhost spoofs
-                        if not 'msg denied before queued' in data['error-msg']:
+                        if not 'msg denied before queued' in parsed_data['error-msg']:
                             #Webmail
                             #$localflag = 1;
                             #$WebMailsendtotal++;
@@ -441,54 +528,61 @@
                             columnCounts_2d[ColTotals][WebMail] += 1
 
         #Now increment the column which the plugin name indicates
-        if data ['action'] == '(deny)' and data['error-plugin']:
-            #print(f"Found plugin {data['error-plugin']}")
-            if data['error-plugin']:
-                row = search_2d_list(data['error-plugin'],columnPlugin)
+        if parsed_data['action'] == '(deny)' and parsed_data['error-plugin']:
+            #print(f"Found plugin {parsed_data['error-plugin']}")
+            if parsed_data['error-plugin']:
+                row = search_2d_list(parsed_data['error-plugin'],columnPlugin)
                 if not row == -1:
                     #print(f"Found row: {row}")
                     columnCounts_2d[hour][row] += 1
                     columnCounts_2d[ColTotals][row] += 1
                     # a few ad hoc extra extractons of data
                     if row == Virus:
-                        match = virus_pattern.match(data['action1'])
+                        match = virus_pattern.match(parsed_data['action1'])
                         if match:
                             found_viruses[match.group(1)] += 1
                         else:
-                            found_viruses[data['action1']] += 1
-                    elif data['error-plugin'] == 'naughty':
-                        match = qpcodes_pattern.match(data['action1'])
+                            found_viruses[parsed_data['action1']] += 1
+                    elif parsed_data['error-plugin'] == 'naughty':
+                        match = qpcodes_pattern.match(parsed_data['action1'])
                        if match:
                             rejReason = match.group(1)
-                            found_qpcodes[data['error-plugin']+"-"+rejReason] += 1
+                            found_qpcodes[parsed_data['error-plugin']+"-"+rejReason] += 1
                         else:
                             found_qpcodes['Unknown'] += 1
                     else:
-                        found_qpcodes[data['action1']] += 1
+                        found_qpcodes[parsed_data['action1']] += 1
     print()
+    # Now scan for the other lines in the log of interest
     found_countries = defaultdict(int)
     geoip_pattern = re.compile(r"check_badcountries: GeoIP Country: (.*)")
     dmarc_pattern = re.compile(r"dmarc: pass")
     total_countries = 0
     DMARCOkCount = 0
-    with open(data_file, 'r') as file:
-        i = 0
-        for line in file:
-            i += 1
-            #Pull out Geoip countries for analysis table
-            match = geoip_pattern.match(line)
-            if match:
-                country = match.group(1)
-                found_countries[country] += 1
-                total_countries += 1
-                break
-            #Pull out DMARC approvals
-            match = dmarc_pattern.match(line)
-            if match:
-                DMARCOkCount += 1
-                break
+    # Pick up all of the log entries read in earlier (not just the summary records) and sort them
+    sorted_log_dict = sort_log_entries(log_entries)
+
+    i = 0
+    sorted_len = len(sorted_log_dict)
+    print_progress_bar(0, sorted_len, prefix='Progress:', suffix='Complete', length=50)
+    for timestamp, data in sorted_log_dict.items():
+        i += 1
+        print_progress_bar(i, sorted_len, prefix='Scanning for sub tables:', suffix='Complete', length=50)
+        #Pull out Geoip countries for analysis table
+
+        match = geoip_pattern.match(data)
+        if match:
+            country = match.group(1)
+            found_countries[country] += 1
+            total_countries += 1
+            continue
+        #Pull out DMARC approvals
+        match = dmarc_pattern.match(data)
+        if match:
+            DMARCOkCount += 1
+            continue
 
     #Now apply the results to the chameleon template - main table
     # Path to the template file
@@ -538,4 +632,6 @@
     if get_html2text_version() == '2019.9.26':
         html_to_text(output_path+'.html',output_path+'.txt')
     print(f"Rendered HTML saved to {output_path}.html/txt")
-    
+
+
+
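
Notes: the new helpers split each log line on the first two spaces, then discard sub-second precision before datetime.strptime parses the result. A minimal sketch of that pipeline, not part of the patch; the sample line is invented and only the timestamp layout matters:

    from datetime import datetime

    # Hypothetical qpsmtpd-style line - real ones come from current.log
    sample = "2024-05-08 09:15:42.123456789 m12345 logterse data..."

    parts = sample.split(' ', 2)                  # ['2024-05-08', '09:15:42.123456789', 'm12345 logterse data...']
    timestamp_str = ' '.join(parts[:2])           # date and time recombined
    timestamp_str = timestamp_str.split('.')[0]   # net effect of truncate_microseconds: sub-seconds dropped
    dt = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
    print(dt.hour)                                # 9 - the row index used for the hourly counts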
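parse_data relies on the logterse convention: everything before the backtick is space-separated, everything after it is tab-separated. A sketch with an invented payload (the field values are placeholders, not real log data):

    # Invented summary payload for illustration only
    data = "m12345 (queue) logterse 192.0.2.1 mail.example.org`a@example.org\tb@example.org\tqueued"

    parts = data.split('`')
    fields1 = parts[0].strip().split() if len(parts) > 0 else []   # space-separated half
    fields2 = parts[1].split('\t') if len(parts) > 1 else []       # tab-separated half
    fields = fields1 + fields2
    print(len(fields), fields[1])                                  # prints: 8 (queue)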
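sort_log_entries returns a plain dict built from the sorted tuples, which works because dicts preserve insertion order in Python 3.7+, so iteration in the main loop is chronological. For example:

    from datetime import datetime

    entries = [
        (datetime(2024, 5, 8, 12, 0), "b"),
        (datetime(2024, 5, 8, 9, 0), "a"),
        (datetime(2024, 5, 8, 23, 59), "c"),
    ]
    ordered = {ts: data for ts, data in sorted(entries, key=lambda x: x[0])}
    assert list(ordered.values()) == ["a", "b", "c"]   # insertion order is preserved

One consequence of keying the dict on the timestamp is that two entries sharing the same second collapse into a single key.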
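The SpamAssassin score extraction uses re.search with two capture groups; the sample status string below is the one quoted in the code comment:

    import re

    spam_pattern = r'score=([\d.]+)\s+required=([\d.]+)'
    status = "Yes, score=10.3 required=4.0 autolearn=disable"

    match = re.search(spam_pattern, status)
    if match:
        score = float(match.group(1))       # 10.3
        required = float(match.group(2))    # 4.0
        # score >= SARejectLevel counts as deleted spam, otherwise queued spam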
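Plugin names map to table columns through search_2d_list, which returns the index of the first row containing the target. A self-contained sketch with a toy table (the real columnPlugin has 17 slots):

    def search_2d_list(target, data):
        # Same logic as the helper in the patch
        for row_idx, row in enumerate(data):
            if target in row:
                return row_idx
        return -1

    columnPlugin = [[], ['loadcheck'], ['rhsbl', 'dnsbl', 'uribl'], ['karma']]
    assert search_2d_list('karma', columnPlugin) == 3
    assert search_2d_list('unknown_plugin', columnPlugin) == -1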