From f86021b8c92f7a5494c00ee607473244eba34d3a Mon Sep 17 00:00:00 2001 From: Brian Read Date: Tue, 2 Sep 2025 10:17:26 +0100 Subject: [PATCH] Fix missing blacklist URLs from report --- root/usr/bin/mailstats.py | 91 +++++++++++++++++++++++++++++++-------- 1 file changed, 72 insertions(+), 19 deletions(-) diff --git a/root/usr/bin/mailstats.py b/root/usr/bin/mailstats.py index 25134ca..9736c8e 100644 --- a/root/usr/bin/mailstats.py +++ b/root/usr/bin/mailstats.py @@ -1173,11 +1173,63 @@ def display_keys_and_values(data): raise ValueError("Input must be a list of dictionaries or a list of lists.") def extract_blacklist_domain(text): - match = re.search(r'http://www\.surbl\.org', text) - if match: - return "www.surbl.org" - return None - + """ + Compare 'text' against comma-separated URL strings from global vars + RBLList, SBLList, and UBLList. Return the first matching entry or "". + Match is done on exact hostname substring OR the base domain (eTLD+1), + so 'black.uribl.com' will match text containing 'lookup.uribl.com'. + """ + s = text if isinstance(text, str) else str(text or "") + s_lower = s.lower() + logging.info(f"extract blacklist called:{text}") + + combined = ",".join([RBLList, SBLList, UBLList]) + + def hostname_from(sval: str) -> str: + sval = (sval or "").strip().lower() + if "://" in sval: + # Strip scheme using simple split to avoid needing urlparse + sval = sval.split("://", 1)[1] + # Strip path and port if present + sval = sval.split("/", 1)[0] + sval = sval.split(":", 1)[0] + # Remove leading wildcards/dots + sval = sval.lstrip(".") + if sval.startswith("*."): + sval = sval[2:] + return sval + + def base_domain(hostname: str) -> str: + parts = hostname.split(".") + if len(parts) >= 3 and parts[-2] in ("co", "org", "gov", "ac") and parts[-1] == "uk": + return ".".join(parts[-3:]) + if len(parts) >= 2: + return ".".join(parts[-2:]) + return hostname + + def boundary_re(term: str): + # Match term when not part of a larger domain label + return re.compile(r"(?