changeset 192:8f4a9a37d4d9

delay autowhitelisting to avoid out of office reply bots
author carl
date Sun, 11 Nov 2007 12:49:25 -0800
parents 2a67d31099c3
children 3ea79ef741a0
files ChangeLog NEWS configure.in package src/context.cpp src/context.h src/dnsbl.cpp src/dnsbl.h
diffstat 8 files changed, 2617 insertions(+), 2558 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sat Nov 10 16:20:51 2007 -0800
+++ b/ChangeLog	Sun Nov 11 12:49:25 2007 -0800
@@ -1,5 +1,9 @@
     $Id$
 
+6.14 2007-11-11
+    Delay auto whitelisting until we see the headers, to prevent
+    auto whitelisting due to bulk out of office reply bots.
+
 6.13 2007-11-10
     HELO command is optional, and if missing caused a null pointer
     dereference. Use smfi_progress in libmilter if it is available.
--- a/NEWS	Sat Nov 10 16:20:51 2007 -0800
+++ b/NEWS	Sun Nov 11 12:49:25 2007 -0800
@@ -1,5 +1,6 @@
     $Id$
 
+6.14 2007-11-11 Don't autowhitelist due to out of office reply bots.
 6.13 2007-11-10 Fix null pointer dereference on missing HELO command.
 6.12 2007-10-13 SMTP rejections take precedence over greylisting.
 6.11 2007-10-07 Add DCC filtering via dccifd. Fix static buffer referenced by multiple threads.
--- a/configure.in	Sat Nov 10 16:20:51 2007 -0800
+++ b/configure.in	Sun Nov 11 12:49:25 2007 -0800
@@ -1,6 +1,6 @@
 
 AC_PREREQ(2.59)
-AC_INIT(dnsbl,6.13,carl@five-ten-sg.com)
+AC_INIT(dnsbl,6.14,carl@five-ten-sg.com)
 AC_CONFIG_SRCDIR([config.h.in])
 AC_CONFIG_HEADER([config.h])
 
--- a/package	Sat Nov 10 16:20:51 2007 -0800
+++ b/package	Sun Nov 11 12:49:25 2007 -0800
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+find . -name '*~' | while read a; do rm -f "$a"; done
+
 T=`grep AC_INIT configure.in | cut -d'(' -f2`
 NAME=`echo $T | cut -d, -f1`
 VER=`echo $T | cut -d, -f2`
--- a/src/context.cpp	Sat Nov 10 16:20:51 2007 -0800
+++ b/src/context.cpp	Sun Nov 11 12:49:25 2007 -0800
@@ -579,6 +579,13 @@
 }
 
 
+DELAYWHITE::DELAYWHITE(char *loto_, WHITELISTERP w_, CONTEXTP con_) {
+    loto = loto_;
+    w    = w_;
+    con  = con_;
+}
+
+
 DNSBL::DNSBL(char *n, char *s, char *m) {
 	name	= n;
 	suffix	= s;
--- a/src/context.h	Sat Nov 10 16:20:51 2007 -0800
+++ b/src/context.h	Sun Nov 11 12:49:25 2007 -0800
@@ -24,6 +24,7 @@
 class VERIFY;
 class SMTP;
 class WHITELISTER;
+class DELAYWHITE;
 class recorder;
 
 typedef map<char *, char *, ltstr>		  string_map;
@@ -33,6 +34,7 @@
 typedef DNSBL * 						  DNSBLP;
 typedef VERIFY *						  VERIFYP;
 typedef WHITELISTER *					  WHITELISTERP;
+typedef DELAYWHITE *                      DELAYWHITEP;
 typedef list<DNSBLP>					  dnsblp_list;
 typedef map<char *, DNSBLP, ltstr>		  dnsblp_map;
 typedef CONTEXT *						  CONTEXTP;
@@ -43,6 +45,7 @@
 typedef map<char *, time_t,  ltstr> 	  autowhite_sent;
 typedef map<char *, VERIFYP, ltstr> 	  verify_map;
 typedef map<char *, WHITELISTERP, ltstr>  whitelister_map;
+typedef list<DELAYWHITEP>                 delay_whitelist;
 
 class SMTP {
 	static const int maxlen = 1000;
@@ -109,6 +112,17 @@
 	void	set_days(int d) {days = d;};
 };
 
+class DELAYWHITE {
+    char*           loto;
+    WHITELISTERP    w;
+    CONTEXTP        con;
+public:
+    DELAYWHITE(char *loto_, WHITELISTERP w_, CONTEXTP con_);
+    char           *get_loto() {return loto;};
+    WHITELISTERP    get_w()    {return w;};
+    CONTEXTP        get_con()  {return con;};
+};
+
 struct DNSBL {
 	char	*name;		// nickname for this dns based list
 	char	*suffix;	// blacklist suffix like blackholes.five-ten-sg.com
@@ -261,9 +275,6 @@
 	void		dump();
 };
 
-struct RATELIMIT {
-
-};
 
 extern char *token_autowhite;
 extern char *token_black;
--- a/src/dnsbl.cpp	Sat Nov 10 16:20:51 2007 -0800
+++ b/src/dnsbl.cpp	Sun Nov 11 12:49:25 2007 -0800
@@ -257,6 +257,7 @@
 	want_spamassassin	= false;
 	want_dccgrey		= false;
 	want_dccbulk		= false;
+    is_bulk_precedence  = false;
 	content_context 	= NULL;
 	memory				= NULL;
 	scanner 			= NULL;
@@ -285,6 +286,7 @@
 	if (queueid)	   free(queueid);
 	if (authenticated) free(authenticated);
 	if (client_name)   free(client_name);
+    delayer.clear();
 	discard(env_to);
 	if (memory)   delete memory;
 	if (scanner)  delete scanner;
@@ -302,6 +304,7 @@
 		want_spamassassin	= false;
 		want_dccgrey		= false;
 		want_dccbulk		= false;
+        is_bulk_precedence  = false;
 		content_context 	= NULL;
 		memory				= NULL;
 		scanner 			= NULL;
@@ -950,17 +953,22 @@
 // wrapper if the mail client did not, but the current version does not do
 // that.  So the <> wrapper is now optional.  It may have mixed case, just
 // as the mail client sent it.	We dup the string and convert the duplicate
-// to lower case.
+// to lower case. Some clients enclose the entire address in single quotes,
+// so we strip those as well.
 //
 char *to_lower_string(char *email);
 char *to_lower_string(char *email) {
 	int n = strlen(email);
-	if (*email == '<') {
+    if (email[0] == '<') {
 		// assume it also ends with >
 		n -= 2;
 		if (n < 1) return strdup(email);	// return "<>"
 		email++;
 	}
+    if ((email[0] == '\'') && (email[n-1] == '\'') && (n > 2)) {
+        n -= 2;
+        email++;
+    }
 	char *key = strdup(email);
 	key[n] = '\0';
 	for (int i=0; i<n; i++) key[i] = tolower(key[i]);
@@ -1110,14 +1118,15 @@
 	// if needed to ensure we can accept replies
 	loto = to_lower_string(rcptaddr);
 	WHITELISTERP w = con2.find_autowhite(loto, priv.mailaddr);
+    // check if local part is too big
+    const int max_local_size = 30;
+    char *p = strchr(loto, '@');
+    int len = (p) ? p-loto : max_local_size;
+    if (len >= max_local_size) w = NULL;    // too big, pretend we don't have a whitelister
+    // record it if we have a whitelister
 	if (w) {
-		if (debug_syslog > 1) {
-			char buf[maxlen];
-			char msg[maxlen];
-			snprintf(msg, sizeof(msg), "whitelist reply from <%s> in context %s", loto, con2.get_full_name(buf,maxlen));
-			my_syslog(&priv, msg);
-		}
-		w->sent(loto);	// don't free it, the whitelister takes ownership of the string
+        DELAYWHITEP dwp = new DELAYWHITE(loto, w, &con2);
+        priv.delayer.push_back(dwp);
 	}
 	else {
 		free(loto);
@@ -1154,6 +1163,10 @@
 sfsistat mlfi_header(SMFICTX* ctx, char* headerf, char* headerv)
 {
 	mlfiPriv &priv = *MLFIPRIV;
+    // detect precedence:bulk for avoiding autowhitelisting
+    if ((strcasecmp(headerf, "precedence") == 0) &&
+        (strcasecmp(headerv, "bulk") == 0)) priv.is_bulk_precedence = true;
+    // other headers are only needed for content filtering
 	if (priv.authenticated) 	return SMFIS_CONTINUE;
 	if (priv.only_whites)		return SMFIS_CONTINUE;
 	if (priv.want_spamassassin) priv.assassin->mlfi_header(headerf, headerv);
@@ -1164,6 +1177,25 @@
 sfsistat mlfi_eoh(SMFICTX* ctx)
 {
 	mlfiPriv &priv = *MLFIPRIV;
+    // delayed autowhitelisting
+    while (!priv.delayer.empty()) {
+        DELAYWHITEP dwp = priv.delayer.front();
+        if (!priv.is_bulk_precedence) {
+            char         *loto = dwp->get_loto();
+            WHITELISTERP w     = dwp->get_w();
+            CONTEXTP     con2  = dwp->get_con();
+            if (debug_syslog > 1) {
+                char buf[maxlen];
+                char msg[maxlen];
+                snprintf(msg, sizeof(msg), "whitelist reply from <%s> in context %s", loto, con2->get_full_name(buf,maxlen));
+                my_syslog(&priv, msg);
+            }
+            w->sent(loto);  // don't free it, the whitelister takes ownership of the string
+        }
+        delete dwp;
+        priv.delayer.pop_front();
+    }
+    // content filtering
 	if (priv.authenticated) 	return SMFIS_CONTINUE;
 	if (priv.only_whites)		return SMFIS_CONTINUE;
 	if (priv.want_spamassassin) priv.assassin->mlfi_eoh();
--- a/src/dnsbl.h	Sat Nov 10 16:20:51 2007 -0800
+++ b/src/dnsbl.h	Sun Nov 11 12:49:25 2007 -0800
@@ -44,6 +44,8 @@
 	bool			want_spamassassin;		// at least one non-whitelisted recipient has a non zero spamassassin limit
 	bool			want_dccgrey;			// at least one non-whitelisted recipient wants dcc greylisting
 	bool			want_dccbulk;			// at least one non-whitelisted recipient wants dcc bulk filtering
+    bool            is_bulk_precedence;     // have precedence:bulk header to prevent autowhitelisting
+    delay_whitelist delayer;                // to remember autowhitelisting until we see headers
 	CONTEXT 		*content_context;		// first non-whitelisted recipient with a content filtering context
 	context_map 	env_to; 				// map each non-whitelisted recipient to their filtering context
 	recorder		*memory;				// memory for the content scanner