diff src/dnsbl.cpp @ 178:d6531c702be3

embedded dcc filtering
author carl
date Thu, 04 Oct 2007 22:45:21 -0700
parents a4d313c2460b
children 8b86a894514d
line wrap: on
line diff
--- a/src/dnsbl.cpp	Sun Sep 30 10:27:14 2007 -0700
+++ b/src/dnsbl.cpp	Thu Oct 04 22:45:21 2007 -0700
@@ -255,14 +255,18 @@
 	have_whites 		= false;
 	only_whites 		= true;
 	want_spamassassin	= false;
+	want_dccgrey		= false;
+	want_dccbulk		= false;
+	content_context 	= NULL;
 	memory				= NULL;
 	scanner 			= NULL;
-	assassin			= NULL;
 	content_suffix		= NULL;
 	content_message 	= NULL;
 	uribl_suffix		= NULL;
 	uribl_message		= NULL;
 	content_host_ignore = NULL;
+	assassin			= NULL;
+	dccifd				= NULL;
 }
 
 mlfiPriv::~mlfiPriv() {
@@ -285,6 +289,7 @@
 	if (memory)   delete memory;
 	if (scanner)  delete scanner;
 	if (assassin) delete assassin;
+	if (dccifd)   delete dccifd;
 	if (!final) {
 		mailaddr			= NULL;
 		queueid 			= NULL;
@@ -293,14 +298,18 @@
 		have_whites 		= false;
 		only_whites 		= true;
 		want_spamassassin	= false;
+		want_dccgrey		= false;
+		want_dccbulk		= false;
+		content_context 	= NULL;
 		memory				= NULL;
 		scanner 			= NULL;
-		assassin			= NULL;
 		content_suffix		= NULL;
 		content_message 	= NULL;
 		uribl_suffix		= NULL;
 		uribl_message		= NULL;
 		content_host_ignore = NULL;
+		assassin			= NULL;
+		dccifd				= NULL;
 	}
 }
 
@@ -404,8 +413,8 @@
 void mlfiPriv::need_content_filter(char *rcpt, CONTEXT &con) {
 	register_string(env_to, rcpt, &con);
 	if (!memory) {
-		// first recipient that needs content filtering sets all
-		// the content filtering parameters
+		// first recipient that needs content filtering sets
+		// some of the content filtering parameters
 		memory		  = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_cctlds());
 		scanner 	  = new url_scanner(memory);
 		content_suffix		= con.get_content_suffix();
@@ -718,11 +727,12 @@
 //	lookup the domain name part of a hostname on the uribl
 //
 //	if we find part of the hostname on the uribl, return
-//	true and point found to the part of the hostname that we found.
+//	true and point found to the part of the hostname that we found
+//	as a string registered in hosts.
 //	otherwise, return false and preserve the value of found.
 //
-bool uriblookup(mlfiPriv &priv ,char *hostname, char *top, char *&found) ;
-bool uriblookup(mlfiPriv &priv, char *hostname, char *top, char *&found) {
+bool uriblookup(mlfiPriv &priv, string_set &hosts, char *hostname, char *top, char *&found) ;
+bool uriblookup(mlfiPriv &priv, string_set &hosts, char *hostname, char *top, char *&found) {
 	// top is pointer to '.' char at end of base domain, or null for ip address form
 	// so for hostname of www.fred.mydomain.co.uk
 	// top points to-----------------------^
@@ -742,7 +752,7 @@
 			snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix);
 			my_syslog(tmp);
 		}
-		found = hostname;
+		found = register_string(hosts, hostname);
 		return true;
 	}
 	return false;
@@ -757,11 +767,12 @@
 //	Else, look up three level domain.
 //
 //	if we find part of the hostname on the uribl, return
-//	true and point found to the part of the hostname that we found.
+//	true and point found to the part of the hostname that we found
+//	as a string registered in hosts.
 //	otherwise, return false and preserve the value of found.
 //
-bool check_uribl(mlfiPriv &priv, char *hostname, char *&found) ;
-bool check_uribl(mlfiPriv &priv, char *hostname, char *&found) {
+bool check_uribl(mlfiPriv &priv, string_set &hosts, char *hostname, char *&found) ;
+bool check_uribl(mlfiPriv &priv, string_set &hosts, char *hostname, char *&found) {
 	in_addr ip;
 	if (inet_aton(hostname, &ip)) {
 		const u_char *src = (const u_char *)&ip.s_addr;
@@ -769,9 +780,9 @@
 		if (src[0] == 10)  return false;	// don't do dns lookups on rfc1918 space
 		if ((src[0] == 192) && (src[1] == 168)) return false;
 		if ((src[0] == 172) && (16 <= src[1]) && (src[1] <= 31)) return false;
-		static char adr[sizeof "255.255.255.255"];
+		char adr[sizeof "255.255.255.255   "];
 		snprintf(adr, sizeof(adr), "%u.%u.%u.%u", src[3], src[2], src[1], src[0]);
-		return (uriblookup(priv, adr, NULL, found));
+		return (uriblookup(priv, hosts, adr, NULL, found));
 	}
 
 	char *top, *top2, *top3;
@@ -785,18 +796,18 @@
 			string_set::iterator i = priv.memory->get_cctlds()->find(top2+1);
 			string_set::iterator x = priv.memory->get_cctlds()->end();
 			// if we have a 2-level-cctld, just look at top three levels of the name
-			if (i != x) return uriblookup(priv, hostname, top2, found);
+			if (i != x) return uriblookup(priv, hosts, hostname, top2, found);
 
 			*top2 = '\0';
 			top3 = strrchr(hostname, '.');
 			*top2 = '.';
 
 			// if we have more than 3 levels in the name, look at the top three levels of the name
-			if (top3 && uriblookup(priv, hostname, top2, found)) return true;
+			if (top3 && uriblookup(priv, hosts, hostname, top2, found)) return true;
 			// if that was not found, fall thru to looking at the top two levels
 		}
 		// look at the top two levels of the name
-		return uriblookup(priv, hostname, top, found);
+		return uriblookup(priv, hosts, hostname, top, found);
 	}
 	return false;
 }
@@ -863,7 +874,7 @@
 					return true;
 				}
 				// Check uribl & surbl style list
-				if (priv.uribl_suffix && check_uribl(priv, host, found)) {
+				if (priv.uribl_suffix && check_uribl(priv, hosts, host, found)) {
 					msg = priv.uribl_message;
 					return true;
 				}
@@ -976,6 +987,9 @@
 	if (spamc != spamc_empty) {
 		priv.assassin  = new SpamAssassin(&priv, priv.ip, priv.helo, priv.mailaddr, priv.queueid);
 	}
+	if (dccifd_port) {
+		priv.dccifd = new DccInterface(dccifd_port, &priv, priv.ip, priv.helo, priv.mailaddr, priv.queueid);
+	}
 	return SMFIS_CONTINUE;
 }
 
@@ -994,6 +1008,7 @@
 	}
 
 	if (priv.assassin) priv.assassin->mlfi_envrcpt(ctx, loto);
+	if (priv.dccifd)   priv.dccifd->mlfi_envrcpt(loto);
 	// priv.mailaddr sending original message to loto
 	CONTEXT 	&con = *(dc.find_context(loto)->find_context(priv.mailaddr));
 	VERIFYP 	 ver = con.find_verify(loto);
@@ -1089,6 +1104,14 @@
 	else {
 		free(loto);
 	}
+	// remember first content filtering context
+	if (con.get_content_filtering()) {
+		if (!priv.content_context) priv.content_context = &con;
+		else if (con.get_require() && (priv.content_context != &con)) {
+			smfi_setreply(ctx, "452", "4.2.1", "incompatible filtering contexts");
+			return SMFIS_TEMPFAIL;
+		}
+	}
 	// accept the recipient
 	if (!con.get_content_filtering()) st = white;
 	if (st == oksofar) {
@@ -1097,6 +1120,10 @@
 		priv.only_whites = false;
 		priv.want_spamassassin |= (priv.assassin) &&					// have spam assassin available and
 								  (con.get_spamassassin_limit() != 0);	// want to use it with a non-zero score
+		priv.want_dccgrey	   |= (priv.dccifd) &&						// have dcc interface and
+								  (con.get_grey()); 					// want to use it for greylisting
+		priv.want_dccbulk	   |= (priv.dccifd) &&						// have dcc interface and
+								  (con.get_bulk() != 0);				// want to use it for bulk detection
 	}
 	if (st == white) {
 		priv.have_whites = true;
@@ -1110,6 +1137,7 @@
 	if (priv.authenticated) 	return SMFIS_CONTINUE;
 	if (priv.only_whites)		return SMFIS_CONTINUE;
 	if (priv.want_spamassassin) priv.assassin->mlfi_header(headerf, headerv);
+	if (priv.want_dccgrey || priv.want_dccbulk) priv.dccifd->mlfi_header(ctx, headerf, headerv);
 	return SMFIS_CONTINUE;
 }
 
@@ -1119,6 +1147,7 @@
 	if (priv.authenticated) 	return SMFIS_CONTINUE;
 	if (priv.only_whites)		return SMFIS_CONTINUE;
 	if (priv.want_spamassassin) priv.assassin->mlfi_eoh();
+	if (priv.want_dccgrey || priv.want_dccbulk) priv.dccifd->mlfi_eoh();
 	return SMFIS_CONTINUE;
 }
 
@@ -1128,6 +1157,7 @@
 	if (priv.authenticated) 	return SMFIS_CONTINUE;
 	if (priv.only_whites)		return SMFIS_CONTINUE;
 	if (priv.want_spamassassin) priv.assassin->mlfi_body(data, len);
+	if (priv.want_dccgrey || priv.want_dccbulk) priv.dccifd->mlfi_body(data, len);
 	priv.scanner->scan(data, len);
 	return SMFIS_CONTINUE;
 }
@@ -1143,8 +1173,18 @@
 	// process end of message
 	if (priv.authenticated || priv.only_whites) rc = SMFIS_CONTINUE;
 	else {
+		// assert env_to not empty, it contains the
+		// non-whitelisted folks that want content filtering
 		int score = (priv.want_spamassassin) ? priv.assassin->mlfi_eom() : 0;
-		// assert env_to not empty
+		bool greylist = false;
+		int  dccbulk  = 0;
+		if (priv.want_dccgrey || priv.want_dccbulk) priv.dccifd->mlfi_eom(greylist, dccbulk);
+
+		if (priv.want_dccgrey && greylist) {
+			smfi_setreply(ctx, "452", "4.2.1", "temporary greylist embargoed");
+			rc = SMFIS_TEMPFAIL;
+		}
+		else {
 		char buf[maxlen];
 		string msg;
 		string_set alive;
@@ -1153,8 +1193,9 @@
 		for (context_map::iterator i=priv.env_to.begin(); i!=priv.env_to.end(); i++) {
 			char *rcpt	 = (*i).first;
 			CONTEXT &con = *((*i).second);
-			if (!con.acceptable_content(*priv.memory, score, msg)) {
-				// bad html tags or excessive hosts or high spam assassin score
+				if (!con.acceptable_content(*priv.memory, score, dccbulk, msg)) {
+					// bad html tags or excessive hosts or
+					// high spam assassin score or dcc bulk threshold exceeded
 				smfi_delrcpt(ctx, rcpt);
 			}
 			else {
@@ -1200,6 +1241,7 @@
 			rc = SMFIS_CONTINUE;
 		}
 	}
+	}
 	// reset for a new message on the same connection
 	mlfi_abort(ctx);
 	return rc;