changeset 119:d9d2f8699621

uribl patch from Jeff Evans <jeffe@tricab.com>
author carl
date Sun, 12 Mar 2006 12:38:43 -0800 (2006-03-12)
parents 13fcb0c66763
children 1d9e6c1b8872
files dnsbl.conf src/context.cpp src/context.h src/dnsbl.cpp src/dnsbl.h src/scanner.cpp xml/dnsbl.in
diffstat 7 files changed, 119 insertions(+), 51 deletions(-) [+]
line wrap: on
line diff
--- a/dnsbl.conf	Sun Mar 12 10:20:59 2006 -0800
+++ b/dnsbl.conf	Sun Mar 12 12:38:43 2006 -0800
@@ -7,10 +7,12 @@
 
     content on {
         filter    sbl-xbl.spamhaus.org        "Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s";
+        uribl     multi-surbl.org             "Mail containing %s rejected - surbl; see http://www.rulesemporium.com/cgi-bin/uribl.cgi?bl0=1&domain0=%s";
+        #uribl    black.uribl.com             "Mail containing %s rejected - uribl; see http://l.uribl.com/?d=%s";
         ignore    { include "hosts-ignore.conf"; };
         tld       { include "tld.conf"; };
         cctld       { include "cctld.conf"; };
-#        html_tags { include "html-tags.conf"; };
+        html_tags { include "html-tags.conf"; };
         html_limit off;
         host_limit soft 20;
     };
--- a/src/context.cpp	Sun Mar 12 10:20:59 2006 -0800
+++ b/src/context.cpp	Sun Mar 12 12:38:43 2006 -0800
@@ -52,6 +52,7 @@
 char *token_tld;
 char *token_cctld;
 char *token_unknown;
+char *token_uribl;
 char *token_verify;
 char *token_white;
 
@@ -486,6 +487,8 @@
 	content_filtering	= (parent) ? parent->content_filtering : false;
 	content_suffix		= NULL;
 	content_message 	= NULL;
+	uribl_suffix		= NULL;
+	uribl_message		= NULL;
 	host_limit			= (parent) ? parent->host_limit  : 0;
 	host_limit_message	= NULL;
 	host_random 		= (parent) ? parent->host_random : false;
@@ -615,12 +618,24 @@
 }
 
 
+char* CONTEXT::get_uribl_suffix() {	// uribl dns suffix in effect for this context
+	if (!uribl_suffix && parent) return parent->get_uribl_suffix();	// not set here, so defer to the parent context
+	return uribl_suffix;	// own value; still NULL when it is unset and there is no parent
+}
+
+
 char* CONTEXT::get_content_message() {
 	if (!content_message && parent) return parent->get_content_message();
 	return content_message;
 }
 
 
+char* CONTEXT::get_uribl_message() {	// reject message paired with the uribl suffix for this context
+	if (!uribl_message && parent) return parent->get_uribl_message();	// not set here, so defer to the parent context
+	return uribl_message;	// own value; still NULL when it is unset and there is no parent
+}
+
+
 string_set& CONTEXT::get_content_host_ignore() {
 	if (content_host_ignore.empty() && parent) return parent->get_content_host_ignore();
 	return content_host_ignore;
@@ -692,6 +707,9 @@
 		if (content_suffix) {
 			printf("%s         filter %s \"%s\"; \n", indent, content_suffix, content_message);
 		}
+		if (uribl_suffix) {
+			printf("%s         uribl %s \"%s\"; \n", indent, uribl_suffix, uribl_message);
+		}
 		if (!content_host_ignore.empty()) {
 			printf("%s         ignore { \n", indent);
 			for (string_set::iterator i=content_host_ignore.begin(); i!=content_host_ignore.end(); i++) {
@@ -891,6 +909,13 @@
 			me.set_content_message(messag);
 			if (!tsa(tok, token_semi)) return false;
 		}
+		else if (have == token_uribl) {
+			char *suffix = tok.next();
+			char *messag = tok.next();
+			me.set_uribl_suffix(suffix);
+			me.set_uribl_message(messag);
+			if (!tsa(tok, token_semi)) return false;
+		}
 		else if (have == token_ignore) {
 			if (!tsa(tok, token_lbrace)) return false;
 			while (true) {
@@ -1282,6 +1307,7 @@
 	token_substitute = register_string("substitute");
 	token_tld		 = register_string("tld");
 	token_unknown	 = register_string("unknown");
+	token_uribl 	 = register_string("uribl");
 	token_verify	 = register_string("verify");
 	token_white 	 = register_string("white");
 
--- a/src/context.h	Sun Mar 12 10:20:59 2006 -0800
+++ b/src/context.h	Sun Mar 12 12:38:43 2006 -0800
@@ -96,8 +96,10 @@
 	context_map 	env_from_context;	// map senders to a child context
 	char *			env_from_default;	// default value for senders that are not found in the map white/black/unknown/inherit
 	bool			content_filtering;	//
-	char *			content_suffix; 	// for sbl url body filtering
+	char *			content_suffix; 	// for url body filtering based on ip addresses of hostnames in the body
 	char *			content_message;	// ""
+	char *			uribl_suffix;		// for uribl body filtering based on hostnames in the body
+	char *			uribl_message;		// ""
 	string_set		content_host_ignore;// hosts to ignore for content sbl checking
 	string_set		content_tlds;		//
 	string_set		content_cctlds; 	//
@@ -135,6 +137,8 @@
 	void		set_content_filtering(bool filter)			{content_filtering = filter;};
 	void		set_content_suffix(char *suffix)			{content_suffix    = suffix;};
 	void		set_content_message(char *message)			{content_message   = message;};
+	void		set_uribl_suffix(char *suffix)				{uribl_suffix	   = suffix;};
+	void		set_uribl_message(char *message)			{uribl_message	   = message;};
 	void		add_ignore(char *host)						{content_host_ignore.insert(host);};
 	void		add_tld(char *tld)							{content_tlds.insert(tld);};
 	void		add_cctld(char *cctld)						{content_cctlds.insert(cctld);};
@@ -155,6 +159,8 @@
 	bool			get_host_random()						{return host_random;};
 	char*			get_content_suffix();
 	char*			get_content_message();
+	char*			get_uribl_suffix();
+	char*			get_uribl_message();
 	string_set& 	get_content_host_ignore();
 	string_set& 	get_content_tlds();
 	string_set& 	get_content_cctlds();
@@ -219,6 +225,7 @@
 extern char *token_substitute;
 extern char *token_tld;
 extern char *token_unknown;
+extern char *token_uribl;
 extern char *token_white;
 
 extern char *token_myhostname;
--- a/src/dnsbl.cpp	Sun Mar 12 10:20:59 2006 -0800
+++ b/src/dnsbl.cpp	Sun Mar 12 12:38:43 2006 -0800
@@ -232,6 +232,8 @@
 	scanner 			= NULL;
 	content_suffix		= NULL;
 	content_message 	= NULL;
+	uribl_suffix		= NULL;
+	uribl_message		= NULL;
 	content_host_ignore = NULL;
 }
 
@@ -259,6 +261,8 @@
 		scanner 			= NULL;
 		content_suffix		= NULL;
 		content_message 	= NULL;
+		uribl_suffix		= NULL;
+		uribl_message		= NULL;
 		content_host_ignore = NULL;
 	}
 }
@@ -369,6 +373,8 @@
 		scanner 	  = new url_scanner(memory);
 		content_suffix		= con.get_content_suffix();
 		content_message 	= con.get_content_message();
+		uribl_suffix		= con.get_uribl_suffix();
+		uribl_message		= con.get_uribl_message();
 		content_host_ignore = &con.get_content_host_ignore();
 	}
 }
@@ -669,14 +675,14 @@
 		if (x) hostname = x+1;
 		*top = '.';
 	}
-	for (int i=0; i<2; i++) {
-		snprintf(buf, sizeof(buf), "%s.%s", hostname, uriblname[i]);
+	snprintf(buf, sizeof(buf), "%s.%s", hostname, priv.uribl_suffix);
+	if (dns_interface(priv, buf, false, NULL)) {
 		if (debug_syslog > 2) {
 			char tmp[maxlen];
-			snprintf(tmp, sizeof(tmp), "Looking up %s on %s", hostname, uriblname[i]);
+			snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix);
 			my_syslog(tmp);
 		}
-		if (dns_interface(priv, buf, false, NULL)) return true;
+		return true;
 	}
 	return false;
 }
@@ -727,14 +733,14 @@
 
 
 ////////////////////////////////////////////////
-//	check the hosts from the body against the content dnsbl
+//	check the hosts from the body against the content filter and uribl dnsbls
 //
-bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip);
-bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip) {
+bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&msg, char *&host, int &ip);
+bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&msg, char *&host, int &ip) {
+	if (!priv.content_suffix && !priv.uribl_suffix) return false;	// nothing to check
 	CONFIG	   &dc	   = *priv.pc;
 	string_set &hosts  = priv.memory->get_hosts();
 	string_set &ignore = *priv.content_host_ignore;
-	char	   *suffix = priv.content_suffix;
 
 	int count = 0;
 	int   cnt = hosts.size();	// number of hosts we could look at
@@ -779,19 +785,22 @@
 			if (i == ips.end()) {
 				// we haven't looked this up yet
 				ips.insert(ip);
-				if (check_single(priv, ip, suffix)) return true;
+				if (priv.content_suffix && check_single(priv, ip, priv.content_suffix)) {	// only when a filter dnsbl is configured; either suffix may be NULL here
+					msg = priv.content_message;
+					return true;
+				}
 				// Check uribl & surbl
-				if (check_uribl(priv, host)) return true;
+				if (priv.uribl_suffix && check_uribl(priv, host)) {	// only when a uribl is configured; check_uribl formats the suffix with %s
+					msg = priv.uribl_message;
+					return true;
+				}
 			}
 		}
 	}
 	limit *= 4;   // allow average of 3 ns per host name
 	for (ns_mapper::iterator i=nameservers.ns_ip.begin(); i!=nameservers.ns_ip.end(); i++) {
 		count++;
-		if ((count > limit) && (limit > 0)) {
-			if (random) continue; // don't complain
-			return true;
-		}
+		if ((count > limit) && (limit > 0)) return false;	// too many name servers to check them all
 		host = (*i).first;	// a transient reference that needs to be replaced before we return it
 		ip	 = (*i).second;
 		if (!ip) ip = dns_interface(priv, host, false, NULL);
@@ -812,7 +821,8 @@
 			int_set::iterator i = ips.find(ip);
 			if (i == ips.end()) {
 				ips.insert(ip);
-				if (check_single(priv, ip, suffix)) {
+				if (priv.content_suffix && check_single(priv, ip, priv.content_suffix)) {	// content_suffix is NULL when only a uribl is configured
+					msg = priv.content_message;
 					string_map::iterator j = nameservers.ns_host.find(host);
 					if (j != nameservers.ns_host.end()) {
 						char *refer = (*j).second;
@@ -985,11 +995,12 @@
 		}
 		bool rejecting = alive.empty(); // if alive is empty, we must have set msg above in acceptable_content()
 		if (!rejecting) {
-			if (check_hosts(priv, random, limit, host, ip)) {
+			char *fmt = NULL;	// message format selected by check_hosts(); not named msg, since a local msg would hide the one assigned from buf below
+			if (check_hosts(priv, random, limit, fmt, host, ip)) {
 				char adr[sizeof "255.255.255.255"];
 				adr[0] = '\0';
 				inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
-				snprintf(buf, sizeof(buf), priv.content_message, host, adr);
+				snprintf(buf, sizeof(buf), fmt, host, adr);
 				msg = buf;
 				rejecting = true;
 			}
--- a/src/dnsbl.h	Sun Mar 12 10:20:59 2006 -0800
+++ b/src/dnsbl.h	Sun Mar 12 12:38:43 2006 -0800
@@ -28,8 +28,10 @@
 	context_map	env_to;		// map each	non-whitelisted	recipient to their filtering context
 	recorder	*memory;	// memory for the content scanner
 	url_scanner	*scanner;	// object to handle	body scanning
-	char		*content_suffix;		// content filtering parameters
+	char		*content_suffix;		// for url body filtering based on ip addresses of hostnames in the body
 	char		*content_message;		// ""
+	char		*uribl_suffix;			// for uribl body filtering based on hostnames in the body
+	char		*uribl_message; 		// ""
 	string_set	*content_host_ignore;	// ""
 
 
--- a/src/scanner.cpp	Sun Mar 12 10:20:59 2006 -0800
+++ b/src/scanner.cpp	Sun Mar 12 12:38:43 2006 -0800
@@ -6,6 +6,9 @@
 
 */
 
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
 #include "includes.h"
 
 static char* scanner_version="$Id$";
--- a/xml/dnsbl.in	Sun Mar 12 10:20:59 2006 -0800
+++ b/xml/dnsbl.in	Sun Mar 12 12:38:43 2006 -0800
@@ -320,14 +320,26 @@
             </orderedlist>
             <para>
                 If content filtering is enabled for this body, the mail text is decoded
-                (uuencode, base64, mime, html entity, url encodings), scanned for HTTP
-                and HTTPS URLs, and the first &lt;configurable&gt; host names are
-                checked for their presence on the single &lt;configurable&gt; DNSBL.
-                The only known list that is suitable for this purpose is the SBL.  If
-                any of those host names are on that DNSBL (or have nameservers that are
-                on that list), and it is not on the &lt;configurable&gt; ignore list,
-                the mail is rejected.  We also scan for excessive bad html tags, and if
-                a &lt;configurable&gt; limit is exceeded, the mail is rejected.
+                (uuencode, base64, mime, html entity, url encodings), and scanned for HTTP
+                and HTTPS URLs or bare host names.  Host names must either be IP address
+                literals or end in a string defined by the TLD list.  The first
+                &lt;configurable&gt; host names are checked as follows.
+            </para>
+            <para>
+                The only known list that is suitable for the content filter DNSBL is the
+                SBL.  If the content filter DNSBL is defined, and any of those host
+                names resolve to ip addresses that are on that DNSBL (or have
+                nameservers that are on that list), and the host name is not on the
+                &lt;configurable&gt; ignore list, the mail is rejected.
+            </para>
+            <para>
+                If the content uribl DNSBL is defined, and any of those host names are
+                on that DNSBL, and the host name is not on the &lt;configurable&gt;
+                ignore list, the mail is rejected.
+            </para>
+            <para>
+                We also scan for excessive bad html tags, and if a &lt;configurable&gt;
+                limit is exceeded, the mail is rejected.
             </para>
         </refsect1>
 
@@ -513,11 +525,13 @@
 DNSBLLIST  = "dnsbl_list" {NAME}+
 
 CONTENT    = "content" ("on" | "off") "{" {CONTENT-ST}+ "}"
-CONTENT-ST = (FILTER | IGNORE | TLD | HTML-TAGS | HTML-LIMIT |
-                                                  HOST-LIMIT) ";"
+CONTENT-ST = (FILTER | URIBL | IGNORE | TLD | CCTLD | HTML-TAGS |
+              HTML-LIMIT | HOST-LIMIT) ";"
 FILTER     = "filter" DNSPREFIX ERROR-MSG
+URIBL      = "uribl"  DNSPREFIX ERROR-MSG
 IGNORE     = "ignore"     "{" {HOSTNAME [";"]}+ "}"
 TLD        = "tld"        "{" {TLD      [";"]}+ "}"
+CCTLD      = "cctld"      "{" {TLD      [";"]}+ "}"
 HTML-TAGS  = "html_tags"  "{" {HTMLTAG  [";"]}+ "}"
 ERROR-MSG  = string containing exactly two %s replacement tokens
              for the client ip address
@@ -553,8 +567,11 @@
 
     content on {
         filter    sbl-xbl.spamhaus.org        "Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s";
+        uribl     multi-surbl.org             "Mail containing %s rejected - surbl; see http://www.rulesemporium.com/cgi-bin/uribl.cgi?bl0=1&domain0=%s";
+        #uribl    black.uribl.com             "Mail containing %s rejected - uribl; see http://l.uribl.com/?d=%s";
         ignore    { include "hosts-ignore.conf"; };
         tld       { include "tld.conf"; };
+        cctld     { include "cctld.conf"; };
         html_tags { include "html-tags.conf"; };
         html_limit on 20 "Mail containing excessive bad html tags rejected";
         html_limit off;