diff src/dnsbl.cpp @ 117:aa07452e641b

uribl patch from Jeff Evans <jeffe@tricab.com>
author carl
date Sun, 12 Mar 2006 10:15:39 -0800
parents 07e5d4721213
children d9d2f8699621
line wrap: on
line diff
--- a/src/dnsbl.cpp	Sun Jan 08 10:27:24 2006 -0800
+++ b/src/dnsbl.cpp	Sun Mar 12 10:15:39 2006 -0800
@@ -365,7 +365,7 @@
 	if (!memory) {
 		// first recipient that needs content filtering sets all
 		// the content filtering parameters
-		memory		  = new recorder(this, con.get_html_tags(), con.get_content_tlds());
+		memory		  = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_cctlds());
 		scanner 	  = new url_scanner(memory);
 		content_suffix		= con.get_content_suffix();
 		content_message 	= con.get_content_message();
@@ -650,6 +650,83 @@
 
 
 ////////////////////////////////////////////////
+//	lookup the domain name part of a hostname on two lists
+//
+// Look up one name on each of the uri blacklist zones.
+//
+// priv     - per-message state, passed through to dns_interface()
+// hostname - host name without a trailing dot; when top is NULL it is used
+//            exactly as given (the caller passes an address string then)
+// top      - pointer into hostname at the '.' that ends the base domain,
+//            or NULL
+//
+// Returns true as soon as dns_interface() returns true for one of the
+// zones (presumably meaning the name is listed - confirm against
+// dns_interface), false if none of them did.
+bool uriblookup(mlfiPriv &priv ,char *hostname, char *top) ;
+bool uriblookup(mlfiPriv &priv, char *hostname, char *top) {
+	// top is pointer to '.' char at end of base domain, or null for ip address form
+	// so for hostname of www.fred.mydomain.co.uk
+	// top points to-----------------------^
+	// and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff
+	const char *const uriblname[] = { "multi.surbl.org", "multi.uribl.com" };
+	const int zones = sizeof(uriblname) / sizeof(uriblname[0]);
+
+	if (top) {
+		// add one more component: cut the name at top, find the previous
+		// '.' and start from the label after it. top is put back afterwards
+		// so the caller's string is unchanged.
+		*top = '\0';
+		char *x = strrchr(hostname, '.');
+		if (x) hostname = x+1;
+		*top = '.';
+	}
+	char buf[maxlen];
+	for (int i=0; i<zones; i++) {
+		// query name is <hostname>.<zone>
+		snprintf(buf, sizeof(buf), "%s.%s", hostname, uriblname[i]);
+		if (debug_syslog > 2) {
+			char tmp[maxlen];
+			snprintf(tmp, sizeof(tmp), "Looking up %s on %s", hostname, uriblname[i]);
+			my_syslog(tmp);
+		}
+		if (dns_interface(priv, buf, false, NULL)) return true;
+	}
+	return false;
+}
+
+
+////////////////////////////////////////////////
+// uribl checker
+// -------------
+// hostname MUST not have a trailing dot
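+// If the last two labels are a configured two-level cctld (e.g. co.uk),
+// look up the top three levels of the name.
+// Else look up the top three levels (only when the name has more than
+// three), then fall back to the top two levels.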
+// priv     - per-message state; priv.memory supplies the two-level cctld set
+// hostname - host name or dotted-quad address; it is split in place while
+//            being examined, but every '.' is restored before returning
+// Returns the result of the uriblookup() calls, false for a name with no '.'
+bool check_uribl(mlfiPriv &priv, char *hostname) ;
+bool check_uribl(mlfiPriv &priv, char *hostname) {
+	in_addr ip;
+	if (inet_aton(hostname, &ip)) {
+		// numeric host: the uri blacklists are queried with the octets in
+		// reverse order (1.2.3.4 is looked up as 4.3.2.1.<zone>).
+		// s_addr is in network byte order, so o[0] is the leftmost octet.
+		const unsigned char *o = (const unsigned char *)&ip.s_addr;
+		char adr[sizeof "255.255.255.255"];
+		snprintf(adr, sizeof(adr), "%u.%u.%u.%u",
+			 (unsigned)o[3], (unsigned)o[2], (unsigned)o[1], (unsigned)o[0]);
+		return (uriblookup(priv, adr, NULL));
+	}
+
+	char *top, *top2, *top3;
+	// top is the last '.', so top+1 is the top level domain
+	top = strrchr(hostname, '.');
+	if (top) {
+		// temporarily cut the name at top to find the previous '.'
+		*top = '\0';
+		top2 = strrchr(hostname, '.');
+		*top = '.';
+
+		if (top2) {
+			// top2+1 is the last two labels of the name, e.g. co.uk
+			string_set::iterator i = priv.memory->get_cctlds()->find(top2+1);
+			string_set::iterator x = priv.memory->get_cctlds()->end();
+			// if we have a 2-level-cctld, just look at top three levels of the name
+			if (i != x) return uriblookup(priv, hostname, top2);
+
+			// temporarily cut the name at top2 to see if there is a fourth label
+			*top2 = '\0';
+			top3 = strrchr(hostname, '.');
+			*top2 = '.';
+
+			// if we have more than 3 levels in the name, look at the top three levels of the name
+			if (top3 && uriblookup(priv, hostname, top2)) return true;
+			// if that was not found, fall thru to looking at the top two levels
+		}
+		// look at the top two levels of the name
+		return uriblookup(priv, hostname, top);
+	}
+	return false;
+}
+
+
+////////////////////////////////////////////////
 //	check the hosts from the body against the content dnsbl
 //
 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip);
@@ -700,10 +777,11 @@
 		if (ip) {
 			int_set::iterator i = ips.find(ip);
 			if (i == ips.end()) {
+				// we haven't looked this up yet
 				ips.insert(ip);
-				if (check_single(priv, ip, suffix)) {
-					return true;
-				}
+				if (check_single(priv, ip, suffix)) return true;
+				// Check uribl & surbl
+				if (check_uribl(priv, host)) return true;
 			}
 		}
 	}