# HG changeset patch # User carl # Date 1142195923 28800 # Node ID d9d2f8699621a9a093f075b1dea363ad2ead2f27 # Parent 13fcb0c66763524d37981992baf5c90eb99308ba uribl patch from Jeff Evans diff -r 13fcb0c66763 -r d9d2f8699621 dnsbl.conf --- a/dnsbl.conf Sun Mar 12 10:20:59 2006 -0800 +++ b/dnsbl.conf Sun Mar 12 12:38:43 2006 -0800 @@ -7,10 +7,12 @@ content on { filter sbl-xbl.spamhaus.org "Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s"; + uribl multi-surbl.org "Mail containing %s rejected - surbl; see http://www.rulesemporium.com/cgi-bin/uribl.cgi?bl0=1&domain0=%s"; + #uribl black.uribl.com "Mail containing %s rejected - uribl; see http://l.uribl.com/?d=%s"; ignore { include "hosts-ignore.conf"; }; tld { include "tld.conf"; }; cctld { include "cctld.conf"; }; -# html_tags { include "html-tags.conf"; }; + html_tags { include "html-tags.conf"; }; html_limit off; host_limit soft 20; }; diff -r 13fcb0c66763 -r d9d2f8699621 src/context.cpp --- a/src/context.cpp Sun Mar 12 10:20:59 2006 -0800 +++ b/src/context.cpp Sun Mar 12 12:38:43 2006 -0800 @@ -52,6 +52,7 @@ char *token_tld; char *token_cctld; char *token_unknown; +char *token_uribl; char *token_verify; char *token_white; @@ -486,6 +487,8 @@ content_filtering = (parent) ? parent->content_filtering : false; content_suffix = NULL; content_message = NULL; + uribl_suffix = NULL; + uribl_message = NULL; host_limit = (parent) ? parent->host_limit : 0; host_limit_message = NULL; host_random = (parent) ? parent->host_random : false; @@ -615,12 +618,24 @@ } +char* CONTEXT::get_uribl_suffix() { + if (!uribl_suffix && parent) return parent->get_uribl_suffix(); + return uribl_suffix; +} + + char* CONTEXT::get_content_message() { if (!content_message && parent) return parent->get_content_message(); return content_message; } +char* CONTEXT::get_uribl_message() { + if (!uribl_message && parent) return parent->get_uribl_message(); + return uribl_message; +} + + string_set& CONTEXT::get_content_host_ignore() { if (content_host_ignore.empty() && parent) return parent->get_content_host_ignore(); return content_host_ignore; @@ -692,6 +707,9 @@ if (content_suffix) { printf("%s filter %s \"%s\"; \n", indent, content_suffix, content_message); } + if (uribl_suffix) { + printf("%s uribl %s \"%s\"; \n", indent, uribl_suffix, uribl_message); + } if (!content_host_ignore.empty()) { printf("%s ignore { \n", indent); for (string_set::iterator i=content_host_ignore.begin(); i!=content_host_ignore.end(); i++) { @@ -891,6 +909,13 @@ me.set_content_message(messag); if (!tsa(tok, token_semi)) return false; } + else if (have == token_uribl) { + char *suffix = tok.next(); + char *messag = tok.next(); + me.set_uribl_suffix(suffix); + me.set_uribl_message(messag); + if (!tsa(tok, token_semi)) return false; + } else if (have == token_ignore) { if (!tsa(tok, token_lbrace)) return false; while (true) { @@ -1282,6 +1307,7 @@ token_substitute = register_string("substitute"); token_tld = register_string("tld"); token_unknown = register_string("unknown"); + token_uribl = register_string("uribl"); token_verify = register_string("verify"); token_white = register_string("white"); diff -r 13fcb0c66763 -r d9d2f8699621 src/context.h --- a/src/context.h Sun Mar 12 10:20:59 2006 -0800 +++ b/src/context.h Sun Mar 12 12:38:43 2006 -0800 @@ -96,8 +96,10 @@ context_map env_from_context; // map senders to a child context char * env_from_default; // default value for senders that are not found in the map white/black/unknown/inherit bool content_filtering; // - char * content_suffix; // for sbl url body filtering + char * content_suffix; // for url body filtering based on ip addresses of hostnames in the body char * content_message; // "" + char * uribl_suffix; // for uribl body filtering based on hostnames in the body + char * uribl_message; // "" string_set content_host_ignore;// hosts to ignore for content sbl checking string_set content_tlds; // string_set content_cctlds; // @@ -135,6 +137,8 @@ void set_content_filtering(bool filter) {content_filtering = filter;}; void set_content_suffix(char *suffix) {content_suffix = suffix;}; void set_content_message(char *message) {content_message = message;}; + void set_uribl_suffix(char *suffix) {uribl_suffix = suffix;}; + void set_uribl_message(char *message) {uribl_message = message;}; void add_ignore(char *host) {content_host_ignore.insert(host);}; void add_tld(char *tld) {content_tlds.insert(tld);}; void add_cctld(char *cctld) {content_cctlds.insert(cctld);}; @@ -155,6 +159,8 @@ bool get_host_random() {return host_random;}; char* get_content_suffix(); char* get_content_message(); + char* get_uribl_suffix(); + char* get_uribl_message(); string_set& get_content_host_ignore(); string_set& get_content_tlds(); string_set& get_content_cctlds(); @@ -219,6 +225,7 @@ extern char *token_substitute; extern char *token_tld; extern char *token_unknown; +extern char *token_uribl; extern char *token_white; extern char *token_myhostname; diff -r 13fcb0c66763 -r d9d2f8699621 src/dnsbl.cpp --- a/src/dnsbl.cpp Sun Mar 12 10:20:59 2006 -0800 +++ b/src/dnsbl.cpp Sun Mar 12 12:38:43 2006 -0800 @@ -232,6 +232,8 @@ scanner = NULL; content_suffix = NULL; content_message = NULL; + uribl_suffix = NULL; + uribl_message = NULL; content_host_ignore = NULL; } @@ -259,6 +261,8 @@ scanner = NULL; content_suffix = NULL; content_message = NULL; + uribl_suffix = NULL; + uribl_message = NULL; content_host_ignore = NULL; } } @@ -369,6 +373,8 @@ scanner = new url_scanner(memory); content_suffix = con.get_content_suffix(); content_message = con.get_content_message(); + uribl_suffix = con.get_uribl_suffix(); + uribl_message = con.get_uribl_message(); content_host_ignore = &con.get_content_host_ignore(); } } @@ -669,14 +675,14 @@ if (x) hostname = x+1; *top = '.'; } - for (int i=0; i<2; i++) { - snprintf(buf, sizeof(buf), "%s.%s", hostname, uriblname[i]); + snprintf(buf, sizeof(buf), "%s.%s", hostname, priv.uribl_suffix); + if (dns_interface(priv, buf, false, NULL)) { if (debug_syslog > 2) { char tmp[maxlen]; - snprintf(tmp, sizeof(tmp), "Looking up %s on %s", hostname, uriblname[i]); + snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix); my_syslog(tmp); } - if (dns_interface(priv, buf, false, NULL)) return true; + return true; } return false; } @@ -727,14 +733,14 @@ //////////////////////////////////////////////// -// check the hosts from the body against the content dnsbl +// check the hosts from the body against the content filter and uribl dnsbls // -bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip); -bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip) { +bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&msg, char *&host, int &ip); +bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&msg, char *&host, int &ip) { + if (!priv.content_suffix && !priv.uribl_suffix) return false; // nothing to check CONFIG &dc = *priv.pc; string_set &hosts = priv.memory->get_hosts(); string_set &ignore = *priv.content_host_ignore; - char *suffix = priv.content_suffix; int count = 0; int cnt = hosts.size(); // number of hosts we could look at @@ -779,19 +785,22 @@ if (i == ips.end()) { // we haven't looked this up yet ips.insert(ip); - if (check_single(priv, ip, suffix)) return true; + if (check_single(priv, ip, priv.content_suffix)) { + msg = priv.content_message; + return true; + } // Check uribl & surbl - if (check_uribl(priv, host)) return true; + if (check_uribl(priv, host)) { + msg = priv.uribl_message; + return true; + } } } } limit *= 4; // allow average of 3 ns per host name for (ns_mapper::iterator i=nameservers.ns_ip.begin(); i!=nameservers.ns_ip.end(); i++) { count++; - if ((count > limit) && (limit > 0)) { - if (random) continue; // don't complain - return true; - } + if ((count > limit) && (limit > 0)) return false; // too many name servers to check them all host = (*i).first; // a transient reference that needs to be replaced before we return it ip = (*i).second; if (!ip) ip = dns_interface(priv, host, false, NULL); @@ -812,7 +821,8 @@ int_set::iterator i = ips.find(ip); if (i == ips.end()) { ips.insert(ip); - if (check_single(priv, ip, suffix)) { + if (check_single(priv, ip, priv.content_suffix)) { + msg = priv.content_message; string_map::iterator j = nameservers.ns_host.find(host); if (j != nameservers.ns_host.end()) { char *refer = (*j).second; @@ -985,11 +995,12 @@ } bool rejecting = alive.empty(); // if alive is empty, we must have set msg above in acceptable_content() if (!rejecting) { - if (check_hosts(priv, random, limit, host, ip)) { + char *msg; + if (check_hosts(priv, random, limit, msg, host, ip)) { char adr[sizeof "255.255.255.255"]; adr[0] = '\0'; inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); - snprintf(buf, sizeof(buf), priv.content_message, host, adr); + snprintf(buf, sizeof(buf), msg, host, adr); msg = buf; rejecting = true; } diff -r 13fcb0c66763 -r d9d2f8699621 src/dnsbl.h --- a/src/dnsbl.h Sun Mar 12 10:20:59 2006 -0800 +++ b/src/dnsbl.h Sun Mar 12 12:38:43 2006 -0800 @@ -28,8 +28,10 @@ context_map env_to; // map each non-whitelisted recipient to their filtering context recorder *memory; // memory for the content scanner url_scanner *scanner; // object to handle body scanning - char *content_suffix; // content filtering parameters + char *content_suffix; // for url body filtering based on ip addresses of hostnames in the body char *content_message; // "" + char *uribl_suffix; // for uribl body filtering based on hostnames in the body + char *uribl_message; // "" string_set *content_host_ignore; // "" diff -r 13fcb0c66763 -r d9d2f8699621 src/scanner.cpp --- a/src/scanner.cpp Sun Mar 12 10:20:59 2006 -0800 +++ b/src/scanner.cpp Sun Mar 12 12:38:43 2006 -0800 @@ -6,6 +6,9 @@ */ +#include +#include +#include #include "includes.h" static char* scanner_version="$Id$"; diff -r 13fcb0c66763 -r d9d2f8699621 xml/dnsbl.in --- a/xml/dnsbl.in Sun Mar 12 10:20:59 2006 -0800 +++ b/xml/dnsbl.in Sun Mar 12 12:38:43 2006 -0800 @@ -320,14 +320,26 @@ If content filtering is enabled for this body, the mail text is decoded - (uuencode, base64, mime, html entity, url encodings), scanned for HTTP - and HTTPS URLs, and the first <configurable> host names are - checked for their presence on the single <configurable> DNSBL. - The only known list that is suitable for this purpose is the SBL. If - any of those host names are on that DNSBL (or have nameservers that are - on that list), and it is not on the <configurable> ignore list, - the mail is rejected. We also scan for excessive bad html tags, and if - a <configurable> limit is exceeded, the mail is rejected. + (uuencode, base64, mime, html entity, url encodings), and scanned for HTTP + and HTTPS URLs or bare host names. Hostnames must be either ip address + literals, or must end in a string defined by the TLD list. The first + <configurable> host names are checked as follows. + + + The only known list that is suitable for the content filter DNSBL is the + SBL. If the content filter DNSBL is defined, and any of those host + names resolve to ip addresses that are on that DNSBL (or have + nameservers that are on that list), and the host name is not on the + <configurable> ignore list, the mail is rejected. + + + If the content uribl DNSBL is defined, and any of those host names are + on that DNSBL, and the host name is not on the <configurable> + ignore list, the mail is rejected. + + + We also scan for excessive bad html tags, and if a <configurable> + limit is exceeded, the mail is rejected. @@ -513,11 +525,13 @@ DNSBLLIST = "dnsbl_list" {NAME}+ CONTENT = "content" ("on" | "off") "{" {CONTENT-ST}+ "}" -CONTENT-ST = (FILTER | IGNORE | TLD | HTML-TAGS | HTML-LIMIT | - HOST-LIMIT) ";" +CONTENT-ST = (FILTER | URIBL | IGNORE | TLD | CCTLD | HTML-TAGS | + HTML-LIMIT | HOST-LIMIT) ";" FILTER = "filter" DNSPREFIX ERROR-MSG +URIBL = "uribl" DNSPREFIX ERROR-MSG IGNORE = "ignore" "{" {HOSTNAME [";"]}+ "}" TLD = "tld" "{" {TLD [";"]}+ "}" +CCTLD = "cctld" "{" {TLD [";"]}+ "}" HTML-TAGS = "html_tags" "{" {HTMLTAG [";"]}+ "}" ERROR-MSG = string containing exactly two %s replacement tokens for the client ip address @@ -553,8 +567,11 @@ content on { filter sbl-xbl.spamhaus.org "Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s"; + uribl multi-surbl.org "Mail containing %s rejected - surbl; see http://www.rulesemporium.com/cgi-bin/uribl.cgi?bl0=1&domain0=%s"; + #uribl black.uribl.com "Mail containing %s rejected - uribl; see http://l.uribl.com/?d=%s"; ignore { include "hosts-ignore.conf"; }; tld { include "tld.conf"; }; + cctld { include "cctld.conf"; }; html_tags { include "html-tags.conf"; }; html_limit on 20 "Mail containing excessive bad html tags rejected"; html_limit off;