# HG changeset patch # User carl # Date 1099028932 25200 # Node ID 419e009015702dacfe9a6176b9f60569031ee942 # Parent 57607387263d0978b83aa6cd35f5a81125958fbd changes to handle 5iantlavalamp.com diff -r 57607387263d -r 419e00901570 ChangeLog --- a/ChangeLog Wed Sep 08 14:46:45 2004 -0700 +++ b/ChangeLog Thu Oct 28 22:48:52 2004 -0700 @@ -1,5 +1,14 @@ $Id$ +3.7 2004-10-28 + Added an 'ignore' command to the conf file, used to ignore some + hosts that might end up on the SBL and otherwise trip the content + scanning filter. In particular, many recent Microsoft Word + documents contain the string www.5iantlavalamp.com which is + associated with their smart tags stuff. That is currently hosted at + 216.168.224.70, which is shared with a site that ended up on the + SBL. + 3.6 2004-09-08 Contributions from Dan Harkless Better documentation for disabling the content filtering. diff -r 57607387263d -r 419e00901570 dnsbl.conf --- a/dnsbl.conf Wed Sep 08 14:46:45 2004 -0700 +++ b/dnsbl.conf Thu Oct 28 22:48:52 2004 -0700 @@ -5,6 +5,7 @@ #host_limit 20 'Mail containing too many host names rejected' host_soft_limit 20 html_limit 20 'Mail containing excessive bad html tags rejected' +include hosts-ignore.conf include html-tags.conf include tld.conf diff -r 57607387263d -r 419e00901570 dnsbl.spec.in --- a/dnsbl.spec.in Wed Sep 08 14:46:45 2004 -0700 +++ b/dnsbl.spec.in Thu Oct 28 22:48:52 2004 -0700 @@ -1,6 +1,6 @@ Summary: DNSBL Sendmail Milter Name: dnsbl -Version: 3.5 +Version: 3.7 Release: 2 Copyright: GPL Group: System Environment/Daemons diff -r 57607387263d -r 419e00901570 hosts-ignore.conf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hosts-ignore.conf Thu Oct 28 22:48:52 2004 -0700 @@ -0,0 +1,1 @@ +ignore www.5iantlavalamp.com # present in almost all microsoft word documents diff -r 57607387263d -r 419e00901570 install.bash --- a/install.bash Wed Sep 08 14:46:45 2004 -0700 +++ b/install.bash Thu Oct 28 22:48:52 2004 -0700 @@ -38,6 +38,9 @@ if [ ! 
-f $DST/dnsbl.conf ]; then cp dnsbl.conf $DST fi +if [ ! -f $DST/hosts-ignore.conf ]; then + cp hosts-ignore.conf $DST +fi if [ ! -f $DST/html-tags.conf ]; then cp html-tags.conf $DST fi diff -r 57607387263d -r 419e00901570 package.bash --- a/package.bash Wed Sep 08 14:46:45 2004 -0700 +++ b/package.bash Thu Oct 28 22:48:52 2004 -0700 @@ -1,6 +1,6 @@ #!/bin/bash -VER=dnsbl-3.6 +VER=dnsbl-3.7 mkdir $VER target1=/home/httpd/html/510sg/util/dnsbl.tar.gz target2=/home/httpd/html/510sg/dnsbl.conf @@ -9,6 +9,7 @@ cp sample1.conf $VER/dnsbl.conf cp sample.conf $VER cp html-tags.conf $VER + cp hosts-ignore.conf $VER cp tld.conf $VER cp *cpp $VER cp *rc $VER diff -r 57607387263d -r 419e00901570 src/dnsbl.cpp --- a/src/dnsbl.cpp Wed Sep 08 14:46:45 2004 -0700 +++ b/src/dnsbl.cpp Thu Oct 28 22:48:52 2004 -0700 @@ -142,6 +142,7 @@ string_map env_to_chkfrom; // map recipient to a named from map char * content_suffix; // for sbl url body filtering char * content_message; // "" + string_set content_host_ignore;// hosts to ignore for content sbl checking char * host_limit_message; // error message for excessive host names int host_limit; // limit on host names bool host_random; // pick a random selection of host names rather than error for excessive hosts @@ -608,6 +609,10 @@ int_set ips; // remove duplicate ip addresses for (string_set::iterator i=priv.memory->hosts.begin(); i!=priv.memory->hosts.end(); i++) { host = *i; // a reference into priv.memory->hosts, which will live until this smtp transaction is closed + string_set::iterator j = priv.pc->content_host_ignore.find(host); + if (j != priv.pc->content_host_ignore.end()) { + continue; // host is on the ignore list, don't bother looking it up + } if ((cnt > lim) && (lim > 0) && ran) { // try to only look at lim/cnt fraction of the available cnt host names int r = rand() % cnt; @@ -930,6 +935,9 @@ if (dc.content_suffix) { fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message); } + for 
(string_set::iterator i=dc.content_host_ignore.begin(); i!=dc.content_host_ignore.end(); i++) { + fprintf(stdout, "ignore %s\n", (*i)); + } if (dc.host_limit && !dc.host_random) { fprintf(stdout, "\ncontent filtering for host names hard limit %d %s\n", dc.host_limit, dc.host_limit_message); } @@ -1048,9 +1056,10 @@ } dc.config_files.push_back(fn); map commands; - enum {dummy, tld, content, hostlimit, hostslimit, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc}; + enum {dummy, tld, content, ignore, hostlimit, hostslimit, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc}; commands["tld" ] = tld; commands["content" ] = content; + commands["ignore" ] = ignore; commands["host_limit" ] = hostlimit; commands["host_soft_limit"] = hostslimit; commands["html_limit" ] = htmllimit; @@ -1101,6 +1110,13 @@ processed = true; } break; + case ignore: { + char *host = next_token(delim); + if (!host) break; + dc.content_host_ignore.insert(host); + processed = true; + } break; + case hostlimit: { char *limit = strtok(NULL, delim); if (!limit) break; // no integer limit diff -r 57607387263d -r 419e00901570 xml/dnsbl.in --- a/xml/dnsbl.in Wed Sep 08 14:46:45 2004 -0700 +++ b/xml/dnsbl.in Thu Oct 28 22:48:52 2004 -0700 @@ -2,7 +2,7 @@ -DNSBL Sendmail milter - Version 3.6 +DNSBL Sendmail milter - Version 3.7
Introduction
@@ -114,10 +114,10 @@
  • If the mail has not been accepted or rejected yet, the body content is optionally scanned for HTTP URLs (after base64, mime and html entity decoding), and the first <configurable> host names are checked for -their presence on the SBL. If any host name is on the SBL, the mail is -rejected. If we are doing body content scanning, we also scan for -excessive bad html tags, and if a <configurable> limit is -exceeded, the mail is rejected. +their presence on the SBL. If any host name is on the SBL, and it is +not on the "ignore" list, the mail is rejected. If we are doing body +content scanning, we also scan for excessive bad html tags, and if a +<configurable> limit is exceeded, the mail is rejected. diff -r 57607387263d -r 419e00901570 xml/sample.conf --- a/xml/sample.conf Wed Sep 08 14:46:45 2004 -0700 +++ b/xml/sample.conf Thu Oct 28 22:48:52 2004 -0700 @@ -19,6 +19,12 @@ # for host names or bad html tags. # # +# ignore: +# second token is a host name that is allowed in the body even +# if it would otherwise be rejected by the content scanning +# above. +# +# # host_limit: # second token is the integer count of the number of host names # or urls that are allowed in any one mail body. Zero is @@ -124,6 +130,7 @@ host_limit 20 'Mail containing too many host names rejected' host_soft_limit 20 html_limit 20 'Mail containing excessive bad html tags rejected' +include hosts-ignore.conf include html-tags.conf include tld.conf