changeset 57:419e00901570

changes to handle 5iantlavalamp.com
author carl
date Thu, 28 Oct 2004 22:48:52 -0700
parents 57607387263d
children 7bb8bbf79285
files ChangeLog dnsbl.conf dnsbl.spec.in hosts-ignore.conf install.bash package.bash src/dnsbl.cpp xml/dnsbl.in xml/sample.conf
diffstat 9 files changed, 57 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Wed Sep 08 14:46:45 2004 -0700
+++ b/ChangeLog	Thu Oct 28 22:48:52 2004 -0700
@@ -1,5 +1,14 @@
     $Id$
 
+3.7 2004-10-28
+    Added an 'ignore' command to the conf file, used to ignore some
+    hosts that might end up on the SBL and otherwise trip the content
+    scanning filter.  In particular, many recent Microsoft Word
+    documents contain the string www.5iantlavalamp.com, which is
+    associated with the Smart Tags feature.  That host currently resolves
+    to 216.168.224.70, which is shared with a site that ended up on the
+    SBL.
+
 3.6 2004-09-08
     Contributions from Dan Harkless <software@harkless.org>
         Better documentation for disabling the content filtering.
--- a/dnsbl.conf	Wed Sep 08 14:46:45 2004 -0700
+++ b/dnsbl.conf	Thu Oct 28 22:48:52 2004 -0700
@@ -5,6 +5,7 @@
 #host_limit     20                          'Mail containing too many host names rejected'
 host_soft_limit 20
 html_limit      20                          'Mail containing excessive bad html tags rejected'
+include hosts-ignore.conf
 include html-tags.conf
 include tld.conf
 
--- a/dnsbl.spec.in	Wed Sep 08 14:46:45 2004 -0700
+++ b/dnsbl.spec.in	Thu Oct 28 22:48:52 2004 -0700
@@ -1,6 +1,6 @@
 Summary: DNSBL Sendmail Milter
 Name: dnsbl
-Version: 3.5
+Version: 3.7
 Release: 2
 Copyright: GPL
 Group: System Environment/Daemons
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hosts-ignore.conf	Thu Oct 28 22:48:52 2004 -0700
@@ -0,0 +1,1 @@
+ignore  www.5iantlavalamp.com       # present in almost all Microsoft Word documents
--- a/install.bash	Wed Sep 08 14:46:45 2004 -0700
+++ b/install.bash	Thu Oct 28 22:48:52 2004 -0700
@@ -38,6 +38,9 @@
 if [ ! -f $DST/dnsbl.conf ]; then
     cp dnsbl.conf $DST
 fi
+if [ ! -f $DST/hosts-ignore.conf ]; then
+    cp hosts-ignore.conf $DST
+fi
 if [ ! -f $DST/html-tags.conf ]; then
     cp html-tags.conf $DST
 fi
--- a/package.bash	Wed Sep 08 14:46:45 2004 -0700
+++ b/package.bash	Thu Oct 28 22:48:52 2004 -0700
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-VER=dnsbl-3.6
+VER=dnsbl-3.7
 mkdir $VER
     target1=/home/httpd/html/510sg/util/dnsbl.tar.gz
     target2=/home/httpd/html/510sg/dnsbl.conf
@@ -9,6 +9,7 @@
     cp sample1.conf   $VER/dnsbl.conf
     cp sample.conf    $VER
     cp html-tags.conf $VER
+    cp hosts-ignore.conf $VER
     cp tld.conf       $VER
     cp *cpp           $VER
     cp *rc            $VER
--- a/src/dnsbl.cpp	Wed Sep 08 14:46:45 2004 -0700
+++ b/src/dnsbl.cpp	Thu Oct 28 22:48:52 2004 -0700
@@ -142,6 +142,7 @@
     string_map  env_to_chkfrom;     // map recipient to a named from map
     char *      content_suffix;     // for sbl url body filtering
     char *      content_message;    // ""
+    string_set  content_host_ignore;// hosts to ignore for content sbl checking
     char *      host_limit_message; // error message for excessive host names
     int         host_limit;         // limit on host names
     bool        host_random;        // pick a random selection of host names rather than error for excessive hosts
@@ -608,6 +609,10 @@
     int_set ips;    // remove duplicate ip addresses
     for (string_set::iterator i=priv.memory->hosts.begin(); i!=priv.memory->hosts.end(); i++) {
         host = *i;  // a reference into priv.memory->hosts, which will live until this smtp transaction is closed
+        string_set::iterator j = priv.pc->content_host_ignore.find(host);   // is this host on the configured ignore list?
+        if (j != priv.pc->content_host_ignore.end()) {
+            continue;   // host is on the ignore list, so skip the SBL lookup for it
+        }
         if ((cnt > lim) && (lim > 0) && ran) {
             // try to only look at lim/cnt fraction of the available cnt host names
             int r = rand() % cnt;
@@ -930,6 +935,9 @@
     if (dc.content_suffix) {
         fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message);
     }
+    for (string_set::iterator i=dc.content_host_ignore.begin(); i!=dc.content_host_ignore.end(); i++) {
+        fprintf(stdout, "ignore %s\n", (*i));
+    }
     if (dc.host_limit && !dc.host_random) {
         fprintf(stdout, "\ncontent filtering for host names hard limit %d %s\n", dc.host_limit, dc.host_limit_message);
     }
@@ -1048,9 +1056,10 @@
     }
     dc.config_files.push_back(fn);
     map<char*, int, ltstr> commands;
-    enum {dummy, tld, content, hostlimit, hostslimit, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc};
+    enum {dummy, tld, content, ignore, hostlimit, hostslimit, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc};
     commands["tld"            ] = tld;
     commands["content"        ] = content;
+    commands["ignore"         ] = ignore;
     commands["host_limit"     ] = hostlimit;
     commands["host_soft_limit"] = hostslimit;
     commands["html_limit"     ] = htmllimit;
@@ -1101,6 +1110,13 @@
                     processed = true;
                     } break;
 
+                case ignore: {
+                    char *host = next_token(delim);
+                    if (!host) break;
+                    dc.content_host_ignore.insert(host);
+                    processed = true;
+                    } break;
+
                 case hostlimit: {
                     char *limit = strtok(NULL, delim);
                     if (!limit) break;                          // no integer limit
--- a/xml/dnsbl.in	Wed Sep 08 14:46:45 2004 -0700
+++ b/xml/dnsbl.in	Thu Oct 28 22:48:52 2004 -0700
@@ -2,7 +2,7 @@
 
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
-<title>DNSBL Sendmail milter - Version 3.6</title>
+<title>DNSBL Sendmail milter - Version 3.7</title>
 </head>
 
 <center>Introduction</center>
@@ -114,10 +114,10 @@
 <li>If the mail has not been accepted or rejected yet, the body content
 is optionally scanned for HTTP URLs (after base64, mime and html entity
 decoding), and the first &lt;configurable&gt; host names are checked for
-their presence on the SBL.  If any host name is on the SBL, the mail is
-rejected.  If we are doing body content scanning, we also scan for
-excessive bad html tags, and if a &lt;configurable&gt; limit is
-exceeded, the mail is rejected.
+their presence on the SBL.  If any host name is on the SBL, and it is
+not on the "ignore" list, the mail is rejected.  If we are doing body
+content scanning, we also scan for excessive bad html tags, and if a
+&lt;configurable&gt; limit is exceeded, the mail is rejected.
 
 </ol>
 
--- a/xml/sample.conf	Wed Sep 08 14:46:45 2004 -0700
+++ b/xml/sample.conf	Thu Oct 28 22:48:52 2004 -0700
@@ -19,6 +19,12 @@
 #   for host names or bad html tags.
 #
 #
+# ignore:
+#   second token is a host name that is allowed in the body even
+#   if it would otherwise be rejected by the content scanning
+#   above.
+#
+#
 # host_limit:
 #   second token is the integer count of the number of host names
 #       or urls that are allowed in any one mail body. Zero is
@@ -124,6 +130,7 @@
 host_limit      20                          'Mail containing too many host names rejected'
 host_soft_limit 20
 html_limit      20                          'Mail containing excessive bad html tags rejected'
+include hosts-ignore.conf
 include html-tags.conf
 include tld.conf