changeset 27:43a4f6b3e668 stable-2-3

add configurable host name limit and bad html tag limits.
author carl
date Sat, 22 May 2004 22:30:45 -0700
parents fdae7ab30cfc
children 33e1e3910506
files sendmail.st src/dnsbl.cpp src/scanner.cpp xml/dnsbl.in xml/sample.conf
diffstat 5 files changed, 103 insertions(+), 54 deletions(-) [+]
line wrap: on
line diff
Binary file sendmail.st has changed
--- a/src/dnsbl.cpp	Fri May 21 21:55:38 2004 -0700
+++ b/src/dnsbl.cpp	Sat May 22 22:30:45 2004 -0700
@@ -71,7 +71,9 @@
 enum status {oksofar,   // not rejected yet
              white,     // whitelisted by envelope from
              black,     // blacklisted by envelope from or to
-             reject};   // rejected by a dns list
+             reject,        // rejected by a dns list
+             reject_tag,    // too many bad html tags
+             reject_host};  // too many hosts/urls in body
 
 using namespace std;
 
@@ -125,8 +127,10 @@
     string_map  env_to_chkfrom;     // map recipient to a named from map
     char *      content_suffix;     // for sbl url body filtering
     char *      content_message;    // ""
-    char *      limit_message;      // error message for excessive bad html tags
-    int         bad_tag_limit;      // limit on bad html tags
+    char *      host_limit_message; // error message for excessive host names
+    int         host_limit;         // limit on host names
+    char *      tag_limit_message;  // error message for excessive bad html tags
+    int         tag_limit;          // limit on bad html tags
     string_set  html_tags;          // set of valid html tags
     CONFIG();
     ~CONFIG();
@@ -136,8 +140,10 @@
     load_time       = 0;
     content_suffix  = NULL;
     content_message = NULL;
-    limit_message   = NULL;
-    bad_tag_limit   = 0;
+    host_limit_message = NULL;
+    host_limit         = 0;
+    tag_limit_message  = NULL;
+    tag_limit          = 0;
 }
 CONFIG::~CONFIG() {
     for (dnsblp_map::iterator i=dnsbls.begin(); i!=dnsbls.end(); i++) {
@@ -483,7 +489,8 @@
     int count = 0;
     for (string_set::iterator i=priv.memory->hosts.begin(); i!=priv.memory->hosts.end(); i++) {
         count++;
-        if (count > 20) return oksofar; // silly to check too many hosts
+        int lim = priv.pc->host_limit;
+        if ((count > lim) && (lim > 0)) return reject_host;
         host = *i;
         if (debug_syslog) {
             char buf[200];
@@ -492,14 +499,14 @@
         }
         ip  = protected_dns_interface(host, true);
         if (ip) {
-            if (debug_syslog) {
-                char adr[sizeof "255.255.255.255"];
-                adr[0] = '\0';
-                inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
-                char buf[200];
-                snprintf(buf, sizeof(buf), "found host %s at %s", host, adr);
-                my_syslog(buf);
-            }
+        //  if (debug_syslog) {
+        //      char adr[sizeof "255.255.255.255"];
+        //      adr[0] = '\0';
+        //      inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
+        //      char buf[200];
+        //      snprintf(buf, sizeof(buf), "found host %s at %s", host, adr);
+        //      my_syslog(buf);
+        //  }
             status st = check_single(ip, dc.content_suffix);
             if (st == reject) return st;
         }
@@ -507,9 +514,9 @@
     host = NULL;
     int bin = priv.memory->binary_tags;
     int bad = priv.memory->bad_html_tags;
-    int lim = priv.pc->bad_tag_limit;
+    int lim = priv.pc->tag_limit;
     if (bin > bad) return oksofar;  // probably .zip or .tar.gz with random content
-    if ((bad > lim) && (lim > 0)) return reject;
+    if ((bad > lim) && (lim > 0)) return reject_tag;
     return oksofar;
 }
 
@@ -618,17 +625,22 @@
     mlfiPriv &priv = *MLFIPRIV;
     char *host = NULL;
     int  ip;
+    status    st;
     // process end of message
     if (priv.authenticated ||
         priv.only_whites   ||
-        (check_hosts(priv, host, ip) == oksofar)) rc = SMFIS_CONTINUE;
+        ((st=check_hosts(priv, host, ip)) == oksofar)) rc = SMFIS_CONTINUE;
     else {
         if (!priv.have_whites) {
             // can reject the entire message
             char buf[2000];
-            if (!host) {
-                // must be rejected due to excessive bad html tags
-                snprintf(buf, sizeof(buf), priv.pc->limit_message);
+            if (st == reject_tag) {
+                // rejected due to excessive bad html tags
+                snprintf(buf, sizeof(buf), priv.pc->tag_limit_message);
+            }
+            else if (st == reject_host) {
+                // rejected due to excessive unique host/urls
+                snprintf(buf, sizeof(buf), priv.pc->host_limit_message);
             }
             else {
                 char adr[sizeof "255.255.255.255"];
@@ -730,8 +742,11 @@
     if (dc.content_suffix) {
         fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message);
     }
-    if (dc.bad_tag_limit) {
-        fprintf(stdout, "\ncontent filtering for excessive html tags enabled with limit %d %s\n", dc.bad_tag_limit, dc.limit_message);
+    if (dc.host_limit) {
+        fprintf(stdout, "\ncontent filtering for host names enabled with limit %d %s\n", dc.host_limit, dc.host_limit_message);
+    }
+    if (dc.tag_limit) {
+        fprintf(stdout, "\ncontent filtering for excessive html tags enabled with limit %d %s\n", dc.tag_limit, dc.tag_limit_message);
     }
     fprintf(stdout, "\nfiles\n");
     for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) {
@@ -830,8 +845,9 @@
 static void load_conf(CONFIG &dc, char *fn) {
     dc.config_files.push_back(fn);
     map<char*, int, ltstr> commands;
-    enum {dummy, content, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc};
+    enum {dummy, content, hostlimit, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc};
     commands["content"    ] = content;
+    commands["host_limit" ] = hostlimit;
     commands["html_limit" ] = htmllimit;
     commands["html_tag"   ] = htmltag;
     commands["dnsbl"      ] = dnsbl;
@@ -875,6 +891,23 @@
                     processed = true;
                     } break;
 
+                case hostlimit: {
+                    char *limit = strtok(NULL, delim);
+                    if (!limit) break;                          // no integer limit
+                    char *msg = limit + strlen(limit);
+                    if ((msg - line) >= strlen(orig)) break;    // line ended with the limit
+                    msg  = strchr(msg+1, '\'');
+                    if (!msg) break;                            // no reply message template
+                    msg++; // move over the leading '
+                    if ((msg - line) >= strlen(orig)) break;    // line ended with the leading quote
+                    char *last = strchr(msg, '\'');
+                    if (!last) break;                           // no trailing quote
+                    *last = '\0';                               // make it a null terminator
+                    dc.host_limit         = atoi(limit);
+                    dc.host_limit_message = register_string(msg);
+                    processed = true;
+                    } break;
+
                 case htmllimit: {
                     char *limit = strtok(NULL, delim);
                     if (!limit) break;                          // no integer limit
@@ -887,18 +920,20 @@
                     char *last = strchr(msg, '\'');
                     if (!last) break;                           // no trailing quote
                     *last = '\0';                               // make it a null terminator
-                    dc.bad_tag_limit = atoi(limit);
-                    dc.limit_message = register_string(msg);
+                    dc.tag_limit         = atoi(limit);
+                    dc.tag_limit_message = register_string(msg);
                     processed = true;
                     } break;
 
                 case htmltag: {
                     char *tag = next_token(delim);
                     if (!tag) break;                            // no html tag value
+                    dc.html_tags.insert(tag);                   // base version
                     char buf[200];
                     snprintf(buf, sizeof(buf), "/%s", tag);
-                    dc.html_tags.insert(tag);
-                    dc.html_tags.insert(register_string(buf));
+                    dc.html_tags.insert(register_string(buf));  // leading /
+                    snprintf(buf, sizeof(buf), "%s/", tag);
+                    dc.html_tags.insert(register_string(buf));  // trailing /
                     processed = true;
                     } break;
 
--- a/src/scanner.cpp	Fri May 21 21:55:38 2004 -0700
+++ b/src/scanner.cpp	Sat May 22 22:30:45 2004 -0700
@@ -179,7 +179,7 @@
     {h_init, h_end,  t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_eq,   m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x3D =
     {h_init, h_end,  t_init, t_end,  t_end,  u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x3E >
     {h_init, h_end,  t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x3F ?
-    {h_init, h_end,  t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x40 @
+    {h_init, h_host, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x40 @
     {h_host, h_host, t_init, t_tag,  t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x41 A
     {h_host, h_host, t_init, t_tag,  t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x42 B
     {h_host, h_host, t_init, t_tag,  t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x43 C
@@ -379,6 +379,7 @@
     ".net",
     ".org",
     ".biz",
+    ".info",
     NULL
 };
 
@@ -950,6 +951,8 @@
             //  host name recognizer
             case h_end: {
                 pending[--count] = '\0';  // null terminate host name by overwriting the terminator
+                if (!strchr((const char *)pending, '@')) {
+                    // not an email address or message id
                 char *tld;
                 for (int i=0; (tld = tlds[i]); i++) {
                     int n = strlen(tld);
@@ -960,6 +963,7 @@
                         }
                     }
                 }
+                }
                 st = h_init;
                 } // fall thru
 
--- a/xml/dnsbl.in	Fri May 21 21:55:38 2004 -0700
+++ b/xml/dnsbl.in	Sat May 22 22:30:45 2004 -0700
@@ -19,12 +19,13 @@
 per-recipient basis, so that fred@example.com could use SPEWS and the
 SBL, where all other users @example.com use only the SBL.
 
-<p>This milter will also decode (base64, mime, html entity) and scan for
-HTTP and HTTPS URLs and bare hostnames in the body of the mail.  If any
-of those host names have A records on the SBL (or a single configurable
-list), the mail will be rejected unless previously whitelisted. This
-milter also counts the number of invalid HTML tags, and can reject mail
-if that count exceeds your specified limit.
+<p>This milter will also decode (base64, mime, html entity, url
+encodings) and scan for HTTP and HTTPS URLs and bare hostnames in the
+body of the mail.  If any of those host names have A records on the SBL
+(or a single configurable DNSBL), the mail will be rejected unless
+previously whitelisted.  This milter also counts the host names and the
+invalid HTML tags found in the body, and can reject mail if either
+count exceeds its configured limit.
 
 <p>The DNSBL milter reads a text configuration file (dnsbl.conf) on
 startup, and whenever the config file (or any of the referenced include
--- a/xml/sample.conf	Fri May 21 21:55:38 2004 -0700
+++ b/xml/sample.conf	Sat May 22 22:30:45 2004 -0700
@@ -11,9 +11,17 @@
 #       up to two %s parameters for the offending host name and
 #       client ip address respectively.
 #
+# host_limit:
+#   second token is the maximum number of distinct host names
+#       or urls that are allowed in any one mail body. Zero is
+#       unlimited.
+#   third  token? is a string enclosed in single quotes, so it
+#       is not really a token. This is the error message supplied
+#       to the smtp client.
+#
 # html_limit:
-#   second token is the integer count of the number of bad html
-#       tags that are allowed in any one mail body.
+#   second token is the integer count of the number of bad html tags
+#       that are allowed in any one mail body. Zero is unlimited.
 #   third  token? is a string enclosed in single quotes, so it
 #       is not really a token. This is the error message supplied
 #       to the smtp client.
@@ -91,7 +99,8 @@
 # content scanning parameters
 #
 content         sbl-xbl.spamhaus.org        'Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s'
-html_limit      20
+host_limit      20                          'Mail containing too many host names rejected'
+html_limit      20                          'Mail containing excessive bad html tags rejected'
 include html-tags.conf