changeset 76:81f1e400e8ab

start coding on new config syntax
author carl
date Sat, 16 Jul 2005 13:47:19 -0700
parents 1142e46be550
children 8487650c98ee
files clients.conf install.bash sendmail.st src/context.cpp src/context.h src/dnsbl.cpp src/dnsbl.h src/scanner.cpp src/scanner.h test.bash xml/dnsbl.in xml/sample.conf
diffstat 12 files changed, 311 insertions(+), 171 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/clients.conf	Sat Jul 16 13:47:19 2005 -0700
@@ -0,0 +1,6 @@
+include "510sg.conf";
+include "ams.conf";
+include "davd.conf";
+include "mbmg.conf";
+include "neuro.conf";
+include "pmg.conf";
--- a/install.bash	Wed Jul 13 23:04:14 2005 -0700
+++ b/install.bash	Sat Jul 16 13:47:19 2005 -0700
@@ -11,12 +11,12 @@
 #####################
 # build the milter
 # add compiler flags - suggested by Nigel Horne
-g++ -c $CXXFLAGS -pthread dnsbl.cpp
+g++ -c $CXXFLAGS -pthread dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp
 if [ $? -ne 0 ]; then
     echo "compiler errors"
     exit
 fi
-g++ -o dnsbl dnsbl.o /usr/lib/libresolv.a -lmilter -pthread
+g++ -o dnsbl dnsbl.o scanner.o context.o tokenizer.o /usr/lib/libresolv.a -lmilter -pthread
 if [ $? -ne 0 ]; then
     echo "linker errors"
     exit
@@ -35,8 +35,26 @@
     rm /var/dnsbl/dnsbl # remove the old binary
     rmdir /var/dnsbl
 fi
-if [ ! -f $DST/dnsbl.conf ]; then
+CONF=$DST/dnsbl.conf
+if [ -f $CONF ]; then
+    grep '^context' $CONF >/dev/null    # grep takes the pattern first, then the file
+    if [ $? -eq 1 ]; then
+        # config file exists, but no line starts with 'context', so it is
+        # for the older version; preserve it and its companions, start over
+        suf=4.old
+        for i in dnsbl hosts-ignore html-tags tld; do
+            j=$DST/$i.conf
+            if [ -f $j ]; then
+                mv -f $j $j.$suf
+            fi
+        done
+    fi
+fi
+if [ ! -f $CONF ]; then
     cp dnsbl.conf $DST
+    cp hosts-ignore.conf $DST
+    cp html-tags.conf    $DST
+    cp tld.conf          $DST
 fi
 if [ ! -f $DST/hosts-ignore.conf ]; then
     cp hosts-ignore.conf $DST
Binary file sendmail.st has changed
--- a/src/context.cpp	Wed Jul 13 23:04:14 2005 -0700
+++ b/src/context.cpp	Sat Jul 16 13:47:19 2005 -0700
@@ -61,6 +61,7 @@
 
 
 CONFIG::~CONFIG() {
+    if (debug_syslog) my_syslog("config::~config destructor");
     for (context_list::iterator i=contexts.begin(); i!=contexts.end(); i++) {
         CONTEXT *c = *i;
         delete c;
@@ -81,6 +82,20 @@
     context_map::iterator i = env_to.find(to);
     if (i != env_to.end()) {
         CONTEXTP c = (*i).second;
+        int       s = strlen(to);
+        bool     at = s && (to[s-1] == '@');
+        if (at && con->is_parent(c->get_parent())) {
+            if (debug_syslog) {
+                char oldname[maxlen];
+                char newname[maxlen];
+                char *oldn = c->get_full_name(oldname, maxlen);
+                char *newn = con->get_full_name(newname, maxlen);
+                char buf[maxlen*3];
+                snprintf(buf, maxlen*3, "both %s and %s claim envelope to %s, the first one wins", oldn, newn, to);
+                my_syslog(buf);
+            }
+            return;     // don't take over user@ entries from your ancestors children
+        }
         if ((c != con) && (c != con->get_parent())) {
             char oldname[maxlen];
             char newname[maxlen];
@@ -145,6 +160,12 @@
 
 
 CONTEXT::~CONTEXT() {
+    if (debug_syslog) {
+        char buf[maxlen];
+        char msg[maxlen];
+        snprintf(msg, maxlen, "context::~context %s destructor", get_full_name(buf,maxlen));
+        my_syslog(msg);
+    }
     for (dnsblp_map::iterator i=dnsbl_names.begin(); i!=dnsbl_names.end(); i++) {
         DNSBLP d = (*i).second;
         // delete the underlying DNSBL objects.
@@ -153,6 +174,13 @@
 }
 
 
+bool CONTEXT::is_parent(CONTEXTP p) {
+    CONTEXTP up = parent;                       // walk the ancestor chain, nearest first
+    while (up && (up != p)) up = up->parent;    // stop on a match or once past the root
+    return (up == p);                           // a NULL p matches when the chain runs out
+}
+
+
 char *CONTEXT::get_full_name(char *buffer, int size) {
     if (!parent) return name;
     char buf[maxlen];
@@ -174,23 +202,27 @@
 
 
 char *CONTEXT::find_from(char *from) {
+    char *rc = token_inherit;
     string_map::iterator i = env_from.find(from);
-    if (i != env_from.end()) return (*i).second;        // found user@domain.tld key
+    if (i != env_from.end()) rc = (*i).second;  // found user@domain.tld key
+    else {
     char *x = strchr(from, '@');
     if (x) {
         x++;
         i = env_from.find(x);
-        if (i != env_from.end()) return (*i).second;    // found domain.tld key
+            if (i != env_from.end()) rc = (*i).second;  // found domain.tld key
+            else {
         char y = *x;
         *x = '\0';
         i = env_from.find(from);
         *x = y;
-        if (i != env_from.end()) return (*i).second;      // found user@ key
+                if (i != env_from.end()) rc = (*i).second;  // found user@ key
+            }
+        }
     }
-    if ((env_from_default == token_inherit) && parent) {
-        return parent->find_from(from);
-    }
-    return (env_from_default == token_inherit) ? token_unknown : env_from_default;
+    if (rc == token_inherit) rc = env_from_default;
+    if ((rc == token_inherit) && parent) return parent->find_from(from);
+    return (rc == token_inherit) ? token_unknown : rc;
 }
 
 
@@ -227,6 +259,42 @@
 }
 
 
+char* CONTEXT::get_content_suffix() {
+    if (content_suffix || !parent) return content_suffix;   // set here, or no parent to ask
+    return parent->get_content_suffix();                    // unset here: use the parent's value
+}
+
+
+char* CONTEXT::get_content_message() {
+    if (content_message || !parent) return content_message; // set here, or no parent to ask
+    return parent->get_content_message();                   // unset here: use the parent's value
+}
+
+
+string_set& CONTEXT::get_content_host_ignore() {
+    if (!content_host_ignore.empty() || !parent) return content_host_ignore;    // own entries, or no parent
+    return parent->get_content_host_ignore();                                   // empty here: use the parent's set
+}
+
+
+string_set& CONTEXT::get_content_tlds() {
+    if (!content_tlds.empty() || !parent) return content_tlds;  // own entries, or no parent
+    return parent->get_content_tlds();                          // empty here: use the parent's set
+}
+
+
+string_set& CONTEXT::get_html_tags() {
+    if (!html_tags.empty() || !parent) return html_tags;    // own entries, or no parent
+    return parent->get_html_tags();                         // empty here: use the parent's set
+}
+
+
+dnsblp_list& CONTEXT::get_dnsbl_list() {
+    if (!dnsbl_list.empty() || !parent) return dnsbl_list;  // own entries, or no parent
+    return parent->get_dnsbl_list();                        // empty here: use the parent's list
+}
+
+
 bool CONTEXT::acceptable_content(recorder &memory, char *&msg) {
     if (memory.excessive_bad_tags(tag_limit)) {
         msg = tag_limit_message;
@@ -427,7 +495,6 @@
 //
 bool parse_content(TOKEN &tok, CONFIG &dc, CONTEXT &me);
 bool parse_content(TOKEN &tok, CONFIG &dc, CONTEXT &me) {
-    bool topdefault = (!me.get_parent()) && (!dc.default_context);
     char *setting = tok.next();
     if (setting == token_on) {
         me.set_content_filtering(true);
@@ -446,25 +513,18 @@
         if (have == token_filter) {
             char *suffix = tok.next();
             char *messag = tok.next();
-            if (topdefault) {
                 me.set_content_suffix(suffix);
                 me.set_content_message(messag);
-            }
             if (!tsa(tok, token_semi)) return false;
-            if (!topdefault) tok.token_error("content filters may only be speciried in the top default context");
         }
         else if (have == token_ignore) {
             if (!tsa(tok, token_lbrace)) return false;
             while (true) {
                 if (!have) break;
                 char *have = tok.next();
-                if (have == token_rbrace) {
-                    break;  // done
-                }
-                else {
+                if (have == token_rbrace) break;  // done
                     me.add_ignore(have);
                 }
-            }
             if (!tsa(tok, token_semi)) return false;
         }
         else if (have == token_tld) {
@@ -472,15 +532,10 @@
             while (true) {
                 char *have = tok.next();
                 if (!have) break;
-                if (have == token_rbrace) {
-                    break;  // done
-                }
-                else {
-                    if (topdefault) me.add_tld(have);
-                }
+                if (have == token_rbrace) break;  // done
+                me.add_tld(have);
             }
             if (!tsa(tok, token_semi)) return false;
-            if (!topdefault) tok.token_error("tld values may only be specified in the top default context");
         }
         else if (have == token_html_limit) {
             have = tok.next();
@@ -507,11 +562,10 @@
                     break;  // done
                 }
                 else {
-                    if (topdefault) me.add_tag(have);
+                    me.add_tag(have);
                 }
             }
             if (!tsa(tok, token_semi)) return false;
-            if (!topdefault) tok.token_error("html tags may only be specified in the top default context");
         }
         else if (have == token_host_limit) {
             have = tok.next();
--- a/src/context.h	Wed Jul 13 23:04:14 2005 -0700
+++ b/src/context.h	Sat Jul 16 13:47:19 2005 -0700
@@ -58,6 +58,7 @@
     CONTEXT(CONTEXTP parent_, char *name_);
     ~CONTEXT();
     CONTEXTP    get_parent()                                {return parent;};
+    bool        is_parent(CONTEXTP p);      // is p a parent of this?
     char*       get_full_name(char *buf, int size);
     void        add_context(CONTEXTP child)                 {children[child->name] = child;};
     bool        allow_env_to(char *to)                      {return (parent) ? parent->cover_env_to(to) : true;};
@@ -88,15 +89,15 @@
     void        add_dnsbl(DNSBLP dns)                       {dnsbl_list.push_back(dns);};
     DNSBLP      find_dnsbl(char *name);
 
+    bool            get_content_filtering()                 {return content_filtering;};
     int             get_host_limit()                        {return host_limit;};
     bool            get_host_random()                       {return host_random;};
-    char*           get_content_suffix()                    {return content_suffix;};
-    char*           get_content_message()                   {return content_message;};
-    string_set&     get_content_host_ignore()               {return content_host_ignore;};
-    string_set&     get_content_tlds()                      {return content_tlds;};
-    string_set&     get_html_tags()                         {return html_tags;};
-    dnsblp_list&    get_dnsbl_list()                        {return dnsbl_list;};
-    bool            get_content_filtering()                 {return content_filtering;};
+    char*           get_content_suffix();
+    char*           get_content_message();
+    string_set&     get_content_host_ignore();
+    string_set&     get_content_tlds();
+    string_set&     get_html_tags();
+    dnsblp_list&    get_dnsbl_list();
 
     bool        acceptable_content(recorder &memory, char *&msg);
     bool        ignore_host(char *host);
@@ -122,13 +123,6 @@
     void        add_context(CONTEXTP con);
     void        add_to(char *to, CONTEXTP con);
     CONTEXTP    find_context(char *to);
-
-    char*       get_content_suffix()                        {return default_context->get_content_suffix()      ;};
-    char*       get_content_message()                       {return default_context->get_content_message()     ;};
-    string_set& get_content_host_ignore()                   {return default_context->get_content_host_ignore() ;};
-    string_set& get_content_tlds()                          {return default_context->get_content_tlds()        ;};
-    string_set& get_html_tags()                             {return default_context->get_html_tags()           ;};
-
     void        dump();
 };
 
--- a/src/dnsbl.cpp	Wed Jul 13 23:04:14 2005 -0700
+++ b/src/dnsbl.cpp	Sat Jul 16 13:47:19 2005 -0700
@@ -12,23 +12,15 @@
 -t sec   The timeout value.
 -c       Check the config, and print a copy to stdout. Don't start the
          milter or do anything with the socket.
+-s       Stress test by loading and deleting the current config in a loop.
 -d       Add debug syslog entries
+-e f|t   Print the results of looking up from address f and to address
+         t in the current config
 
 
 TODO:
-1) Add config for max_recipients for each mail domain. Recipients in
-excess of that limit will be rejected, and the entire data will be
-rejected if it is sent.
 
-2) Add config for poison addresses. If any recipient is poison, all
-recipients are rejected even if they would be whitelisted, and the
-data is rejected if sent.
-
-3) Add option to only allow one recipient if the return path is empty.
-
-4) Check if the envelope from domain name primary MX points 127.0.0.0/8
-
-5) Add option for using smtp connections to verify addresses from backup
+1) Add option for using smtp connections to verify addresses from backup
 mx machines. This allows the backup mx to learn the valid addresses
 on the primary machine.
 
@@ -95,6 +87,7 @@
 bool loader_run    = true;  // used to stop the config loader thread
 CONFIG * config = NULL;     // protected by the config_mutex
 int  generation = 0;        // protected by the config_mutex
+const int maxlen = 1000;    // used for snprintf buffers
 
 pthread_mutex_t  config_mutex;
 pthread_mutex_t  syslog_mutex;
@@ -163,6 +156,7 @@
         char *x = (*i).first;
         free(x);
     }
+    cm.clear();
 }
 
 
@@ -241,8 +235,11 @@
     authenticated = false;
     have_whites   = false;
     only_whites   = true;
-    memory        = new recorder(this, pc->get_html_tags(), pc->get_content_tlds());
-    scanner       = new url_scanner(memory);
+    memory              = NULL;
+    scanner             = NULL;
+    content_suffix      = NULL;
+    content_message     = NULL;
+    content_host_ignore = NULL;
 }
 
 mlfiPriv::~mlfiPriv() {
@@ -254,20 +251,25 @@
 }
 
 void mlfiPriv::reset(bool final) {
+    if (debug_syslog) my_syslog(this, "mlfiPriv::reset");
     if (mailaddr) free(mailaddr);
     if (queueid)  free(queueid);
     discard(env_to);
-    delete memory;
-    delete scanner;
+    if (memory)  delete memory;
+    if (scanner) delete scanner;
     if (!final) {
         mailaddr      = NULL;
         queueid       = NULL;
         authenticated = false;
         have_whites   = false;
         only_whites   = true;
-        memory        = new recorder(this, pc->get_html_tags(), pc->get_content_tlds());
-        scanner       = new url_scanner(memory);
+        memory              = NULL;
+        scanner             = NULL;
+        content_suffix      = NULL;
+        content_message     = NULL;
+        content_host_ignore = NULL;
     }
+    if (debug_syslog) my_syslog("mlfiPriv::reset exit");
 }
 
 void mlfiPriv::get_fd() {
@@ -368,7 +370,18 @@
 }
 
 void mlfiPriv::need_content_filter(char *rcpt, CONTEXT &con) {
+    if (debug_syslog) my_syslog(this, "need_content_filter");
     register_string(env_to, rcpt, &con);
+    if (!memory) {
+        // first recipient that needs content filtering sets all
+        // the content filtering parameters
+        memory        = new recorder(this, con.get_html_tags(), con.get_content_tlds());
+        scanner       = new url_scanner(memory);
+        content_suffix      = con.get_content_suffix();
+        content_message     = con.get_content_message();
+        content_host_ignore = &con.get_content_host_ignore();
+    }
+    if (debug_syslog) my_syslog(this, "need_content_filter exit");
 }
 
 #define MLFIPRIV    ((struct mlfiPriv *) smfi_getpriv(ctx))
@@ -378,7 +391,7 @@
 // syslog a message
 //
 void my_syslog(mlfiPriv *priv, char *text) {
-    char buf[1000];
+    char buf[maxlen];
     if (priv) {
         snprintf(buf, sizeof(buf), "%s: %s", priv->queueid, text);
         text = buf;
@@ -390,6 +403,8 @@
                 syslog_opened = true;
             }
             syslog(LOG_NOTICE, "%s", text);
+            closelog();
+            syslog_opened = false;
         pthread_mutex_unlock(&syslog_mutex);
     }
     else {
@@ -644,8 +659,8 @@
 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int ip) {
     CONFIG     &dc     = *priv.pc;
     string_set &hosts  = priv.memory->get_hosts();
-    string_set &ignore = dc.get_content_host_ignore();
-    char       *suffix = dc.get_content_suffix();
+    string_set &ignore = *priv.content_host_ignore;
+    char       *suffix = priv.content_suffix;
 
     int count = 0;
     int   cnt = hosts.size();   // number of hosts we could look at
@@ -662,7 +677,7 @@
         if ((cnt > limit) && (limit > 0) && random) {
             int r = rand() % cnt;
             if (r >= limit) {
-                char buf[1000];
+                char buf[maxlen];
                 snprintf(buf, sizeof(buf), "host %s skipped", host);
                 my_syslog(&priv, buf);
                 continue;
@@ -671,7 +686,7 @@
         count++;
         ip = dns_interface(priv, host, true, &nameservers);
         if (debug_syslog) {
-            char buf[1000];
+            char buf[maxlen];
             if (ip) {
                 char adr[sizeof "255.255.255.255"];
                 adr[0] = '\0';
@@ -704,7 +719,7 @@
         ip   = (*i).second;
         if (!ip) ip = dns_interface(priv, host, false, NULL);
         if (debug_syslog) {
-            char buf[200];
+            char buf[maxlen];
             if (ip) {
                 char adr[sizeof "255.255.255.255"];
                 adr[0] = '\0';
@@ -724,7 +739,7 @@
                     string_map::iterator j = nameservers.ns_host.find(host);
                     if (j != nameservers.ns_host.end()) {
                         char *refer = (*j).second;
-                        char buf[1000];
+                        char buf[maxlen];
                         snprintf(buf, sizeof(buf), "%s with nameserver %s", refer, host);
                         host = register_string(hosts, buf);    // put a copy into hosts, and return that reference
                     }
@@ -762,6 +777,7 @@
 //
 sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr)
 {
+    if (debug_syslog) my_syslog("mlfi_connect");
     // allocate some private memory
     mlfiPriv *priv = new mlfiPriv;
     if (hostaddr->sa_family == AF_INET) {
@@ -778,6 +794,7 @@
 sfsistat mlfi_envfrom(SMFICTX *ctx, char **from)
 {
     mlfiPriv &priv = *MLFIPRIV;
+    if (debug_syslog) my_syslog(&priv, "mlfi_envfrom");
     priv.mailaddr      = to_lower_string(from[0]);
     priv.authenticated = (smfi_getsymval(ctx, "{auth_authen}") != NULL);
     return SMFIS_CONTINUE;
@@ -787,11 +804,20 @@
 {
     DNSBLP rejectlist = NULL;   // list that caused the reject
     mlfiPriv &priv = *MLFIPRIV;
+    if (debug_syslog) my_syslog(&priv, "mlfi_envrcpt");
     CONFIG &dc = *priv.pc;
     if (!priv.queueid) priv.queueid = strdup(smfi_getsymval(ctx, "i"));
     char *rcptaddr  = rcpt[0];
     char *loto      = to_lower_string(rcptaddr);
-    CONTEXT     con = *(dc.find_context(loto)->find_context(priv.mailaddr));
+    if (debug_syslog) my_syslog(&priv, "finding context");
+    CONTEXT    &con = *(dc.find_context(loto)->find_context(priv.mailaddr));
+    if (debug_syslog) {
+        char buf[maxlen];
+        char msg[maxlen];
+        snprintf(msg, sizeof(msg), "from <%s> to <%s> using context %s", priv.mailaddr, loto, con.get_full_name(buf,maxlen));
+        my_syslog(&priv, msg);
+    }
+    if (debug_syslog) my_syslog(&priv, "finding from value");
     char *fromvalue = con.find_from(priv.mailaddr);
     free(loto);
     status st;
@@ -803,6 +829,7 @@
     }
     else {
         // check the dns based lists
+        if (debug_syslog) my_syslog(&priv, "checking dns lists");
         st = (check_dnsbl(priv, con.get_dnsbl_list(), rejectlist)) ? black : oksofar;
     }
     if (st == reject) {
@@ -810,7 +837,7 @@
         char adr[sizeof "255.255.255.255"];
         adr[0] = '\0';
         inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr));
-        char buf[2000];
+        char buf[maxlen];
         snprintf(buf, sizeof(buf), rejectlist->message, adr, adr);
         smfi_setreply(ctx, "550", "5.7.1", buf);
         return SMFIS_REJECT;
@@ -822,6 +849,7 @@
     }
     else {
         // accept the recipient
+        if (debug_syslog) my_syslog(&priv, "checking content filtering");
         if (!con.get_content_filtering()) st = white;
         if (st == oksofar) {
             // but remember the non-whites
@@ -838,6 +866,7 @@
 sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len)
 {
     mlfiPriv &priv = *MLFIPRIV;
+    if (debug_syslog) my_syslog(&priv, "mlfi_body");
     if (priv.authenticated)       return SMFIS_CONTINUE;
     if (priv.only_whites)         return SMFIS_CONTINUE;
     priv.scanner->scan(data, len);
@@ -846,8 +875,10 @@
 
 sfsistat mlfi_eom(SMFICTX *ctx)
 {
+    if (debug_syslog) my_syslog("mlfi_eom");
     sfsistat  rc;
     mlfiPriv &priv = *MLFIPRIV;
+    if (debug_syslog) my_syslog(&priv, "mlfi_eom");
     CONFIG   &dc   = *priv.pc;
     char     *host = NULL;
     int       ip;
@@ -855,6 +886,8 @@
     // process end of message
     if (priv.authenticated || priv.only_whites) rc = SMFIS_CONTINUE;
     else {
+        // assert env_to not empty
+        char buf[maxlen];
         char *msg = NULL;
         string_set alive;
         bool random = false;
@@ -872,22 +905,21 @@
                 limit   = max(limit, con.get_host_limit());
             }
         }
-        bool rejecting = alive.empty();
+        bool rejecting = alive.empty(); // if alive is empty, we must have set msg above in acceptable_content()
         if (!rejecting) {
-            rejecting = check_hosts(priv, random, limit, host, ip);
-            if (rejecting) {
-                static char buf[2000];
+            if (check_hosts(priv, random, limit, host, ip)) {
                 char adr[sizeof "255.255.255.255"];
                 adr[0] = '\0';
                 inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
-                snprintf(buf, sizeof(buf), dc.get_content_message(), host, adr);
+                snprintf(buf, sizeof(buf), priv.content_message, host, adr);
                 msg = buf;
+                rejecting = true;
             }
         }
         if (!rejecting) {
             rc = SMFIS_CONTINUE;
         }
-        else if (!priv.have_whites && alive.empty()) {
+        else if (!priv.have_whites) {
             // can reject the entire message
             smfi_setreply(ctx, "550", "5.7.1", msg);
             rc = SMFIS_REJECT;
@@ -909,6 +941,7 @@
 sfsistat mlfi_abort(SMFICTX *ctx)
 {
     mlfiPriv &priv = *MLFIPRIV;
+    if (debug_syslog) my_syslog(&priv, "mlfi_abort");
     priv.reset();
     return SMFIS_CONTINUE;
 }
@@ -916,6 +949,7 @@
 sfsistat mlfi_close(SMFICTX *ctx)
 {
     mlfiPriv *priv = MLFIPRIV;
+    if (debug_syslog) my_syslog(priv, "mlfi_close");
     if (!priv) return SMFIS_CONTINUE;
     delete priv;
     smfi_setpriv(ctx, NULL);
@@ -949,7 +983,7 @@
     pthread_mutex_lock(&config_mutex);
         newc->generation = generation++;
     pthread_mutex_unlock(&config_mutex);
-    char buf[200];
+    char buf[maxlen];
     snprintf(buf, sizeof(buf), "loading configuration generation %d", newc->generation);
     my_syslog(buf);
     if (load_conf(*newc, "dnsbl.conf")) {
@@ -996,7 +1030,7 @@
         for (configp_set::iterator i=old_configs.begin(); i!=old_configs.end(); ) {
             CONFIG *old = *i;
             if (!old->reference_count) {
-                char buf[200];
+                char buf[maxlen];
                 snprintf(buf, sizeof(buf), "freeing memory for old configuration generation %d", old->generation);
                 my_syslog(buf);
                 delete old; // destructor does all the work
@@ -1133,6 +1167,7 @@
 
     if (check) {
         use_syslog = false;
+        debug_syslog = true;
         CONFIG *conf = new_conf();
         if (conf) {
             conf->dump();
@@ -1167,7 +1202,6 @@
             CONFIG *conf = new_conf();
             if (conf) {
                 CONTEXTP con = conf->find_context(to);
-                const int maxlen = 1000;
                 char buf[maxlen];
                 fprintf(stdout, "envelope to   <%s> finds context %s\n", to, con->get_full_name(buf,maxlen));
                 CONTEXTP fc = con->find_context(from);
--- a/src/dnsbl.h	Wed Jul 13 23:04:14 2005 -0700
+++ b/src/dnsbl.h	Sat Jul 16 13:47:19 2005 -0700
@@ -28,6 +28,10 @@
     context_map env_to;     // map each non-whitelisted recipient to their filtering context
     recorder    *memory;    // memory for the content scanner
     url_scanner *scanner;   // object to handle body scanning
+    char        *content_suffix;        // content filtering parameters
+    char        *content_message;       // ""
+    string_set  *content_host_ignore;   // ""
+
 
     mlfiPriv();
     ~mlfiPriv();
--- a/src/scanner.cpp	Wed Jul 13 23:04:14 2005 -0700
+++ b/src/scanner.cpp	Sat Jul 16 13:47:19 2005 -0700
@@ -1148,9 +1148,6 @@
     bad_html_tags = 0;
     binary_tags   = 0;
 }
-recorder::~recorder() {
-    empty();
-}
 void recorder::empty() {
     bad_html_tags = 0;
     binary_tags   = 0;
--- a/src/scanner.h	Wed Jul 13 23:04:14 2005 -0700
+++ b/src/scanner.h	Sat Jul 16 13:47:19 2005 -0700
@@ -17,7 +17,7 @@
 
 public:
     recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_);
-    ~recorder();
+    ~recorder()                                 { empty(); };
     void empty();
     void new_url(char *host);
     void new_tag(char *tag);
--- a/test.bash	Wed Jul 13 23:04:14 2005 -0700
+++ b/test.bash	Sat Jul 16 13:47:19 2005 -0700
@@ -18,6 +18,8 @@
 ###########################
 # compile the milter
 #
+
+if [ "$1" == "build" ]; then
 rm -f dnsbl.o scanner.o context.o tokenizer.o
 g++ -c -pthread dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp
 if [ $? -ne 0 ]; then
@@ -29,6 +31,7 @@
     echo "linker errors"
     exit
 fi
+fi
 
 if [ "$1" == "comp" ]; then
     exit
--- a/xml/dnsbl.in	Wed Jul 13 23:04:14 2005 -0700
+++ b/xml/dnsbl.in	Sat Jul 16 13:47:19 2005 -0700
@@ -80,68 +80,70 @@
 <p>DNSBL-LIST - a named list of DNSBLs that will be used for specific
 recipients or recipient domains.
 
-<p>The envelope to email address is used to find an initial filtering context.
-That context then uses the envelope from email address to find the final
-filtering context. The envelope from email address is checked in that context
-to see if we should whitelist or blacklist the message
-two names (a named DNSBL-LIST, and a named ENVELOPE-FROM-MAP).  If the
-recipient is not found in the configuration, the named DEFAULT
-dnsbl-list and DEFAULT envelope-from-map will be used.  When mail is
-received for that recipient,
+<hr> <center>Filtering Procedure</center>
+
+<p>If the client has authenticated with sendmail, the mail is accepted,
+the dns lists are not checked, and the body content is not scanned.
+Otherwise, we follow these steps for each recipient.
 
 <ol>
 
-<li>If the client has authenticated with sendmail, the mail is accepted,
-the dns lists are not checked, and the body content is not scanned.
-
 <li>The envelope to email address is used to find an initial filtering
-context. We first look for a context that specified the full email address
-in the env_to statement. If that is not found, we look for a context that
-specified the entire domain name of the envelope recipient in the env_to
-statement. If that is not found, we look for a context that specified the
-user@ part of the envelope recipient in the env_to statement. If that is not
-found, we use the first top level context defined in the config file.
+context.  We first look for a context that specified the full email
+address in the env_to statement.  If that is not found, we look for a
+context that specified the entire domain name of the envelope recipient
+in the env_to statement.  If that is not found, we look for a context
+that specified the user@ part of the envelope recipient in the env_to
+statement.  If that is not found, we use the first top level context
+defined in the config file.
 
-<li>The initial filtering context may redirect to a child context based
-on the values in the initial context's env_from statement.  We look for
-[1) the full envelope from email address, 2) the domain name part of the
-envelope from address, 3) the user@ part of the envelope from address]
-in that context's env_from statement, with values that point to a child
-context.  If such an entry is found, we switch to that filtering
-context.
+<br><br><li>The initial filtering context may redirect to a child
+context based on the values in the initial context's env_from statement.
+We look for [1) the full envelope from email address, 2) the domain name
+part of the envelope from address, 3) the user@ part of the envelope
+from address] in that context's env_from statement, with values that
+point to a child context.  If such an entry is found, we switch to that
+child filtering context.
 
-<li>We lookup [1) the full envelope from email address, 2) the domain
-name part of the envelope from address, 3) the user@ part of the
+<br><br><li>We look up [1) the full envelope from email address, 2) the
+domain name part of the envelope from address, 3) the user@ part of the
 envelope from address] in the filtering context env_from statement.
 That results in one of (white, black, unknown, inherit).
 
-<li>If the answer is black, mail to this recipient is rejected with "no
-such user", and the dns lists are not checked.
-
-<li>If the answer is white, mail to this recipient is accepted and the
-dns lists are not checked.
+<br><br><li>If the answer is black, mail to this recipient is rejected
+with "no such user", and the dns lists are not checked.
 
-<li>If the answer is unknown, we don't reject yet, but the dns lists
-will be checked, and the content may be scanned.
+<br><br><li>If the answer is white, mail to this recipient is accepted
+and the dns lists are not checked.
 
-<li>If the answer is inherit, we repeat the envelope from search in the
-parent context.
+<br><br><li>If the answer is unknown, we don't reject yet, but the dns
+lists will be checked, and the content may be scanned.
 
-<li>The dns lists specified in the filtering context are checked and the
-mail is rejected if any list has an A record for the standard dns based
-lookup scheme (reversed octets of the client followed by the dns
-suffix).
+<br><br><li>If the answer is inherit, we repeat the envelope from search
+in the parent context.
 
-<li>If the mail has not been accepted or rejected yet, the body content
-is optionally scanned for HTTP URLs (after base64, mime and html entity
-decoding), and the first &lt;configurable&gt; host names are checked for
-their presence on the SBL.  If any host name is on the SBL, and it is
-not on the "ignore" list, the mail is rejected.  If we are doing body
-content scanning, we also scan for excessive bad html tags, and if a
-&lt;configurable&gt; limit is exceeded, the mail is rejected.
+<br><br><li>The dns lists specified in the filtering context are checked
+and the mail is rejected if any list has an A record for the standard
+dns based lookup scheme (reversed octets of the client followed by the
+dns suffix).
+
+<br><br><li>If the mail has not been accepted or rejected yet, and the
+filtering context enables content filtering, and this is the first such
+recipient in this smtp transaction, we set the content filtering parameters
+from this context, and enable content filtering for this body.
 
 </ol>
 
+<p>If content filtering is enabled for this body, the mail text is
+decoded (uuencode, base64, mime, html entity, url encodings), scanned
+for HTTP and HTTPS URLs, and the first &lt;configurable&gt; host names
+are checked for their presence on the single &lt;configurable&gt; DNSBL.
+The only known list that is suitable for this purpose is the SBL.  If
+any of those host names is on that DNSBL (or has nameservers that are
+on that list), and that host name is not on the &lt;configurable&gt; ignore list,
+the mail is rejected.  We also scan for excessive bad html tags, and if
+a &lt;configurable&gt; limit is exceeded, the mail is rejected.
+
 <hr> <center>Sendmail access vs. DNSBL</center>
 <p>With the standard sendmail.mc dnsbl FEATURE, the dnsbl checks may be
 suppressed by entries in the /etc/mail/access database.  For example,
@@ -241,6 +243,34 @@
 processes, 400 milter threads, and 400 dns resolver processes.  Of
 course that steady state is very unlikely to happen.
 
+<hr> <center>Rejected Ideas</center>
+
+<p>The following ideas have been considered and rejected.
+
+<p>Add max_recipients for each mail domain to the configuration.
+Recipients in excess of that limit will be rejected, and all the
+recipients in that domain will be removed if there are some other
+whitelisted recipients.  Current spammers *very* rarely send to more than
+ten recipients in a single smtp transaction, so this won't stop
+any significant amount of spam.
+
+<p>Add poison addresses to the configuration.  If any recipient is
+poison, all recipients are rejected even if they would be whitelisted,
+and the data is rejected if sent.  I have a collection of spam trap
+addresses that would be suitable for such use.  Based on my log files,
+any mail to those spam trap addresses is rejected based on either dnsbl
+lookups or the DCC.  So this won't result in blocking any additional
+spam.
+
+<p>Add an option to only allow one recipient if the return path is
+empty.  Based on my log files, there is no mail that violates this
+check.
+
+<p>Reject the mail if the envelope from domain name contains any MX
+records pointing to 127.0.0.0/8. I don't see any significant amount of spam
+sent with such domain names.
+
+
 <pre>
 $Id$
 </pre>
--- a/xml/sample.conf	Wed Jul 13 23:04:14 2005 -0700
+++ b/xml/sample.conf	Sat Jul 16 13:47:19 2005 -0700
@@ -128,7 +128,7 @@
             };
         };
 
-        env_from {
+        env_from inherit {
             yahoo.com           black;      # no mail from yahoo
             first@yahoo.com     unknown;    # except this one
         };