diff src/dnsbl.cpp @ 24:2e23b7184d2b

start coding for bad html tag detection
author carl
date Wed, 19 May 2004 21:40:50 -0700
parents 06de5ab6a232
children fdae7ab30cfc
line wrap: on
line diff
--- a/src/dnsbl.cpp	Wed May 12 13:23:22 2004 -0700
+++ b/src/dnsbl.cpp	Wed May 19 21:40:50 2004 -0700
@@ -125,6 +125,9 @@
     string_map  env_to_chkfrom;     // map recipient to a named from map
     char *      content_suffix;     // for sbl url body filtering
     char *      content_message;    // ""
+    char *      limit_message;      // error message for excessive bad html tags
+    int         bad_tag_limit;      // limit on bad html tags
+    string_set  html_tags;          // set of valid html tags
     CONFIG();
     ~CONFIG();
 };
@@ -133,14 +136,19 @@
     load_time       = 0;
     content_suffix  = NULL;
     content_message = NULL;
+    limit_message   = NULL;
+    bad_tag_limit   = 0;
 }
 CONFIG::~CONFIG() {
     for (dnsblp_map::iterator i=dnsbls.begin(); i!=dnsbls.end(); i++) {
         DNSBLP d = (*i).second;
+        // delete the underlying DNSBL objects.
         delete d;
     }
     for (dnsbllp_map::iterator i=dnsblls.begin(); i!=dnsblls.end(); i++) {
         DNSBLLP d = (*i).second;
+        // *d is a list of pointers to DNSBL objects, but
+        // the underlying objects have already been deleted above.
         delete d;
     }
     for (from_map::iterator i=env_from.begin(); i!=env_from.end(); i++) {
@@ -213,9 +221,9 @@
     bool    authenticated;  // client authenticated? if so, suppress all dnsbl checks
     bool    have_whites;    // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails
     bool    only_whites;    // every recipient is whitelisted?
+    string_set  non_whites; // remember the non-whitelisted recipients so we can remove them if need be
+    recorder    *memory;    // memory for the content scanner
     url_scanner *scanner;   // object to handle body scanning
-    string_set  non_whites; // remember the non-whitelisted recipients so we can remove them if need be
-    string_set  hosts;      // remember the hosts that we have checked
     mlfiPriv();
     ~mlfiPriv();
     void reset(bool final = false); // for a new message
@@ -230,7 +238,8 @@
     authenticated = false;
     have_whites   = false;
     only_whites   = true;
-    scanner       = new url_scanner(&hosts);
+    memory        = new recorder(&pc->html_tags);
+    scanner       = new url_scanner(memory);
 }
 mlfiPriv::~mlfiPriv() {
     pthread_mutex_lock(&config_mutex);
@@ -240,15 +249,16 @@
 }
 void mlfiPriv::reset(bool final) {
     if (mailaddr) free(mailaddr);
+    discard(non_whites);    // hand the remembered non-whitelisted recipients to discard()
+    delete memory;          // NOTE(review): freed before the scanner that was constructed with this pointer -- confirm ~url_scanner never touches it
     delete scanner;
-    discard(non_whites);
-    discard(hosts);
     if (!final) {
         mailaddr      = NULL;
         authenticated = false;
         have_whites   = false;
         only_whites   = true;
-        scanner       = new url_scanner(&hosts);
+        memory        = new recorder(&pc->html_tags);  // fresh recorder, given the configured set of valid html tags
+        scanner       = new url_scanner(memory);       // new scanner constructed with that recorder
     }
 }
 
@@ -471,7 +481,7 @@
     CONFIG     &dc   = *priv.pc;
     if (!dc.content_suffix) return oksofar;
     int count = 0;
-    for (string_set::iterator i=priv.hosts.begin(); i!=priv.hosts.end(); i++) {
+    for (string_set::iterator i=priv.memory->hosts.begin(); i!=priv.memory->hosts.end(); i++) {
         count++;
         if (count > 20) return oksofar; // silly to check too many hosts
         host = *i;
@@ -494,6 +504,10 @@
             if (st == reject) return st;
         }
     }
+    host = NULL;
+    int bad = priv.memory->bad_html_tags;
+    int lim = priv.pc->bad_tag_limit;
+    if ((bad > lim) && (lim > 0)) return reject;
     return oksofar;
 }
 
@@ -609,11 +623,17 @@
     else {
         if (!priv.have_whites) {
             // can reject the entire message
+            char buf[2000];     // reply text that is handed to smfi_setreply just below
+            if (!host) {
+                // no host recorded, so the rejection is for excessive bad html tags; copy the configured text verbatim, never use it as a format string
+                snprintf(buf, sizeof(buf), "%s", priv.pc->limit_message);
+            }
+            else {  // a host is recorded: fill the content_message template with the host name and the dotted-quad address
             char adr[sizeof "255.255.255.255"];
             adr[0] = '\0';
             inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
-            char buf[2000];
             snprintf(buf, sizeof(buf), priv.pc->content_message, host, adr);
+            }
             smfi_setreply(ctx, "550", "5.7.1", buf);
             rc = SMFIS_REJECT;
         }
@@ -708,6 +728,9 @@
     if (dc.content_suffix) {
         fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message);
     }
+    if (dc.bad_tag_limit) {
+        fprintf(stdout, "\ncontent filtering for excessive html tags enabled with limit %d %s\n", dc.bad_tag_limit, dc.limit_message);
+    }
     fprintf(stdout, "\nfiles\n");
     for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) {
         char *f = *i;
@@ -805,8 +828,10 @@
 static void load_conf(CONFIG &dc, char *fn) {
     dc.config_files.push_back(fn);
     map<char*, int, ltstr> commands;
-    enum {dummy, content, dnsbl, dnsbll, envfrom, envto, include, includedcc};
+    enum {dummy, content, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc};
     commands["content"    ] = content;
+    commands["html_limit" ] = htmllimit;
+    commands["html_tag"   ] = htmltag;
     commands["dnsbl"      ] = dnsbl;
     commands["dnsbl_list" ] = dnsbll;
     commands["env_from"   ] = envfrom;
@@ -833,7 +858,7 @@
             switch (commands[cmd]) {
                 case content: {
                     char *suff = strtok(NULL, delim);
-                    if (!suff) break;                           // no dns suffic
+                    if (!suff) break;                           // no dns suffix
                     char *msg = suff + strlen(suff);
                     if ((msg - line) >= strlen(orig)) break;    // line ended with the dns suffix
                     msg  = strchr(msg+1, '\'');
@@ -848,6 +873,30 @@
                     processed = true;
                     } break;
 
+                case htmllimit: {   // html_limit <limit> '<message>'
+                    char *limit = strtok(NULL, delim);
+                    if (!limit) break;                          // no integer limit
+                    char *msg = limit + strlen(limit);          // the terminator that ends the limit token
+                    if ((msg - line) >= strlen(orig)) break;    // line ended with the limit
+                    msg  = strchr(msg+1, '\'');                 // search the rest of the line for the opening quote
+                    if (!msg) break;                            // no reply message template
+                    msg++; // move over the leading '
+                    if ((msg - line) >= strlen(orig)) break;    // line ended with the leading quote
+                    char *last = strchr(msg, '\'');
+                    if (!last) break;                           // no trailing quote
+                    *last = '\0';                               // make it a null terminator
+                    dc.bad_tag_limit = atoi(limit);             // atoi gives 0 for non-numeric text, and a limit of 0 never rejects
+                    dc.limit_message = register_string(msg);    // the text between the quotes becomes the rejection message
+                    processed = true;
+                    } break;
+
+                case htmltag: {     // html_tag <tag>
+                    char *tag = next_token(delim);
+                    if (!tag) break;                            // no html tag value
+                    dc.html_tags.insert(tag);                   // add to the configured set of valid html tags
+                    processed = true;
+                    } break;
+
                 case dnsbl: {
                     // have a new dnsbl to use
                     char *name = next_token(delim);