changeset 74:b7449114ebb0

start coding on new config syntax
author carl
date Sun, 10 Jul 2005 14:19:00 -0700
parents 2b369f7db7bf
children 1142e46be550
files new.bash src/context.cpp src/context.h src/dnsbl.cpp src/dnsbl.h src/includes.h src/scanner.cpp src/scanner.h src/tokenizer.cpp test.bash
diffstat 10 files changed, 210 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/new.bash	Sun Jul 10 13:28:33 2005 -0700
+++ b/new.bash	Sun Jul 10 14:19:00 2005 -0700
@@ -4,12 +4,12 @@
 ## compile and run the new parser program
 ##
 rm -f dnsbl.o scanner.o context.o tokenizer.o
-g++ -c dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp
+g++ -c -pthread dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp
 if [ $? -ne 0 ]; then
     echo "compiler errors"
     exit
 fi
-g++ -o dnsbl dnsbl.o scanner.o context.o tokenizer.o -pthread
+g++ -o dnsbl dnsbl.o scanner.o context.o tokenizer.o /usr/lib/libresolv.a -lmilter -pthread
 if [ $? -ne 0 ]; then
     echo "linker errors"
     exit
--- a/src/context.cpp	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/context.cpp	Sun Jul 10 14:19:00 2005 -0700
@@ -324,7 +324,7 @@
 ////////////////////////////////////////////////
 // helper to discard the strings held by a string_set
 //
-static void discard(string_set &s) {
+void discard(string_set &s) {
     for (string_set::iterator i=s.begin(); i!=s.end(); i++) {
         free(*i);
     }
--- a/src/context.h	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/context.h	Sun Jul 10 14:19:00 2005 -0700
@@ -100,7 +100,7 @@
     dnsblp_list&    get_dnsbl_list()                        {return dnsbl_list;};
     bool            get_content_filtering()                 {return content_filtering;};
 
-    bool        acceptable_content(recorder &memory);
+    bool        acceptable_content(recorder &memory, char *&msg);
     bool        ignore_host(char *host);
 
     void        dump(int level = 0);
@@ -166,7 +166,7 @@
 
 extern string_set all_strings;      // owns all the strings, only modified by the config loader thread
 
-static void discard(string_set &s);
+void discard(string_set &s);
 char* register_string(string_set &s, char *name);
 char* register_string(char *name);
 CONFIG *parse_config(char *fn);
--- a/src/dnsbl.cpp	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/dnsbl.cpp	Sun Jul 10 14:19:00 2005 -0700
@@ -119,7 +119,7 @@
 };
 
 
-void ns_map::~ns_map() {
+ns_map::~ns_map() {
     for (string_map::iterator i=ns_host.begin(); i!=ns_host.end(); i++) {
         char *x = (*i).first;
         char *y = (*i).second;
@@ -635,10 +635,9 @@
 //  check the hosts from the body against the content dnsbl
 //
 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int ip);
-bool check_hosts(mlfiPriv &priv) {
-    static buf[2000];
+bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int ip) {
     CONFIG &dc = *priv.pc;
-    string_set &hosts  = priv.memory->hosts;
+    string_set &hosts  = priv.memory->get_hosts();
     string_set &ignore = dc.get_content_host_ignore();
 
     int count = 0;
@@ -681,7 +680,7 @@
             int_set::iterator i = ips.find(ip);
             if (i == ips.end()) {
                 ips.insert(ip);
-                if (check_single(priv, ip, dc.content_suffix)) {
+                if (check_single(priv, ip, dc.get_content_suffix())) {
                     return true;
                 }
             }
@@ -714,7 +713,7 @@
             int_set::iterator i = ips.find(ip);
             if (i == ips.end()) {
                 ips.insert(ip);
-                if (check_single(priv, ip, dc.content_suffix)) {
+                if (check_single(priv, ip, dc.get_content_suffix())) {
                     string_map::iterator j = nameservers.ns_host.find(host);
                     if (j != nameservers.ns_host.end()) {
                         char *refer = (*j).second;
@@ -778,7 +777,7 @@
     }
     else {
         // check the dns based lists
-        st = check_dnsbl(priv, con.get_dnsbl_list(), rejectlist);
+        st = (check_dnsbl(priv, con.get_dnsbl_list(), rejectlist)) ? black : oksofar;
     }
     if (st == reject) {
         // reject the recipient based on some dnsbl
@@ -833,11 +832,11 @@
         char *msg = NULL;
         string_set alive;
         bool random = false;
-        bool limit  = 0;
-        for (context_map::iterator i=env_to.begin(); i!=env_to.end(); i++) {
+        int  limit  = 0;
+        for (context_map::iterator i=priv.env_to.begin(); i!=priv.env_to.end(); i++) {
             char *rcpt   = (*i).first;
             CONTEXT &con = *((*i).second);
-            if (!con.acceptable_content(priv.memory, msg)) {
+            if (!con.acceptable_content(*priv.memory, msg)) {
                 // bad html tags or excessive hosts
                 smfi_delrcpt(ctx, rcpt);
             }
@@ -927,7 +926,7 @@
     char buf[200];
     snprintf(buf, sizeof(buf), "loading configuration generation %d", newc->generation);
     my_syslog(buf);
-    if (load_conf(*newc, "dnsbl.conf") {
+    if (load_conf(*newc, "dnsbl.conf")) {
         newc->load_time = time(NULL);
         return newc;
     }
@@ -952,7 +951,7 @@
         time_t then = dc.load_time;
         struct stat st;
         bool reload = false;
-        for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) {
+        for (string_set::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) {
             char *fn = *i;
             if (stat(fn, &st))           reload = true; // file disappeared
             else if (st.st_mtime > then) reload = true; // file modified
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dnsbl.h	Sun Jul 10 14:19:00 2005 -0700
@@ -0,0 +1,45 @@
+#ifndef dnsbl_include
+#define dnsbl_include
+
+#include "context.h"
+
+extern bool debug_syslog;
+
+class recorder;
+class url_scanner;
+
+////////////////////////////////////////////////
+// mail filter private data, held for us by sendmail
+//
+struct mlfiPriv    // per-connection filter state; fields are split into connection-scoped and message-scoped groups
+{
+    // connection specific data
+    CONFIG  *pc;                    // global filtering configuration
+    int     fd;                     // to talk to dns resolvers process
+    bool    err;                    // did we get any errors on the resolver socket?
+    int     ip;                     // ip4 address of the smtp client
+    map<DNSBLP, bool> checked;      // map of dnsblp to result of (ip listed on that dnsbl)
+    // message specific data
+    char    *mailaddr;      // envelope from value
+    char    *queueid;       // sendmail queue id
+    bool    authenticated;  // client authenticated? if so, suppress all dnsbl checks
+    bool    have_whites;    // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails
+    bool    only_whites;    // every recipient is whitelisted?
+    context_map env_to;     // map each non-whitelisted recipient to their filtering context
+    recorder    *memory;    // memory for the content scanner; only forward-declared in this header
+    url_scanner *scanner;   // object to handle body scanning; only forward-declared in this header
+
+    mlfiPriv();
+    ~mlfiPriv();
+    void reset(bool final = false); // for a new message; the meaning of final is not visible in this header
+    void get_fd();                  // presumably acquires fd for the resolver process - confirm in dnsbl.cpp
+    void return_fd();               // presumably releases fd again - confirm in dnsbl.cpp
+    int  my_read(char *buf, int len);   // NOTE(review): looks like a read of up to len bytes on fd - confirm
+    int  my_write(char *buf, int len);  // NOTE(review): looks like a write of len bytes on fd - confirm
+    void need_content_filter(char *rcpt, CONTEXT &con);    // presumably records rcpt -> con in env_to - confirm
+};
+
+void my_syslog(mlfiPriv *priv, char *text);
+void my_syslog(char *text);
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/includes.h	Sun Jul 10 14:19:00 2005 -0700
@@ -0,0 +1,4 @@
+#include "tokenizer.h"
+#include "context.h"
+#include "dnsbl.h"
+#include "scanner.h"
--- a/src/scanner.cpp	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/scanner.cpp	Sun Jul 10 14:19:00 2005 -0700
@@ -1105,7 +1105,7 @@
 void fsa::error(char *err) {
     count = 0;
     st    = init;
-    if (err) my_syslog(memory->priv, err);
+    if (err) my_syslog(memory->get_priv(), err);
 }
 
 void fsa::pusher() {
@@ -1136,8 +1136,8 @@
                         // have two periods, so at least three components, and no empty components
                         for (int i=0; i<count; i++) pending[i] = tolower(pending[i]);
                         // is last component a tld?
-                        string_set::iterator i = memory->tlds->find(p2+1);
-                        if (i != memory->tlds->end()) memory->new_url((char*)pending);
+                        string_set::iterator i = memory->get_tlds()->find(p2+1);
+                        if (i != memory->get_tlds()->end()) memory->new_url((char*)pending);
                     }
                 }
                 st = h_init;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/scanner.h	Sun Jul 10 14:19:00 2005 -0700
@@ -0,0 +1,138 @@
+#ifndef scanner_include
+#define scanner_include
+
+#include "dnsbl.h"
+
+////////////////////////////////////////////////
+// memory for the content scanner
+//
+class recorder     // state shared by the scanner state machines: host names seen plus tag counters
+{
+    mlfiPriv    *priv;      // needed for syslog
+    string_set  *html_tags; // valid tags
+    string_set  *tlds;      // valid tlds
+    string_set  hosts;      // exposed through get_hosts(); check_hosts() in dnsbl.cpp iterates it
+    int         bad_html_tags;  // counter tested by excessive_bad_tags()
+    int         binary_tags;    // counter that bad_html_tags is weighed against in excessive_bad_tags()
+
+public:
+    recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_);  // sets are taken by reference but held as pointers
+    ~recorder();
+    void empty();               // defined elsewhere; presumably clears per-message state - confirm
+    void new_url(char *host);   // called by fsa::pusher() with a lowercased host whose last component is in tlds
+    void new_tag(char *tag);    // defined elsewhere
+    void binary();              // defined elsewhere
+    mlfiPriv   *get_priv()                      {return priv;                                                                      };
+    string_set *get_tlds()                      {return tlds;                                                                      };
+    string_set &get_hosts()                     {return hosts;                                                                     };
+    bool        excessive_bad_tags(int limit)   {return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); };  // limit <= 0 disables the check
+    bool        excessive_hosts(int limit)      {return (limit > 0) && (hosts.size() > limit);                                     };  // NOTE(review): size() is unsigned vs int limit; safe only because limit > 0 is tested first
+};
+
+
+////////////////////////////////////////////////
+// finite state machine
+//
+enum state {// states for every scanner fsa, grouped by recognizer; this first group: host name recognizer states
+            h_init,
+            h_host,
+
+            // html tag discarder states
+            t_init,
+            t_tag1,     // seen opening <
+            t_tag2,     // not comment
+            t_com1,     // seen !
+            t_com2,     // seen first -
+            t_com3,     // seen second -, looking for -->
+            t_com4,     // seen first - (presumably of a closing -->; confirm in scanner.cpp)
+            t_com5,     // seen second - (presumably of a closing -->; confirm in scanner.cpp)
+            t_disc,     // looking for closing >
+
+            // url recognizer states
+            u_init,
+            u_http,
+            u_sla,
+            u_url,
+
+            // url decoder states  %xx
+            d_init,
+            d_pcnt,
+            d_1,
+
+            // html entity decoder states &#nnn;
+            e_init,
+            e_amp,
+            e_num,
+
+            // mime decoder states =xx
+            m_init,
+            m_eq,
+            m_1,
+
+            // base64 decoder states
+            b_init,
+            b_lf,
+            b_lf2,
+            b_64,
+
+            // uuencoding decoder states
+            uu_init,
+            uu_lf,
+            uu_lf2,
+            uu_64,
+
+            // counter for number of columns in the table; its value equals the number of enumerators above, so order matters
+            end_state,
+
+            // temporary states; they are numbered after end_state, so they fall outside that column count
+            h_end,
+            t_bin,
+            t_end,
+            u_reco,
+            d_2,
+            e_semi,
+            m_2,
+            m_cr,
+            m_nl,
+            b_cr,
+            uu_cr
+           };
+
+#define PENDING_LIMIT 100
+class fsa {    // one finite state machine of the content scanner
+    u_char      pending[PENDING_LIMIT];    // buffered bytes; pusher() lowercases pending[0..count) before a tld lookup
+    int         count;      // number of bytes currently held in pending; error() resets it to 0
+    state       st;         // current state; error() sets it back to init
+    state       init;       // state that error() restores
+    fsa         *next1;     // presumably a downstream machine fed by this one - confirm in scanner.cpp
+    fsa         *next2;     // presumably a second downstream machine - confirm in scanner.cpp
+    recorder    *memory;    // used for get_priv() when logging, get_tlds() lookups and new_url() reports
+
+public:
+    fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_);
+    void push(u_char *buf, int len);    // defined in scanner.cpp; presumably feeds len bytes from buf - confirm
+    void pusher();                      // defined in scanner.cpp; among other things reports host names via memory->new_url()
+    void error(char *err);              // resets count and st, then logs err through my_syslog when err is non-null
+};
+
+
+////////////////////////////////////////////////
+// the content scanner
+//
+class url_scanner {    // holds the eight fsa instances used to scan message content
+    fsa *host_parser;   // NOTE(review): the roles below are inferred from names and the state groups - confirm in scanner.cpp
+    fsa *tags_parser;   // presumably the html tag discarder
+    fsa *urls_parser;   // presumably the url recognizer
+    fsa *urld_parser;   // presumably the %xx url decoder
+    fsa *html_parser;   // presumably the &#nnn; html entity decoder
+    fsa *mime_parser;   // presumably the =xx mime decoder
+    fsa *b64_parser;    // presumably the base64 decoder
+    fsa *uu_parser;     // presumably the uuencoding decoder
+
+public:
+    url_scanner(recorder *memory);
+    ~url_scanner();
+    void scan(u_char *buffer, size_t length);   // defined elsewhere; takes a byte buffer and its length
+};
+
+#endif
--- a/src/tokenizer.cpp	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/tokenizer.cpp	Sun Jul 10 14:19:00 2005 -0700
@@ -6,7 +6,7 @@
 
 */
 
-#include "context.h"
+#include "dnsbl.h"
 
 static char* tokenizer_version="$Id$";
 
--- a/test.bash	Sun Jul 10 13:28:33 2005 -0700
+++ b/test.bash	Sun Jul 10 14:19:00 2005 -0700
@@ -18,12 +18,13 @@
 ###########################
 # compile the milter
 #
-g++ -c -pthread dnsbl.cpp
+rm -f dnsbl.o scanner.o context.o tokenizer.o
+g++ -c -pthread dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp
 if [ $? -ne 0 ]; then
     echo "compiler errors"
     exit
 fi
-g++ -o dnsbl dnsbl.o /usr/lib/libresolv.a -lmilter -pthread
+g++ -o dnsbl dnsbl.o scanner.o context.o tokenizer.o /usr/lib/libresolv.a -lmilter -pthread
 if [ $? -ne 0 ]; then
     echo "linker errors"
     exit