changeset 73:2b369f7db7bf

start coding on new config syntax
author carl
date Sun, 10 Jul 2005 13:28:33 -0700
parents e6a2d0be7c5e
children b7449114ebb0
files new.bash src/context.cpp src/context.h src/dnsbl.cpp src/scanner.cpp src/tokenizer.cpp src/tokenizer.h
diffstat 7 files changed, 82 insertions(+), 154 deletions(-) [+]
line wrap: on
line diff
--- a/new.bash	Sun Jul 10 13:28:33 2005 -0700
+++ b/new.bash	Sun Jul 10 13:28:33 2005 -0700
@@ -3,13 +3,13 @@
 ############################
 ## compile and run the new parser program
 ##
-rm -f new.o context.o tokenizer.o
-g++ -c new.cpp context.cpp tokenizer.cpp
+rm -f dnsbl.o scanner.o context.o tokenizer.o
+g++ -c dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp
 if [ $? -ne 0 ]; then
     echo "compiler errors"
     exit
 fi
-g++ -o new new.o context.o tokenizer.o -pthread
+g++ -o dnsbl dnsbl.o scanner.o context.o tokenizer.o -pthread
 if [ $? -ne 0 ]; then
     echo "linker errors"
     exit
--- a/src/context.cpp	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/context.cpp	Sun Jul 10 13:28:33 2005 -0700
@@ -8,7 +8,7 @@
 
 #include "includes.h"
 
-static char* context_version="$Id:";
+static char* context_version="$Id$";
 
 char *token_black;
 char *token_content;
--- a/src/context.h	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/context.h	Sun Jul 10 13:28:33 2005 -0700
@@ -1,3 +1,6 @@
+#ifndef context_include
+#define context_include
+
 #include "tokenizer.h"
 #include <map>
 
@@ -11,6 +14,7 @@
 
 class DNSBL;
 class CONTEXT;
+class recorder;
 
 typedef map<char *, char *, ltstr>        string_map;
 typedef set<int>                          int_set;
@@ -86,6 +90,19 @@
     void        add_dnsbl(DNSBLP dns)                       {dnsbl_list.push_back(dns);};
     DNSBLP      find_dnsbl(char *name);
 
+    int             get_host_limit()                        {return host_limit;};
+    bool            get_host_random()                       {return host_random;};
+    char*           get_content_suffix()                    {return content_suffix;};
+    char*           get_content_message()                   {return content_message;};
+    string_set&     get_content_host_ignore()               {return content_host_ignore;};
+    string_set&     get_content_tlds()                      {return content_tlds;};
+    string_set&     get_html_tags()                         {return html_tags;};
+    dnsblp_list&    get_dnsbl_list()                        {return dnsbl_list;};
+    bool            get_content_filtering()                 {return content_filtering;};
+
+    bool        acceptable_content(recorder &memory);
+    bool        ignore_host(char *host);
+
     void        dump(int level = 0);
 };
 
@@ -100,12 +117,20 @@
     context_list    contexts;       // owns all the contexts, not just top level contexts
     context_map     env_to;         // map recipient to a filtering context
     CONTEXTP        default_context;// for env_to values that don't have their own specific filtering context
+    // the default context is also used for some of the content filtering values
 
     CONFIG();
     ~CONFIG();
     void        add_context(CONTEXTP con);
     void        add_to(char *to, CONTEXTP con)      {env_to[to] = con;};
     CONTEXTP    find_context(char *to, char *from);
+
+    char*       get_content_suffix()                        {return default_context->get_content_suffix()      ;};
+    char*       get_content_message()                       {return default_context->get_content_message()     ;};
+    string_set& get_content_host_ignore()                   {return default_context->get_content_host_ignore() ;};
+    string_set& get_content_tlds()                          {return default_context->get_content_tlds()        ;};
+    string_set& get_html_tags()                             {return default_context->get_html_tags()           ;};
+
     void        dump();
 };
 
@@ -147,3 +172,5 @@
 CONFIG *parse_config(char *fn);
 bool load_conf(CONFIG &dc, char *fn);
 void token_init();
+
+#endif
--- a/src/dnsbl.cpp	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/dnsbl.cpp	Sun Jul 10 13:28:33 2005 -0700
@@ -74,7 +74,7 @@
 
 #include "includes.h"
 
-static char* dnsbl_version="$Id:";
+static char* dnsbl_version="$Id$";
 
 
 extern "C" {
--- a/src/scanner.cpp	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/scanner.cpp	Sun Jul 10 13:28:33 2005 -0700
@@ -6,127 +6,9 @@
 
 */
 
-static char* scanner_version="$Id$";
-
-using namespace std;
-
-
-// object to record things we see in the body content
-struct recorder
-{
-    mlfiPriv    *priv;      // needed for syslog
-    string_set  *html_tags; // valid tags
-    string_set  *tlds;      // valid tlds
-    string_set  hosts;
-    int         bad_html_tags;
-    int         binary_tags;
-    recorder(mlfiPriv *priv_, string_set *html_tags_, string_set *tlds_);
-    ~recorder();
-    void empty();
-    void new_url(char *host);
-    void new_tag(char *tag);
-    void binary();
-};
-recorder::recorder(mlfiPriv *priv_, string_set *html_tags_, string_set *tlds_) {
-    priv          = priv_;
-    html_tags     = html_tags_;
-    tlds          = tlds_;
-    bad_html_tags = 0;
-    binary_tags   = 0;
-}
-recorder::~recorder() {
-    empty();
-}
-void recorder::empty() {
-    bad_html_tags = 0;
-    binary_tags   = 0;
-    discard(hosts);
-}
-void recorder::new_url(char *host) {
-    register_string(hosts, host);
-}
-void recorder::binary() {
-    binary_tags++;
-}
-void recorder::new_tag(char *tag) {
-    string_set::iterator i = html_tags->find(tag);
-    if (i == html_tags->end()) {
-        bad_html_tags++;
-        if (debug_syslog && (bad_html_tags < 10)) {
-            // only log the first 10 bad tags
-            char buf[200];
-            snprintf(buf, sizeof(buf), "bad html tag %s", tag);
-            my_syslog(priv, buf);
-        }
-    }
-}
-
-
-
-enum state {// host name recognizer states
-            h_init,
-            h_host,
+#include "includes.h"
 
-            // html tag discarder states
-            t_init,
-            t_tag1,     // seen opening <
-            t_tag2,     // not comment
-            t_com1,     // seen !
-            t_com2,     // seen first -
-            t_com3,     // seen second -, looking for -->
-            t_com4,     // seen first -
-            t_com5,     // seen second -
-            t_disc,     // looking for closing >
-
-            // url recognizer states
-            u_init,
-            u_http,
-            u_sla,
-            u_url,
-
-            // url decoder states  %xx
-            d_init,
-            d_pcnt,
-            d_1,
-
-            // html entity decoder states &#nnn;
-            e_init,
-            e_amp,
-            e_num,
-
-            // mime decoder states =xx
-            m_init,
-            m_eq,
-            m_1,
-
-            // base64 decoder states
-            b_init,
-            b_lf,
-            b_lf2,
-            b_64,
-
-            // uuencoding decoder states
-            uu_init,
-            uu_lf,
-            uu_lf2,
-            uu_64,
-
-            // counter for number of columns in the table
-            end_state,
-
-            // temporary states
-            h_end,
-            t_bin,
-            t_end,
-            u_reco,
-            d_2,
-            e_semi,
-            m_2,
-            m_cr,
-            m_nl,
-            b_cr,
-            uu_cr
-           };
+static char* scanner_version="$Id$";
 
 typedef state PARSE[end_state];
 
@@ -1169,22 +1051,48 @@
     0,  // 0xff
 };
 
-#define PENDING_LIMIT 100
-struct fsa {
-    u_char      pending[PENDING_LIMIT];
-    int         count;
-    state       st;
-    state       init;
-    fsa         *next1;
-    fsa         *next2;
-    recorder    *memory;
 
-    fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_);
-    void push(u_char *buf, int len);
-    void pusher();
-    void error(char *err);
-};
+////////////////////////////////////////////////
+//
+//
+recorder::recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_)
+    : priv(priv_),              // kept so new_tag() can pass it to my_syslog()
+      html_tags(&html_tags_),   // stores the address of the caller's set, no copy
+      tlds(&tlds_),             // stores the address of the caller's set, no copy
+      bad_html_tags(0),
+      binary_tags(0)
+{}
+recorder::~recorder() {
+    this->empty();  // destruction goes through the same path as an explicit empty()
+}
+void recorder::empty() {
+    // hand the hosts set to discard() and zero both counters
+    discard(hosts);
+    bad_html_tags = binary_tags = 0;
+}
+void recorder::new_url(char *host) {
+    register_string(this->hosts, host);  // pass the host on to register_string() for our hosts set
+}
+void recorder::binary() {
+    ++binary_tags;  // bump the binary_tags counter by one
+}
+void recorder::new_tag(char *tag) {
+    // tags found in the html_tags set are fine and are not counted
+    if (html_tags->find(tag) != html_tags->end()) return;
+    bad_html_tags++;
+    // only log the first 10 bad tags; the counter is incremented before this
+    // test, so the tenth bad tag arrives here with bad_html_tags == 10
+    if (debug_syslog && (bad_html_tags <= 10)) {
+        char buf[200];
+        snprintf(buf, sizeof(buf), "bad html tag %s", tag);  // truncates long tags
+        my_syslog(priv, buf);
+    }
+}
 
+
+////////////////////////////////////////////////
+//
+//
 fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) {
     count  = 0;
     st     = init_;
@@ -1447,21 +1355,10 @@
     }
 }
 
-struct url_scanner {
-    fsa *host_parser;
-    fsa *tags_parser;
-    fsa *urls_parser;
-    fsa *urld_parser;
-    fsa *html_parser;
-    fsa *mime_parser;
-    fsa *b64_parser;
-    fsa *uu_parser;
 
-    url_scanner(recorder *memory);
-    ~url_scanner();
-    void scan(u_char *buffer, size_t length);
-};
-
+////////////////////////////////////////////////
+//
+//
 url_scanner::url_scanner(recorder *memory) {
     host_parser = new fsa(h_init,  NULL,        NULL,        memory);
     tags_parser = new fsa(t_init,  host_parser, NULL,        memory);
--- a/src/tokenizer.cpp	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/tokenizer.cpp	Sun Jul 10 13:28:33 2005 -0700
@@ -8,7 +8,7 @@
 
 #include "context.h"
 
-static char* tokenizer_version="$Id:";
+static char* tokenizer_version="$Id$";
 
 enum state {s_init,
             s_token,
--- a/src/tokenizer.h	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/tokenizer.h	Sun Jul 10 13:28:33 2005 -0700
@@ -1,3 +1,6 @@
+#ifndef tokenizer_include
+#define tokenizer_include
+
 #include <fstream>
 #include <list>
 #include <set>
@@ -47,3 +50,4 @@
     void    token_error();
 };
 
+#endif