diff src/scanner.cpp @ 73:2b369f7db7bf

start coding on new config syntax
author carl
date Sun, 10 Jul 2005 13:28:33 -0700
parents fb8afa205293
children b7449114ebb0
line wrap: on
line diff
--- a/src/scanner.cpp	Sun Jul 10 13:28:33 2005 -0700
+++ b/src/scanner.cpp	Sun Jul 10 13:28:33 2005 -0700
@@ -6,127 +6,9 @@
 
 */
 
-static char* scanner_version="$Id$";
-
-using namespace std;
-
-
-// object to record things we see in the body content
-struct recorder
-{
-    mlfiPriv    *priv;      // needed for syslog
-    string_set  *html_tags; // valid tags
-    string_set  *tlds;      // valid tlds
-    string_set  hosts;
-    int         bad_html_tags;
-    int         binary_tags;
-    recorder(mlfiPriv *priv_, string_set *html_tags_, string_set *tlds_);
-    ~recorder();
-    void empty();
-    void new_url(char *host);
-    void new_tag(char *tag);
-    void binary();
-};
-recorder::recorder(mlfiPriv *priv_, string_set *html_tags_, string_set *tlds_) {
-    priv          = priv_;
-    html_tags     = html_tags_;
-    tlds          = tlds_;
-    bad_html_tags = 0;
-    binary_tags   = 0;
-}
-recorder::~recorder() {
-    empty();
-}
-void recorder::empty() {
-    bad_html_tags = 0;
-    binary_tags   = 0;
-    discard(hosts);
-}
-void recorder::new_url(char *host) {
-    register_string(hosts, host);
-}
-void recorder::binary() {
-    binary_tags++;
-}
-void recorder::new_tag(char *tag) {
-    string_set::iterator i = html_tags->find(tag);
-    if (i == html_tags->end()) {
-        bad_html_tags++;
-        if (debug_syslog && (bad_html_tags < 10)) {
-            // only log the first 10 bad tags
-            char buf[200];
-            snprintf(buf, sizeof(buf), "bad html tag %s", tag);
-            my_syslog(priv, buf);
-        }
-    }
-}
-
-
-
-enum state {// host name recognizer states
-            h_init,
-            h_host,
+#include "includes.h"
 
-            // html tag discarder states
-            t_init,
-            t_tag1,     // seen opening <
-            t_tag2,     // not comment
-            t_com1,     // seen !
-            t_com2,     // seen first -
-            t_com3,     // seen second -, looking for -->
-            t_com4,     // seen first -
-            t_com5,     // seen second -
-            t_disc,     // looking for closing >
-
-            // url recognizer states
-            u_init,
-            u_http,
-            u_sla,
-            u_url,
-
-            // url decoder states  %xx
-            d_init,
-            d_pcnt,
-            d_1,
-
-            // html entity decoder states &#nnn;
-            e_init,
-            e_amp,
-            e_num,
-
-            // mime decoder states =xx
-            m_init,
-            m_eq,
-            m_1,
-
-            // base64 decoder states
-            b_init,
-            b_lf,
-            b_lf2,
-            b_64,
-
-            // uuencoding decoder states
-            uu_init,
-            uu_lf,
-            uu_lf2,
-            uu_64,
-
-            // counter for number of columns in the table
-            end_state,
-
-            // temporary states
-            h_end,
-            t_bin,
-            t_end,
-            u_reco,
-            d_2,
-            e_semi,
-            m_2,
-            m_cr,
-            m_nl,
-            b_cr,
-            uu_cr
-           };
+static const char* scanner_version="$Id$";  // pointee is const: it aliases a string literal
 
 typedef state PARSE[end_state];
 
@@ -1169,22 +1051,48 @@
     0,  // 0xff
 };
 
-#define PENDING_LIMIT 100
-struct fsa {
-    u_char      pending[PENDING_LIMIT];
-    int         count;
-    state       st;
-    state       init;
-    fsa         *next1;
-    fsa         *next2;
-    recorder    *memory;
 
-    fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_);
-    void push(u_char *buf, int len);
-    void pusher();
-    void error(char *err);
-};
+////////////////////////////////////////////////
+//
+//
+recorder::recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_)
+    : priv(priv_),                  // handed to my_syslog() by new_tag()
+      html_tags(&html_tags_),       // address of the caller's set, not a copy
+      tlds(&tlds_),                 // address of the caller's set, not a copy
+      bad_html_tags(0),
+      binary_tags(0)
+{}
+recorder::~recorder() {
+    this->empty();  // teardown reuses the reset path: hosts go to discard(), counters to zero
+}
+void recorder::empty() {
+    // pass hosts to discard(), then zero both tag counters
+    discard(hosts);
+    bad_html_tags = binary_tags = 0;
+}
+void recorder::new_url(char *host) {
+    register_string(this->hosts, host);  // record host in this recorder's hosts via register_string()
+}
+void recorder::binary() {
+    ++binary_tags;  // bump the count that empty() resets to zero
+}
+void recorder::new_tag(char *tag) {
+    // a tag found in the valid-tag set needs no bookkeeping
+    if (html_tags->find(tag) != html_tags->end()) return;
+    bad_html_tags++;
+    // log only the first 10 bad tags; the counter already includes this
+    // one, so the bound is inclusive ("< 10" stopped after the ninth)
+    if (debug_syslog && (bad_html_tags <= 10)) {
+        char buf[200];
+        snprintf(buf, sizeof(buf), "bad html tag %s", tag);
+        my_syslog(priv, buf);
+    }
+}
 
+
+////////////////////////////////////////////////
+//
+//
 fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) {
     count  = 0;
     st     = init_;
@@ -1447,21 +1355,10 @@
     }
 }
 
-struct url_scanner {
-    fsa *host_parser;
-    fsa *tags_parser;
-    fsa *urls_parser;
-    fsa *urld_parser;
-    fsa *html_parser;
-    fsa *mime_parser;
-    fsa *b64_parser;
-    fsa *uu_parser;
 
-    url_scanner(recorder *memory);
-    ~url_scanner();
-    void scan(u_char *buffer, size_t length);
-};
-
+////////////////////////////////////////////////
+//
+//
 url_scanner::url_scanner(recorder *memory) {
     host_parser = new fsa(h_init,  NULL,        NULL,        memory);
     tags_parser = new fsa(t_init,  host_parser, NULL,        memory);