Mercurial > dnsbl
diff src/scanner.cpp @ 73:2b369f7db7bf
start coding on new config syntax
author | carl |
---|---|
date | Sun, 10 Jul 2005 13:28:33 -0700 |
parents | fb8afa205293 |
children | b7449114ebb0 |
line wrap: on
line diff
--- a/src/scanner.cpp Sun Jul 10 13:28:33 2005 -0700 +++ b/src/scanner.cpp Sun Jul 10 13:28:33 2005 -0700 @@ -6,127 +6,9 @@ */ -static char* scanner_version="$Id$"; - -using namespace std; - - -// object to record things we see in the body content -struct recorder -{ - mlfiPriv *priv; // needed for syslog - string_set *html_tags; // valid tags - string_set *tlds; // valid tlds - string_set hosts; - int bad_html_tags; - int binary_tags; - recorder(mlfiPriv *priv_, string_set *html_tags_, string_set *tlds_); - ~recorder(); - void empty(); - void new_url(char *host); - void new_tag(char *tag); - void binary(); -}; -recorder::recorder(mlfiPriv *priv_, string_set *html_tags_, string_set *tlds_) { - priv = priv_; - html_tags = html_tags_; - tlds = tlds_; - bad_html_tags = 0; - binary_tags = 0; -} -recorder::~recorder() { - empty(); -} -void recorder::empty() { - bad_html_tags = 0; - binary_tags = 0; - discard(hosts); -} -void recorder::new_url(char *host) { - register_string(hosts, host); -} -void recorder::binary() { - binary_tags++; -} -void recorder::new_tag(char *tag) { - string_set::iterator i = html_tags->find(tag); - if (i == html_tags->end()) { - bad_html_tags++; - if (debug_syslog && (bad_html_tags < 10)) { - // only log the first 10 bad tags - char buf[200]; - snprintf(buf, sizeof(buf), "bad html tag %s", tag); - my_syslog(priv, buf); - } - } -} - - - -enum state {// host name recognizer states - h_init, - h_host, +#include "includes.h" - // html tag discarder states - t_init, - t_tag1, // seen opening < - t_tag2, // not comment - t_com1, // seen ! - t_com2, // seen first - - t_com3, // seen second -, looking for --> - t_com4, // seen first - - t_com5, // seen second - - t_disc, // looking for closing > - - // url recognizer states - u_init, - u_http, - u_sla, - u_url, - - // url decoder states %xx - d_init, - d_pcnt, - d_1, - - // html entity decoder states &#nnn; - e_init, - e_amp, - e_num, - - // mime decoder states =xx - m_init, - m_eq, - m_1, - - // base64 decoder states - b_init, - b_lf, - b_lf2, - b_64, - - // uuencoding decoder states - uu_init, - uu_lf, - uu_lf2, - uu_64, - - // counter for number of columns in the table - end_state, - - // temporary states - h_end, - t_bin, - t_end, - u_reco, - d_2, - e_semi, - m_2, - m_cr, - m_nl, - b_cr, - uu_cr - }; +static char* scanner_version="$Id$"; typedef state PARSE[end_state]; @@ -1169,22 +1051,48 @@ 0, // 0xff }; -#define PENDING_LIMIT 100 -struct fsa { - u_char pending[PENDING_LIMIT]; - int count; - state st; - state init; - fsa *next1; - fsa *next2; - recorder *memory; - fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_); - void push(u_char *buf, int len); - void pusher(); - void error(char *err); -}; +//////////////////////////////////////////////// +// +// +recorder::recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_) { + priv = priv_; + html_tags = &html_tags_; + tlds = &tlds_; + bad_html_tags = 0; + binary_tags = 0; +} +recorder::~recorder() { + empty(); +} +void recorder::empty() { + bad_html_tags = 0; + binary_tags = 0; + discard(hosts); +} +void recorder::new_url(char *host) { + register_string(hosts, host); +} +void recorder::binary() { + binary_tags++; +} +void recorder::new_tag(char *tag) { + string_set::iterator i = html_tags->find(tag); + if (i == html_tags->end()) { + bad_html_tags++; + if (debug_syslog && (bad_html_tags < 10)) { + // only log the first 10 bad tags + char buf[200]; + snprintf(buf, sizeof(buf), "bad html tag %s", tag); + my_syslog(priv, buf); + } + } +} + +//////////////////////////////////////////////// +// +// fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) { count = 0; st = init_; @@ -1447,21 +1355,10 @@ } } -struct url_scanner { - fsa *host_parser; - fsa *tags_parser; - fsa *urls_parser; - fsa *urld_parser; - fsa *html_parser; - fsa *mime_parser; - fsa *b64_parser; - fsa *uu_parser; - url_scanner(recorder *memory); - ~url_scanner(); - void scan(u_char *buffer, size_t length); -}; - +//////////////////////////////////////////////// +// +// url_scanner::url_scanner(recorder *memory) { host_parser = new fsa(h_init, NULL, NULL, memory); tags_parser = new fsa(t_init, host_parser, NULL, memory);