Mercurial > dnsbl
diff src/scanner.h @ 74:b7449114ebb0
start coding on new config syntax
author | carl |
---|---|
date | Sun, 10 Jul 2005 14:19:00 -0700 |
parents | |
children | 1142e46be550 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/scanner.h Sun Jul 10 14:19:00 2005 -0700 @@ -0,0 +1,138 @@ +#ifndef scanner_include +#define scanner_include + +#include "dnsbl.h" + +//////////////////////////////////////////////// +// memory for the content scanner +// +class recorder +{ + mlfiPriv *priv; // needed for syslog + string_set *html_tags; // valid tags + string_set *tlds; // valid tlds + string_set hosts; + int bad_html_tags; + int binary_tags; + +public: + recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_); + ~recorder(); + void empty(); + void new_url(char *host); + void new_tag(char *tag); + void binary(); + mlfiPriv *get_priv() {return priv; }; + string_set *get_tlds() {return tlds; }; + string_set &get_hosts() {return hosts; }; + bool excessive_bad_tags(int limit) {return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; + bool excessive_hosts(int limit) {return (limit > 0) && (hosts.size() > limit); }; +}; + + +//////////////////////////////////////////////// +// finite state machine +// +enum state {// host name recognizer states + h_init, + h_host, + + // html tag discarder states + t_init, + t_tag1, // seen opening < + t_tag2, // not comment + t_com1, // seen ! + t_com2, // seen first - + t_com3, // seen second -, looking for --> + t_com4, // seen first - + t_com5, // seen second - + t_disc, // looking for closing > + + // url recognizer states + u_init, + u_http, + u_sla, + u_url, + + // url decoder states %xx + d_init, + d_pcnt, + d_1, + + // html entity decoder states &#nnn; + e_init, + e_amp, + e_num, + + // mime decoder states =xx + m_init, + m_eq, + m_1, + + // base64 decoder states + b_init, + b_lf, + b_lf2, + b_64, + + // uuencoding decoder states + uu_init, + uu_lf, + uu_lf2, + uu_64, + + // counter for number of columns in the table + end_state, + + // temporary states + h_end, + t_bin, + t_end, + u_reco, + d_2, + e_semi, + m_2, + m_cr, + m_nl, + b_cr, + uu_cr + }; + +#define PENDING_LIMIT 100 +class fsa { + u_char pending[PENDING_LIMIT]; + int count; + state st; + state init; + fsa *next1; + fsa *next2; + recorder *memory; + +public: + fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_); + void push(u_char *buf, int len); + void pusher(); + void error(char *err); +}; + + +//////////////////////////////////////////////// +// the content scanner +// +class url_scanner { + fsa *host_parser; + fsa *tags_parser; + fsa *urls_parser; + fsa *urld_parser; + fsa *html_parser; + fsa *mime_parser; + fsa *b64_parser; + fsa *uu_parser; + +public: + url_scanner(recorder *memory); + ~url_scanner(); + void scan(u_char *buffer, size_t length); +}; + +#endif