Mercurial > dnsbl
view src/scanner.h @ 74:b7449114ebb0
start coding on new config syntax
author | carl |
---|---|
date | Sun, 10 Jul 2005 14:19:00 -0700 |
parents | |
children | 1142e46be550 |
line wrap: on
line source
#ifndef scanner_include #define scanner_include #include "dnsbl.h" //////////////////////////////////////////////// // memory for the content scanner // class recorder { mlfiPriv *priv; // needed for syslog string_set *html_tags; // valid tags string_set *tlds; // valid tlds string_set hosts; int bad_html_tags; int binary_tags; public: recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_); ~recorder(); void empty(); void new_url(char *host); void new_tag(char *tag); void binary(); mlfiPriv *get_priv() {return priv; }; string_set *get_tlds() {return tlds; }; string_set &get_hosts() {return hosts; }; bool excessive_bad_tags(int limit) {return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; bool excessive_hosts(int limit) {return (limit > 0) && (hosts.size() > limit); }; }; //////////////////////////////////////////////// // finite state machine // enum state {// host name recognizer states h_init, h_host, // html tag discarder states t_init, t_tag1, // seen opening < t_tag2, // not comment t_com1, // seen ! t_com2, // seen first - t_com3, // seen second -, looking for --> t_com4, // seen first - t_com5, // seen second - t_disc, // looking for closing > // url recognizer states u_init, u_http, u_sla, u_url, // url decoder states %xx d_init, d_pcnt, d_1, // html entity decoder states &#nnn; e_init, e_amp, e_num, // mime decoder states =xx m_init, m_eq, m_1, // base64 decoder states b_init, b_lf, b_lf2, b_64, // uuencoding decoder states uu_init, uu_lf, uu_lf2, uu_64, // counter for number of columns in the table end_state, // temporary states h_end, t_bin, t_end, u_reco, d_2, e_semi, m_2, m_cr, m_nl, b_cr, uu_cr }; #define PENDING_LIMIT 100 class fsa { u_char pending[PENDING_LIMIT]; int count; state st; state init; fsa *next1; fsa *next2; recorder *memory; public: fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_); void push(u_char *buf, int len); void pusher(); void error(char *err); }; //////////////////////////////////////////////// // the content scanner // class url_scanner { fsa *host_parser; fsa *tags_parser; fsa *urls_parser; fsa *urld_parser; fsa *html_parser; fsa *mime_parser; fsa *b64_parser; fsa *uu_parser; public: url_scanner(recorder *memory); ~url_scanner(); void scan(u_char *buffer, size_t length); }; #endif