Mercurial > dnsbl
view src/scanner.h @ 296:05b604c99e06 stable-6-0-43
allow broken SRS0+ rather than the correct SRS0= tag
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Fri, 18 Jul 2014 15:08:53 -0700 |
parents | f92f24950bd3 |
children | f9165d9aa689 |
line wrap: on
line source
/* Copyright (c) 2007 Carl Byington - 510 Software Group, released under the GPL version 3 or any later version at your choice available at http://www.gnu.org/licenses/gpl-3.0.txt */ #ifndef scanner_include #define scanner_include #include "dnsbl.h" //////////////////////////////////////////////// // memory for the content scanner // class recorder { mlfiPriv *priv; // needed for syslog string_set *html_tags; // valid tags string_set *tlds; // valid tlds string_set *tldwilds; // valid wildcard tlds string_set *tldnots; // invalid tlds string_set hosts; size_t bad_html_tags; size_t binary_tags; public: recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &tldwilds_, string_set &tldnots_); ~recorder() { empty(); }; void empty(); void new_url(const char *host); void new_tag(const char *tag); void binary(); void syslog(const char *buf) { my_syslog(priv, buf); }; mlfiPriv *get_priv() { return priv; }; string_set *get_tlds() { return tlds; }; string_set *get_tldwilds() { return tldwilds; }; string_set *get_tldnots() { return tldnots; }; string_set &get_hosts() { return hosts; }; bool excessive_bad_tags(size_t limit){ return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; bool excessive_hosts(size_t limit) { return (limit > 0) && (hosts.size() > limit); }; }; //////////////////////////////////////////////// // the content scanner // class fsa; class url_scanner { fsa *host_parser; fsa *tags_parser; fsa *urls_parser; fsa *urld_parser; fsa *html_parser; fsa *mime_parser; fsa *b64_parser; fsa *uu_parser; public: url_scanner(recorder *memory); ~url_scanner(); void scan(u_char *buffer, size_t length); }; #endif