comparison src/scanner.h @ 270:f92f24950bd3 stable-6-0-35

Use Mozilla prefix list for TLD checking; enable surbl/uribl/dbl rhs lists
author Carl Byington <carl@five-ten-sg.com>
date Mon, 09 Sep 2013 15:15:53 -0700
parents c0d2e99c0a1d
children f9165d9aa689
comparison
equal deleted inserted replaced
269:6d2a11f0ae41 270:f92f24950bd3
17 class recorder 17 class recorder
18 { 18 {
19 mlfiPriv *priv; // needed for syslog 19 mlfiPriv *priv; // needed for syslog
20 string_set *html_tags; // valid tags 20 string_set *html_tags; // valid tags
21 string_set *tlds; // valid tlds 21 string_set *tlds; // valid tlds
22 string_set *cctlds; // valid cctlds 22 string_set *tldwilds; // valid wildcard tlds
23 string_set *tldnots; // invalid tlds
23 string_set hosts; 24 string_set hosts;
24 size_t bad_html_tags; 25 size_t bad_html_tags;
25 size_t binary_tags; 26 size_t binary_tags;
26 27
27 public: 28 public:
28 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &cctlds_); 29 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &tldwilds_, string_set &tldnots_);
29 ~recorder() { empty(); }; 30 ~recorder() { empty(); };
30 void empty(); 31 void empty();
31 void new_url(const char *host); 32 void new_url(const char *host);
32 void new_tag(const char *tag); 33 void new_tag(const char *tag);
33 void binary(); 34 void binary();
34 void syslog(const char *buf) { my_syslog(priv, buf); }; 35 void syslog(const char *buf) { my_syslog(priv, buf); };
35 mlfiPriv *get_priv() { return priv; }; 36 mlfiPriv *get_priv() { return priv; };
36 string_set *get_cctlds() { return cctlds; };
37 string_set *get_tlds() { return tlds; }; 37 string_set *get_tlds() { return tlds; };
38 string_set *get_tldwilds() { return tldwilds; };
39 string_set *get_tldnots() { return tldnots; };
38 string_set &get_hosts() { return hosts; }; 40 string_set &get_hosts() { return hosts; };
39 bool excessive_bad_tags(size_t limit){ return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; 41 bool excessive_bad_tags(size_t limit){ return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); };
40 bool excessive_hosts(size_t limit) { return (limit > 0) && (hosts.size() > limit); }; 42 bool excessive_hosts(size_t limit) { return (limit > 0) && (hosts.size() > limit); };
41 43
42 }; 44 };