Mercurial > dnsbl
comparison src/scanner.h @ 117:aa07452e641b
uribl patch from Jeff Evans <jeffe@tricab.com>
author | carl |
---|---|
date | Sun, 12 Mar 2006 10:15:39 -0800 |
parents | 81f1e400e8ab |
children | ecb40aa3eaa5 |
comparison
legend: equal | deleted | inserted | replaced
116:0094678a16d0 | 117:aa07452e641b |
---|---|
6 //////////////////////////////////////////////// | 6 //////////////////////////////////////////////// |
7 // memory for the content scanner | 7 // memory for the content scanner |
8 // | 8 // |
9 class recorder | 9 class recorder |
10 { | 10 { |
11 mlfiPriv *priv; // needed for syslog | 11 mlfiPriv *priv; // needed for syslog |
12 string_set *html_tags; // valid tags | 12 string_set *html_tags; // valid tags |
13 string_set *tlds; // valid tlds | 13 string_set *tlds; // valid tlds |
14 string_set hosts; | 14 string_set *cctlds; // valid cctlds |
15 int bad_html_tags; | 15 string_set hosts; |
16 int binary_tags; | 16 int bad_html_tags; |
17 int binary_tags; | |
17 | 18 |
18 public: | 19 public: |
19 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_); | 20 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &cctlds_); |
20 ~recorder() { empty(); }; | 21 ~recorder() { empty(); }; |
21 void empty(); | 22 void empty(); |
22 void new_url(char *host); | 23 void new_url(char *host); |
23 void new_tag(char *tag); | 24 void new_tag(char *tag); |
24 void binary(); | 25 void binary(); |
25 mlfiPriv *get_priv() { return priv; }; | 26 mlfiPriv *get_priv() { return priv; }; |
26 string_set *get_tlds() { return tlds; }; | 27 string_set *get_cctlds() { return cctlds; }; |
27 string_set &get_hosts() { return hosts; }; | 28 string_set *get_tlds() { return tlds; }; |
28 bool excessive_bad_tags(int limit) { return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; | 29 string_set &get_hosts() { return hosts; }; |
29 bool excessive_hosts(int limit) { return (limit > 0) && (hosts.size() > limit); }; | 30 bool excessive_bad_tags(int limit) { return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; |
31 bool excessive_hosts(int limit) { return (limit > 0) && (hosts.size() > limit); }; | |
30 }; | 32 }; |
31 | 33 |
32 | 34 |
33 //////////////////////////////////////////////// | 35 //////////////////////////////////////////////// |
34 // the content scanner | 36 // the content scanner |
35 // | 37 // |
36 class fsa; | 38 class fsa; |
37 class url_scanner { | 39 class url_scanner { |
38 fsa *host_parser; | 40 fsa *host_parser; |
39 fsa *tags_parser; | 41 fsa *tags_parser; |
40 fsa *urls_parser; | 42 fsa *urls_parser; |
41 fsa *urld_parser; | 43 fsa *urld_parser; |
42 fsa *html_parser; | 44 fsa *html_parser; |
43 fsa *mime_parser; | 45 fsa *mime_parser; |
44 fsa *b64_parser; | 46 fsa *b64_parser; |
45 fsa *uu_parser; | 47 fsa *uu_parser; |
46 | 48 |
47 public: | 49 public: |
48 url_scanner(recorder *memory); | 50 url_scanner(recorder *memory); |
49 ~url_scanner(); | 51 ~url_scanner(); |
50 void scan(u_char *buffer, size_t length); | 52 void scan(u_char *buffer, size_t length); |
51 }; | 53 }; |
52 | 54 |
53 #endif | 55 #endif |