143
|
1 /*
|
|
2
|
152
|
3 Copyright (c) 2007 Carl Byington - 510 Software Group, released under
|
|
4 the GPL version 3 or any later version at your choice available at
|
|
5 http://www.gnu.org/licenses/gpl-3.0.txt
|
143
|
6
|
|
7 */
|
|
8
|
74
|
9 #ifndef scanner_include
|
|
10 #define scanner_include
|
|
11
|
|
12 #include "dnsbl.h"
|
|
13
|
|
14 ////////////////////////////////////////////////
|
|
15 // memory for the content scanner
|
|
16 //
|
|
17 class recorder
|
|
18 {
|
117
|
19 mlfiPriv *priv; // needed for syslog
|
|
20 string_set *html_tags; // valid tags
|
|
21 string_set *tlds; // valid tlds
|
147
|
22 string_set *cctlds; // valid cctlds
|
117
|
23 string_set hosts;
|
|
24 int bad_html_tags;
|
|
25 int binary_tags;
|
74
|
26
|
|
27 public:
|
117
|
28 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &cctlds_);
|
|
29 ~recorder() { empty(); };
|
|
30 void empty();
|
|
31 void new_url(char *host);
|
|
32 void new_tag(char *tag);
|
|
33 void binary();
|
147
|
34 void syslog(char *buf) { my_syslog(priv, buf); };
|
117
|
35 mlfiPriv *get_priv() { return priv; };
|
|
36 string_set *get_cctlds() { return cctlds; };
|
|
37 string_set *get_tlds() { return tlds; };
|
|
38 string_set &get_hosts() { return hosts; };
|
|
39 bool excessive_bad_tags(int limit) { return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); };
|
|
40 bool excessive_hosts(int limit) { return (limit > 0) && (hosts.size() > limit); };
|
147
|
41
|
74
|
42 };
|
|
43
|
|
44
|
|
45 ////////////////////////////////////////////////
|
|
46 // the content scanner
|
|
47 //
|
75
|
48 class fsa;
|
74
|
49 class url_scanner {
|
117
|
50 fsa *host_parser;
|
|
51 fsa *tags_parser;
|
|
52 fsa *urls_parser;
|
|
53 fsa *urld_parser;
|
|
54 fsa *html_parser;
|
|
55 fsa *mime_parser;
|
|
56 fsa *b64_parser;
|
|
57 fsa *uu_parser;
|
74
|
58
|
|
59 public:
|
117
|
60 url_scanner(recorder *memory);
|
|
61 ~url_scanner();
|
|
62 void scan(u_char *buffer, size_t length);
|
74
|
63 };
|
|
64
|
|
65 #endif
|