74
|
1 #ifndef scanner_include
|
|
2 #define scanner_include
|
|
3
|
|
4 #include "dnsbl.h"
|
|
5
|
|
6 ////////////////////////////////////////////////
|
|
7 // memory for the content scanner
|
|
8 //
|
|
9 class recorder
|
|
10 {
|
117
|
11 mlfiPriv *priv; // needed for syslog
|
|
12 string_set *html_tags; // valid tags
|
|
13 string_set *tlds; // valid tlds
|
|
14 string_set *cctlds; // valid cctlds
|
|
15 string_set hosts;
|
|
16 int bad_html_tags;
|
|
17 int binary_tags;
|
74
|
18
|
|
19 public:
|
117
|
20 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &cctlds_);
|
|
21 ~recorder() { empty(); };
|
|
22 void empty();
|
|
23 void new_url(char *host);
|
|
24 void new_tag(char *tag);
|
|
25 void binary();
|
|
26 mlfiPriv *get_priv() { return priv; };
|
|
27 string_set *get_cctlds() { return cctlds; };
|
|
28 string_set *get_tlds() { return tlds; };
|
|
29 string_set &get_hosts() { return hosts; };
|
|
30 bool excessive_bad_tags(int limit) { return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); };
|
|
31 bool excessive_hosts(int limit) { return (limit > 0) && (hosts.size() > limit); };
|
74
|
32 };
|
|
33
|
|
34
|
|
35 ////////////////////////////////////////////////
|
|
36 // the content scanner
|
|
37 //
|
75
|
38 class fsa;
|
74
|
39 class url_scanner {
|
117
|
40 fsa *host_parser;
|
|
41 fsa *tags_parser;
|
|
42 fsa *urls_parser;
|
|
43 fsa *urld_parser;
|
|
44 fsa *html_parser;
|
|
45 fsa *mime_parser;
|
|
46 fsa *b64_parser;
|
|
47 fsa *uu_parser;
|
74
|
48
|
|
49 public:
|
117
|
50 url_scanner(recorder *memory);
|
|
51 ~url_scanner();
|
|
52 void scan(u_char *buffer, size_t length);
|
74
|
53 };
|
|
54
|
|
55 #endif
|