74
|
1 #ifndef scanner_include
|
|
2 #define scanner_include
|
|
3
|
|
4 #include "dnsbl.h"
|
|
5
|
|
6 ////////////////////////////////////////////////
|
|
7 // memory for the content scanner
|
|
8 //
|
|
9 class recorder
|
|
10 {
|
|
11 mlfiPriv *priv; // needed for syslog
|
|
12 string_set *html_tags; // valid tags
|
|
13 string_set *tlds; // valid tlds
|
|
14 string_set hosts;
|
|
15 int bad_html_tags;
|
|
16 int binary_tags;
|
|
17
|
|
18 public:
|
|
19 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_);
|
76
|
20 ~recorder() { empty(); };
|
74
|
21 void empty();
|
|
22 void new_url(char *host);
|
|
23 void new_tag(char *tag);
|
|
24 void binary();
|
76
|
25 mlfiPriv *get_priv() { return priv; };
|
|
26 string_set *get_tlds() { return tlds; };
|
|
27 string_set &get_hosts() { return hosts; };
|
|
28 bool excessive_bad_tags(int limit) { return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); };
|
|
29 bool excessive_hosts(int limit) { return (limit > 0) && (hosts.size() > limit); };
|
74
|
30 };
|
|
31
|
|
32
|
|
33 ////////////////////////////////////////////////
|
|
34 // the content scanner
|
|
35 //
|
75
|
36 class fsa;
|
74
|
37 class url_scanner {
|
|
38 fsa *host_parser;
|
|
39 fsa *tags_parser;
|
|
40 fsa *urls_parser;
|
|
41 fsa *urld_parser;
|
|
42 fsa *html_parser;
|
|
43 fsa *mime_parser;
|
|
44 fsa *b64_parser;
|
|
45 fsa *uu_parser;
|
|
46
|
|
47 public:
|
|
48 url_scanner(recorder *memory);
|
|
49 ~url_scanner();
|
|
50 void scan(u_char *buffer, size_t length);
|
|
51 };
|
|
52
|
|
53 #endif
|