annotate src/scanner.h @ 98:91c27c00048f

tokenizer errors now go thru syslog to be visible during config file reloads in normal operation
author carl
date Thu, 22 Sep 2005 21:57:08 -0700
parents 81f1e400e8ab
children aa07452e641b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
74
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
1 #ifndef scanner_include
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
2 #define scanner_include
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
3
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
4 #include "dnsbl.h"
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
5
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
6 ////////////////////////////////////////////////
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
7 // memory for the content scanner
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
8 //
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
9 class recorder // state handed to url_scanner (see its constructor): a host set plus bad-tag and binary-tag counters
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
10 {
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
11 mlfiPriv *priv; // needed for syslog
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
12 string_set *html_tags; // valid tags
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
13 string_set *tlds; // valid tlds
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
14 string_set hosts; // held by value; exposed via get_hosts(), size checked by excessive_hosts()
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
15 int bad_html_tags; // counter tested against the limit in excessive_bad_tags()
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
16 int binary_tags; // counter; excessive_bad_tags() requires bad_html_tags > 3*binary_tags
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
17
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
18 public:
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
19 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_); // defined elsewhere; sets are taken by reference but stored as pointers - presumably the referenced sets must outlive this object (TODO confirm)
76
81f1e400e8ab start coding on new config syntax
carl
parents: 75
diff changeset
20 ~recorder() { empty(); }; // destructor only calls empty()
74
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
21 void empty(); // defined elsewhere; presumably resets the recorded state - TODO confirm
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
22 void new_url(char *host); // defined elsewhere; presumably records a host into hosts - TODO confirm
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
23 void new_tag(char *tag); // defined elsewhere; presumably checks tag against html_tags and counts bad ones - TODO confirm
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
24 void binary(); // defined elsewhere; presumably updates binary_tags - TODO confirm
76
81f1e400e8ab start coding on new config syntax
carl
parents: 75
diff changeset
25 mlfiPriv *get_priv() { return priv; }; // accessor for the priv pointer
81f1e400e8ab start coding on new config syntax
carl
parents: 75
diff changeset
26 string_set *get_tlds() { return tlds; }; // accessor for the tlds pointer
81f1e400e8ab start coding on new config syntax
carl
parents: 75
diff changeset
27 string_set &get_hosts() { return hosts; }; // returns a mutable reference to the member set, not a copy
81f1e400e8ab start coding on new config syntax
carl
parents: 75
diff changeset
28 bool excessive_bad_tags(int limit) { return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; // always false when limit <= 0; otherwise bad_html_tags must exceed both limit and 3*binary_tags
81f1e400e8ab start coding on new config syntax
carl
parents: 75
diff changeset
29 bool excessive_hosts(int limit) { return (limit > 0) && (hosts.size() > limit); }; // always false when limit <= 0; NOTE(review): if string_set::size() is unsigned this is a mixed-sign comparison, harmless here because limit > 0 is tested first
74
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
30 };
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
31
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
32
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
33 ////////////////////////////////////////////////
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
34 // the content scanner
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
35 //
75
1142e46be550 start coding on new config syntax
carl
parents: 74
diff changeset
36 class fsa; // forward declaration: url_scanner below only stores fsa pointers, so the full definition is not needed in this header
74
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
37 class url_scanner { // holds pointers to eight fsa parsers; built from a recorder and fed data through scan()
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
38 fsa *host_parser; // NOTE(review): ownership of these fsa pointers is not visible here - presumably created in the constructor and released in ~url_scanner(); confirm in the implementation
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
39 fsa *tags_parser;
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
40 fsa *urls_parser;
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
41 fsa *urld_parser;
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
42 fsa *html_parser;
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
43 fsa *mime_parser;
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
44 fsa *b64_parser;
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
45 fsa *uu_parser;
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
46
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
47 public:
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
48 url_scanner(recorder *memory); // defined elsewhere; memory is a recorder - presumably where scan results are reported (TODO confirm)
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
49 ~url_scanner(); // defined elsewhere
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
50 void scan(u_char *buffer, size_t length); // defined elsewhere; presumably processes length bytes starting at buffer - TODO confirm
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
51 };
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
52
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
53 #endif