Mercurial > dnsbl
annotate src/scanner.h @ 283:7438f79afeb0
Added tag stable-6-0-38 for changeset e276180647ab
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Tue, 24 Dec 2013 08:22:01 -0800 |
parents | f92f24950bd3 |
children | f9165d9aa689 |
rev | line source |
---|---|
143 | 1 /* |
2 | |
152 | 3 Copyright (c) 2007 Carl Byington - 510 Software Group, released under |
4 the GPL version 3 or any later version at your choice available at | |
5 http://www.gnu.org/licenses/gpl-3.0.txt | |
143 | 6 |
7 */ | |
8 | |
74 | 9 #ifndef scanner_include |
10 #define scanner_include | |
11 | |
12 #include "dnsbl.h" | |
13 | |
14 //////////////////////////////////////////////// | |
15 // memory for the content scanner | |
16 // | |
// recorder: memory for the content scanner (url_scanner's constructor
// takes a `recorder *memory`).  It keeps pointers to the lookup sets
// described below as valid tags, valid tlds, wildcard tlds and invalid
// tlds, a string_set of hosts held by value, and two size_t counters
// that are read by excessive_bad_tags().
17 class recorder | |
18 { | |
214
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
19 mlfiPriv *priv; // needed for syslog |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
20 string_set *html_tags; // valid tags |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
21 string_set *tlds; // valid tlds |
270
f92f24950bd3
Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
Carl Byington <carl@five-ten-sg.com>
parents:
236
diff
changeset
|
22 string_set *tldwilds; // valid wildcard tlds |
f92f24950bd3
Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
Carl Byington <carl@five-ten-sg.com>
parents:
236
diff
changeset
|
23 string_set *tldnots; // invalid tlds |
214
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
// hosts is held by value; get_hosts() hands out a reference to it and
// excessive_hosts() checks its size.  Presumably filled by new_url() -
// confirm in the implementation file.
24 string_set hosts; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
// The two counters below are compared in excessive_bad_tags().
// Presumably updated by new_tag() and binary() - confirm in the
// implementation file.
25 size_t bad_html_tags; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
26 size_t binary_tags; |
74 | 27 |
28 public: | |
270
f92f24950bd3
Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
Carl Byington <carl@five-ten-sg.com>
parents:
236
diff
changeset
|
// The lookup sets arrive by reference although the members are pointers.
// Defined elsewhere; presumably it stores their addresses, so the sets
// would need to outlive this object - TODO confirm.
29 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &tldwilds_, string_set &tldnots_); |
214
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
// The destructor only calls empty(), which is defined elsewhere.
30 ~recorder() { empty(); }; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
31 void empty(); |
236
c0d2e99c0a1d
Add surbl checks on the smtp helo value, client reverse dns name, and mail from domain name
Carl Byington <carl@five-ten-sg.com>
parents:
214
diff
changeset
|
32 void new_url(const char *host); |
c0d2e99c0a1d
Add surbl checks on the smtp helo value, client reverse dns name, and mail from domain name
Carl Byington <carl@five-ten-sg.com>
parents:
214
diff
changeset
|
33 void new_tag(const char *tag); |
214
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
34 void binary(); |
236
c0d2e99c0a1d
Add surbl checks on the smtp helo value, client reverse dns name, and mail from domain name
Carl Byington <carl@five-ten-sg.com>
parents:
214
diff
changeset
|
// Forwards buf to my_syslog() together with priv.
35 void syslog(const char *buf) { my_syslog(priv, buf); }; |
214
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
// Plain accessors: each returns the corresponding member unchanged.
36 mlfiPriv *get_priv() { return priv; }; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
37 string_set *get_tlds() { return tlds; }; |
270
f92f24950bd3
Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
Carl Byington <carl@five-ten-sg.com>
parents:
236
diff
changeset
|
38 string_set *get_tldwilds() { return tldwilds; }; |
f92f24950bd3
Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
Carl Byington <carl@five-ten-sg.com>
parents:
236
diff
changeset
|
39 string_set *get_tldnots() { return tldnots; }; |
214
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
40 string_set &get_hosts() { return hosts; }; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
// True only when limit is non-zero, bad_html_tags exceeds limit, and
// bad_html_tags is also more than three times binary_tags.  A limit of
// 0 therefore always yields false.
41 bool excessive_bad_tags(size_t limit){ return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
// True only when limit is non-zero and hosts holds more than limit
// entries.  A limit of 0 always yields false.
42 bool excessive_hosts(size_t limit) { return (limit > 0) && (hosts.size() > limit); }; |
147 | 43 |
74 | 44 }; |
45 | |
46 | |
47 //////////////////////////////////////////////// | |
48 // the content scanner | |
49 // | |
// fsa is only forward-declared in this header; url_scanner refers to it
// purely through pointers, so the full definition is not needed here.
//
// url_scanner: the content scanner.  It holds eight fsa pointers and
// exposes a single scan() entry point over a byte buffer.  Constructor,
// destructor and scan() are all defined elsewhere.
75 | 50 class fsa; |
74 | 51 class url_scanner { |
214
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
// One fsa pointer per parser.  The names suggest host names, tags, urls,
// html, mime, base64 and uuencoded input - NOTE(review): confirm the
// role of each (urls vs urld in particular) against the implementation.
52 fsa *host_parser; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
53 fsa *tags_parser; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
54 fsa *urls_parser; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
55 fsa *urld_parser; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
56 fsa *html_parser; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
57 fsa *mime_parser; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
58 fsa *b64_parser; |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
59 fsa *uu_parser; |
74 | 60 |
61 public: | |
214
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
// memory is a recorder; presumably where scan results are reported -
// TODO confirm in the implementation.
62 url_scanner(recorder *memory); |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
// Presumably releases the parsers above - TODO confirm.
63 ~url_scanner(); |
82886d4dd71f
Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents:
152
diff
changeset
|
// Takes a non-const byte buffer and its length.
64 void scan(u_char *buffer, size_t length); |
74 | 65 }; |
66 | |
67 #endif |