annotate src/scanner.h @ 302:52430ef70440 stable-6-0-44

Generic regex now matches against the reverse dns PTR value
author Carl Byington <carl@five-ten-sg.com>
date Mon, 13 Oct 2014 20:50:26 -0700 (2014-10-14)
parents f92f24950bd3
children f9165d9aa689
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
143
ecb40aa3eaa5 require two periods for ip addresses
carl
parents: 117
diff changeset
1 /*
ecb40aa3eaa5 require two periods for ip addresses
carl
parents: 117
diff changeset
2
152
c7fc218686f5 gpl3, block mail to recipients that cannot reply
carl
parents: 147
diff changeset
3 Copyright (c) 2007 Carl Byington - 510 Software Group, released under
c7fc218686f5 gpl3, block mail to recipients that cannot reply
carl
parents: 147
diff changeset
4 the GPL version 3 or any later version at your choice available at
c7fc218686f5 gpl3, block mail to recipients that cannot reply
carl
parents: 147
diff changeset
5 http://www.gnu.org/licenses/gpl-3.0.txt
143
ecb40aa3eaa5 require two periods for ip addresses
carl
parents: 117
diff changeset
6
ecb40aa3eaa5 require two periods for ip addresses
carl
parents: 117
diff changeset
7 */
ecb40aa3eaa5 require two periods for ip addresses
carl
parents: 117
diff changeset
8
74
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
9 #ifndef scanner_include
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
10 #define scanner_include
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
11
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
12 #include "dnsbl.h"
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
13
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
14 ////////////////////////////////////////////////
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
15 // memory for the content scanner
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
16 //
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
17 class recorder // state the content scanner accumulates: a host set plus two tag counters
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
18 {
214
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
19 mlfiPriv *priv; // needed for syslog; syslog() below hands it to my_syslog()
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
20 string_set *html_tags; // valid tags (presumably points at the set given to the constructor - confirm in the .cpp)
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
21 string_set *tlds; // valid tlds; returned by get_tlds()
270
f92f24950bd3 Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
Carl Byington <carl@five-ten-sg.com>
parents: 236
diff changeset
22 string_set *tldwilds; // valid wildcard tlds; returned by get_tldwilds()
f92f24950bd3 Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
Carl Byington <carl@five-ten-sg.com>
parents: 236
diff changeset
23 string_set *tldnots; // invalid tlds; returned by get_tldnots()
214
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
24 string_set hosts; // held by value; exposed by get_hosts() and measured by excessive_hosts()
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
25 size_t bad_html_tags; // counter tested by excessive_bad_tags()
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
26 size_t binary_tags; // counter; excessive_bad_tags() compares bad_html_tags against 3x this value
74
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
27
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
28 public:
270
f92f24950bd3 Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
Carl Byington <carl@five-ten-sg.com>
parents: 236
diff changeset
29 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &tldwilds_, string_set &tldnots_); // defined elsewhere; sets arrive by reference while the members are pointers
214
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
30 ~recorder() { empty(); }; // destruction does nothing beyond calling empty()
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
31 void empty(); // defined elsewhere; also invoked by the destructor
236
c0d2e99c0a1d Add surbl checks on the smtp helo value, client reverse dns name, and mail from domain name
Carl Byington <carl@five-ten-sg.com>
parents: 214
diff changeset
32 void new_url(const char *host); // defined elsewhere; presumably records host in hosts - confirm
c0d2e99c0a1d Add surbl checks on the smtp helo value, client reverse dns name, and mail from domain name
Carl Byington <carl@five-ten-sg.com>
parents: 214
diff changeset
33 void new_tag(const char *tag); // defined elsewhere; presumably checks tag against html_tags - confirm
214
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
34 void binary(); // defined elsewhere; presumably updates binary_tags - confirm
236
c0d2e99c0a1d Add surbl checks on the smtp helo value, client reverse dns name, and mail from domain name
Carl Byington <carl@five-ten-sg.com>
parents: 214
diff changeset
35 void syslog(const char *buf) { my_syslog(priv, buf); }; // forwards buf to my_syslog() together with priv
214
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
36 mlfiPriv *get_priv() { return priv; }; // accessor: returns the stored pointer unchanged
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
37 string_set *get_tlds() { return tlds; }; // accessor: returns the stored pointer unchanged
270
f92f24950bd3 Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
Carl Byington <carl@five-ten-sg.com>
parents: 236
diff changeset
38 string_set *get_tldwilds() { return tldwilds; }; // accessor: returns the stored pointer unchanged
f92f24950bd3 Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
Carl Byington <carl@five-ten-sg.com>
parents: 236
diff changeset
39 string_set *get_tldnots() { return tldnots; }; // accessor: returns the stored pointer unchanged
214
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
40 string_set &get_hosts() { return hosts; }; // non-const reference to the member set, so callers can modify it
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
41 bool excessive_bad_tags(size_t limit){ return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; // limit == 0 always yields false; otherwise bad_html_tags must exceed both limit and 3*binary_tags
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
42 bool excessive_hosts(size_t limit) { return (limit > 0) && (hosts.size() > limit); }; // limit == 0 always yields false; otherwise true when hosts.size() exceeds limit
147
812c80305f26 fix 5.23 bug and add fsa debug logging
carl
parents: 143
diff changeset
43
74
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
44 };
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
45
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
46
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
47 ////////////////////////////////////////////////
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
48 // the content scanner
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
49 //
75
1142e46be550 start coding on new config syntax
carl
parents: 74
diff changeset
50 class fsa;
74
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
51 class url_scanner { // holds eight fsa pointers; fsa is only forward-declared in this header
214
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
52 fsa *host_parser; // the parser members are set up outside this header; roles are suggested by name only - confirm in the .cpp
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
53 fsa *tags_parser;
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
54 fsa *urls_parser;
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
55 fsa *urld_parser;
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
56 fsa *html_parser;
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
57 fsa *mime_parser;
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
58 fsa *b64_parser;
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
59 fsa *uu_parser;
74
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
60
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
61 public:
214
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
62 url_scanner(recorder *memory); // defined elsewhere; takes the recorder by pointer (presumably where scan results are reported - confirm)
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
63 ~url_scanner(); // defined elsewhere; presumably releases the parser objects - confirm
82886d4dd71f Fixes to compile on Fedora 9 and for const correctness.
Carl Byington <carl@five-ten-sg.com>
parents: 152
diff changeset
64 void scan(u_char *buffer, size_t length); // defined elsewhere; takes a non-const byte buffer and its length
74
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
65 };
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
66
b7449114ebb0 start coding on new config syntax
carl
parents:
diff changeset
67 #endif