Mercurial > dnsbl
comparison src/scanner.h @ 74:b7449114ebb0
start coding on new config syntax
author | carl |
---|---|
date | Sun, 10 Jul 2005 14:19:00 -0700 |
parents | |
children | 1142e46be550 |
comparison
equal
deleted
inserted
replaced
73:2b369f7db7bf | 74:b7449114ebb0 |
---|---|
1 #ifndef scanner_include | |
2 #define scanner_include | |
3 | |
4 #include "dnsbl.h" | |
5 | |
6 //////////////////////////////////////////////// | |
7 // memory for the content scanner | |
8 // | |
9 class recorder | |
10 { | |
11 mlfiPriv *priv; // needed for syslog | |
12 string_set *html_tags; // valid tags | |
13 string_set *tlds; // valid tlds | |
14 string_set hosts; | |
15 int bad_html_tags; | |
16 int binary_tags; | |
17 | |
18 public: | |
19 recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_); | |
20 ~recorder(); | |
21 void empty(); | |
22 void new_url(char *host); | |
23 void new_tag(char *tag); | |
24 void binary(); | |
25 mlfiPriv *get_priv() {return priv; }; | |
26 string_set *get_tlds() {return tlds; }; | |
27 string_set &get_hosts() {return hosts; }; | |
28 bool excessive_bad_tags(int limit) {return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; | |
29 bool excessive_hosts(int limit) {return (limit > 0) && (hosts.size() > limit); }; | |
30 }; | |
31 | |
32 | |
33 //////////////////////////////////////////////// | |
34 // finite state machine | |
35 // | |
/**
 * States for the scanner's finite state machines.
 *
 * Enumerator values are implicit, so the order below is significant:
 * every state listed before end_state is a column in the table, and
 * end_state itself is the column count.  The "temporary" states that
 * follow end_state are not counted as columns.
 */
enum state {
    // host name recognizer states
    h_init, h_host,

    // html tag discarder states
    t_init,
    t_tag1,     // seen opening <
    t_tag2,     // not comment
    t_com1,     // seen !
    t_com2,     // seen first -
    t_com3,     // seen second -, looking for -->
    t_com4,     // seen first -
    t_com5,     // seen second -
    t_disc,     // looking for closing >

    // url recognizer states
    u_init, u_http, u_sla, u_url,

    // url decoder states %xx
    d_init, d_pcnt, d_1,

    // html entity decoder states &#nnn;
    e_init, e_amp, e_num,

    // mime decoder states =xx
    m_init, m_eq, m_1,

    // base64 decoder states
    b_init, b_lf, b_lf2, b_64,

    // uuencoding decoder states
    uu_init, uu_lf, uu_lf2, uu_64,

    // counter for number of columns in the table
    end_state,

    // temporary states
    h_end,
    t_bin, t_end,
    u_reco,
    d_2,
    e_semi,
    m_2, m_cr, m_nl,
    b_cr,
    uu_cr
};
100 | |
101 #define PENDING_LIMIT 100 | |
102 class fsa { | |
103 u_char pending[PENDING_LIMIT]; | |
104 int count; | |
105 state st; | |
106 state init; | |
107 fsa *next1; | |
108 fsa *next2; | |
109 recorder *memory; | |
110 | |
111 public: | |
112 fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_); | |
113 void push(u_char *buf, int len); | |
114 void pusher(); | |
115 void error(char *err); | |
116 }; | |
117 | |
118 | |
119 //////////////////////////////////////////////// | |
120 // the content scanner | |
121 // | |
122 class url_scanner { | |
123 fsa *host_parser; | |
124 fsa *tags_parser; | |
125 fsa *urls_parser; | |
126 fsa *urld_parser; | |
127 fsa *html_parser; | |
128 fsa *mime_parser; | |
129 fsa *b64_parser; | |
130 fsa *uu_parser; | |
131 | |
132 public: | |
133 url_scanner(recorder *memory); | |
134 ~url_scanner(); | |
135 void scan(u_char *buffer, size_t length); | |
136 }; | |
137 | |
138 #endif |