Mercurial > dnsbl
annotate src/scanner.cpp @ 27:43a4f6b3e668 stable-2-3
add configurable host name limit and bad html tag limits.
author | carl |
---|---|
date | Sat, 22 May 2004 22:30:45 -0700 |
parents | 6176e7b2e8af |
children | 33e1e3910506 |
rev | line source |
---|---|
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1 /* |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
2 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
3 Copyright (c) 2004 Carl Byington - 510 Software Group, released under |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
4 the GPL version 2 or any later version at your choice available at |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
5 http://www.fsf.org/licenses/gpl.txt |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
6 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
7 */ |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
8 |
// Version identification string for this file. "$Id$" is presumably a keyword
// expanded by the version control system - confirm.
// Declared const char*: a string literal must not be bound to a non-const
// char* (that conversion was removed from the language in C++11).
static const char *scanner_version = "$Id$";
6 | 10 |
11 using namespace std; | |
12 | |
24 | 13 |
14 // object to record things we see in the body content | |
15 struct recorder | |
16 { | |
17 string_set *html_tags; // valid tags | |
18 string_set hosts; | |
19 int bad_html_tags; | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
20 int binary_tags; |
24 | 21 recorder(string_set *html_tags_); |
22 ~recorder(); | |
23 void empty(); | |
24 void new_url(char *host); | |
25 void new_tag(char *tag); | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
26 void binary(); |
24 | 27 }; |
28 recorder::recorder(string_set *html_tags_) { | |
29 html_tags = html_tags_; | |
30 bad_html_tags = 0; | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
31 binary_tags = 0; |
24 | 32 } |
33 recorder::~recorder() { | |
34 empty(); | |
35 } | |
36 void recorder::empty() { | |
37 bad_html_tags = 0; | |
38 discard(hosts); | |
39 } | |
40 void recorder::new_url(char *host) { | |
41 register_string(hosts, host); | |
42 } | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
43 void recorder::binary() { |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
44 binary_tags++; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
45 } |
24 | 46 void recorder::new_tag(char *tag) { |
47 string_set::iterator i = html_tags->find(tag); | |
48 if (i == html_tags->end()) { | |
49 bad_html_tags++; | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
50 if (debug_syslog && (bad_html_tags < 10)) { |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
51 // only log the first 10 bad tags |
24 | 52 char buf[200]; |
53 snprintf(buf, sizeof(buf), "bad html tag %s", tag); | |
54 my_syslog(buf); | |
55 } | |
56 } | |
57 } | |
58 | |
59 | |
60 | |
// States used by the table driven scanners. The enumerators that precede
// end_state are the column indexes of a PARSE row, so their order must stay
// in step with the column order of parse_table. The enumerators that follow
// end_state are the "temporary" states, which have no column of their own.
enum state {
    // host name recognizer states
    h_init,
    h_host,

    // html tag discarder states
    t_init,
    t_tag,
    t_disc,

    // url recognizer states
    u_init,
    u_http,
    u_sla,
    u_url,

    // url decoder states %xx
    d_init,
    d_pcnt,
    d_1,

    // html entity decoder states &#nnn;
    e_init,
    e_amp,
    e_num,

    // mime decoder states =xx
    m_init,
    m_eq,
    m_1,

    // base64 decoder states
    b_init,
    b_lf,
    b_lf2,
    b_64,

    // counter for number of columns in the table
    end_state,

    // temporary states
    h_end,
    t_bin,
    t_end,
    u_reco,
    d_2,
    e_semi,
    m_2,
    m_cr,
    m_nl,
    b_cr        // no trailing comma: not allowed in an enum before C++11
};

// One table row: a state value for each of the end_state columns.
typedef state PARSE[end_state];
114 | |
115 static PARSE parse_table[256] = { | |
24 | 116 // h_init, h_host, t_init, t_tag, t_disc, u_init, u_http, u_sla , u_url, d_init, d_pcnt, d_1, e_init, e_amp, e_num, m_init, m_eq, m_1, b_init, b_lf, b_lf2, b_64 |
6 | 117 |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
118 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x00 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
119 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x01 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
120 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x02 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
121 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x03 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
122 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x04 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
123 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x05 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
124 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x06 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
125 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x07 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
126 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x08 |
24 | 127 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x09 <tab> |
128 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_nl, m_init, b_lf, b_init, b_lf2, b_init, }, // 0x0a <lf> | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
129 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
130 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0c |
24 | 131 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_cr, m_init, b_init, b_init, b_init, b_cr, }, // 0x0d <cr> |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
132 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
133 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
134 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x10 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
135 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x11 xon char |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
136 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x12 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
137 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x13 xoff char |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
138 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x14 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
139 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x15 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
140 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x16 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
141 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x17 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
142 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x18 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
143 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x19 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
144 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1a |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
145 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
146 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1c |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
147 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1d |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
148 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
149 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
150 {h_init, h_end, t_init, t_end, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x20 space |
24 | 151 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x21 ! |
152 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x22 "" | |
153 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_num, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x23 # | |
154 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x24 $ | |
155 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_pcnt, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x25 % | |
156 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_amp, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x26 & | |
157 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x27 ' | |
158 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x28 ( | |
159 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x29 ) | |
160 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2A * | |
161 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2B + | |
162 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2C , | |
163 {h_host, h_host, t_init, t_disc, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2D - | |
164 {h_host, h_host, t_init, t_disc, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2E . | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
165 {h_init, h_end, t_init, t_tag, t_disc, u_init, u_sla, u_sla, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2F / |
24 | 166 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x30 0 |
167 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x31 1 | |
168 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x32 2 | |
169 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x33 3 | |
170 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x34 4 | |
171 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x35 5 | |
172 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x36 6 | |
173 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x37 7 | |
174 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x38 8 | |
175 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x39 9 | |
176 {h_init, h_end, t_init, t_disc, t_disc, u_http, u_http, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3A : | |
177 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_semi, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3B ; | |
178 {h_init, h_end, t_tag, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3C < | |
179 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_eq, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x3D = | |
180 {h_init, h_end, t_init, t_end, t_end, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3E > | |
181 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3F ? | |
27
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
182 {h_init, h_host, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x40 @ |
24 | 183 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x41 A |
184 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x42 B | |
185 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x43 C | |
186 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x44 D | |
187 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x45 E | |
188 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x46 F | |
189 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x47 G | |
190 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x48 H | |
191 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x49 I | |
192 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4A J | |
193 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4B K | |
194 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4C L | |
195 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4D M | |
196 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4E N | |
197 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4F O | |
198 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x50 P | |
199 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x51 Q | |
200 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x52 R | |
201 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x53 S | |
202 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x54 T | |
203 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x55 U | |
204 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x56 V | |
205 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x57 W | |
206 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x58 X | |
207 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x59 Y | |
208 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x5A Z | |
209 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5B [ | |
210 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5C brace | |
211 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5D ] | |
212 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5E ^ | |
213 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5F _ | |
214 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x60 ` | |
215 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x61 a | |
216 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x62 b | |
217 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x63 c | |
218 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x64 d | |
219 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x65 e | |
220 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x66 f | |
221 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x67 g | |
222 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x68 h | |
223 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x69 i | |
224 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6A j | |
225 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6B k | |
226 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6C l | |
227 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6D m | |
228 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6E n | |
229 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6F o | |
230 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x70 p | |
231 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x71 q | |
232 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x72 r | |
233 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x73 s | |
234 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x74 t | |
235 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x75 u | |
236 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x76 v | |
237 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x77 w | |
238 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x78 x | |
239 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x79 y | |
240 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x7A z | |
241 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7B { | |
242 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7C | | |
243 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7D } | |
244 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7E ~ | |
245 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7f | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
246 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x80 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
247 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x81 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
248 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x82 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
249 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x83 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
250 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x84 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
251 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x85 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
252 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x86 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
253 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x87 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
254 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x88 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
255 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x89 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
256 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8a |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
257 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
258 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8c |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
259 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8d |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
260 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
261 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
262 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x90 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
263 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x91 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
264 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x92 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
265 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x93 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
266 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x94 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
267 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x95 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
268 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x96 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
269 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x97 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
270 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x98 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
271 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x99 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
272 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9a |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
273 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
274 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9c |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
275 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9d |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
276 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
277 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
278 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
279 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
280 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
281 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
282 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
283 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
284 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
285 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
286 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
287 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
288 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaa |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
289 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xab |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
290 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xac |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
291 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xad |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
292 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xae |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
293 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
294 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
295 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
296 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
297 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
298 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
299 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
300 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
301 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
302 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
303 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
304 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xba |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
305 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
306 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
307 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
308 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbe |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
309 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
310 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
311 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
312 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
313 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
314 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
315 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
316 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
317 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
318 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
319 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
320 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xca |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
321 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
322 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
323 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
324 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xce |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
325 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
326 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
327 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
328 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
329 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
330 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
331 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
332 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
333 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
334 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
335 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
336 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xda |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
337 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
338 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
339 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
340 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xde |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
341 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
342 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
343 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
344 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
345 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
346 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
347 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
348 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
349 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
350 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
351 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
352 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xea |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
353 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xeb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
354 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xec |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
355 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xed |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
356 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xee |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
357 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xef |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
358 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
359 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
360 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
361 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
362 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
363 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
364 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
365 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
366 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
367 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
368 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfa |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
369 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
370 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
371 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
372 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfe |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
373 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xff |
6 | 374 }; |
375 | |
376 | |
// NULL-terminated list of top level domain suffixes (each with its
// leading dot). Consumers walk the array until they reach the NULL
// sentinel, so new suffixes must be added before it.
char *tlds[] = { ".com", ".net", ".org", ".biz", ".info", NULL };
385 | |
// Byte -> hexadecimal digit value lookup table.
//   '0'..'9' (0x30-0x39) map to 0..9
//   'A'..'F' (0x41-0x46) map to 10..15
//   'a'..'f' (0x61-0x66) map to 10..15
// Every other byte maps to 0, so a non-hex byte cannot be told apart
// from the digit '0' by looking at this table alone.
// Layout: one row per high nibble, one column per low nibble.
u_char hex_decode[256] = {
    //x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x00 - 0x0f
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x10 - 0x1f
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x20 - 0x2f
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,   // 0x30 - 0x3f  digits 0-9
       0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x40 - 0x4f  letters A-F
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x50 - 0x5f
       0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x60 - 0x6f  letters a-f
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x70 - 0x7f
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x80 - 0x8f
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x90 - 0x9f
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xa0 - 0xaf
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xb0 - 0xbf
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xc0 - 0xcf
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xd0 - 0xdf
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xe0 - 0xef
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xf0 - 0xff
};
7 | 644 |
// Byte -> base64 sextet value lookup table.
//   'A'..'Z' map to  0..25
//   'a'..'z' map to 26..51
//   '0'..'9' map to 52..61
//   '+'      maps to 62
//   '/'      maps to 63
// Every other byte (including '=') maps to 0, which is also the value
// of 'A'; the table by itself does not flag invalid input.
// Layout: one row per high nibble, one column per low nibble.
u_char b64_decode[256] = {
    //x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x00 - 0x0f
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x10 - 0x1f
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 62,  0,  0,  0, 63,   // 0x20 - 0x2f  + and /
      52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  0,  0,  0,  0,  0,  0,   // 0x30 - 0x3f  digits 0-9
       0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,   // 0x40 - 0x4f  @, A-O
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  0,  0,  0,  0,  0,   // 0x50 - 0x5f  P-Z
       0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,   // 0x60 - 0x6f  `, a-o
      41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  0,  0,  0,  0,  0,   // 0x70 - 0x7f  p-z
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x80 - 0x8f
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x90 - 0x9f
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xa0 - 0xaf
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xb0 - 0xbf
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xc0 - 0xcf
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xd0 - 0xdf
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xe0 - 0xef
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xf0 - 0xff
};
903 | |
8 | 904 #define PENDING_LIMIT 100 |
6 | 905 struct fsa { |
12 | 906 u_char pending[PENDING_LIMIT]; |
907 int count; | |
908 state st; | |
909 state init; | |
16 | 910 fsa* next1; |
911 fsa* next2; | |
24 | 912 recorder *memory; |
6 | 913 |
24 | 914 fsa(state init, fsa* next1_, fsa* next2_, recorder *memory_); |
6 | 915 void push(u_char *buf, int len); |
16 | 916 void pusher(); |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
917 void error(char *err); |
6 | 918 }; |
919 | |
24 | 920 fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) { |
921 count = 0; | |
922 st = init_; | |
923 init = init_; | |
924 next1 = next1_; | |
925 next2 = next2_; | |
926 memory = memory_; | |
6 | 927 } |
928 | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
929 void fsa::error(char *err) { |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
930 count = 0; |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
931 st = init; |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
932 if (err) my_syslog(err); |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
933 } |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
934 |
16 | 935 void fsa::pusher() { |
936 if (next1) next1->push(pending, count); | |
937 if (next2) next2->push(pending, count); | |
938 count = 0; | |
939 } | |
940 | |
6 | 941 void fsa::push(u_char *buf, int len) { |
942 for (int i=0; i<len; i++) { | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
943 if (count == (PENDING_LIMIT-1)) error(NULL); |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
944 if (st >= end_state) error("finite state machine impossible state"); |
6 | 945 u_char c = buf[i]; |
946 pending[count++] = c; | |
947 st = parse_table[c][st]; | |
948 switch (st) { | |
949 | |
950 ////////////////////////////// | |
16 | 951 // host name recognizer |
952 case h_end: { | |
18 | 953 pending[--count] = '\0'; // null terminate host name by overwriting the terminator |
27
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
954 if (!strchr((const char *)pending, '@')) { |
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
955 // not an email address or message id |
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
956 char *tld; |
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
957 for (int i=0; (tld = tlds[i]); i++) { |
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
958 int n = strlen(tld); |
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
959 if (count > n) { |
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
960 if (strncasecmp((const char *)(pending+count-n), tld, n) == 0) { |
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
961 memory->new_url((char*)pending); |
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
962 break; |
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
963 } |
16 | 964 } |
965 } | |
966 } | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
967 st = h_init; |
16 | 968 } // fall thru |
969 | |
970 case h_init: { | |
18 | 971 count = 0; |
16 | 972 } break; |
973 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
974 |
16 | 975 ////////////////////////////// |
18 | 976 // html tag discarder |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
977 case t_bin: { |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
978 memory->binary(); |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
979 st = t_disc; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
980 count = 0; // discard all characters |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
981 } break; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
982 |
18 | 983 case t_end: { |
24 | 984 if (count > 2) { |
985 pending[--count] = '\0'; // null terminate html tag | |
986 for (int i=1; i<count; i++) pending[i] = tolower(pending[i]); | |
987 memory->new_tag((char*)pending+1); | |
988 } | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
989 st = (c == ' ') ? t_disc : t_init; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
990 } // fall thru |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
991 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
992 case t_disc: { |
18 | 993 count = 0; // discard all characters |
16 | 994 } break; |
995 | |
996 case t_init: { | |
997 pusher(); | |
998 } break; | |
999 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1000 |
16 | 1001 ////////////////////////////// |
7 | 1002 // url recognizer |
1003 case u_reco: { | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
1004 if (count > 13) { // need some minimal length host name after the protocol |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
1005 pending[--count] = '\0'; // null terminate host name by overwriting the terminator |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1006 char *p = strrchr((const char *)pending, '/'); |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1007 if (p && // have a leading / |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1008 strchr(p, '.') && // require at least one . in a dns name |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1009 (strncasecmp((const char *)pending, "http", 4) == 0)) { // must start with protocol |
24 | 1010 memory->new_url(++p); // we seem to have a host name, skip the last / |
9 | 1011 } |
8 | 1012 } |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
1013 st = u_init; |
16 | 1014 } // fall thru |
7 | 1015 |
1016 case u_init: { | |
1017 count = 0; // discard all characters | |
1018 } break; | |
1019 | |
1020 | |
1021 ////////////////////////////// | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1022 // url decoder %xx |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1023 case d_2: { |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1024 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]]; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1025 count = 1; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1026 st = d_init; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1027 } // fall thru |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1028 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1029 case d_init: { |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1030 pusher(); |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1031 } break; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1032 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1033 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1034 ////////////////////////////// |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1035 // html entity decoder &#nnn; |
6 | 1036 case e_semi: { |
1037 pending[--count] = '\0'; // null terminate the digit string by overwriting the semicolon | |
1038 pending[0] = atoi((const char *)pending+2); | |
1039 count = 1; | |
1040 st = e_init; | |
1041 } // fall thru | |
1042 | |
1043 case e_init: { | |
16 | 1044 pusher(); |
1045 } break; | |
1046 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1047 |
16 | 1048 ////////////////////////////// |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1049 // mime decoder =xx |
16 | 1050 case m_2: { |
1051 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]]; | |
1052 count = 1; | |
1053 st = m_init; | |
1054 } // fall thru | |
1055 | |
1056 case m_init: { | |
1057 pusher(); | |
1058 } break; | |
1059 | |
1060 case m_cr: { | |
1061 count = 1; | |
1062 st = m_eq; | |
1063 } break; | |
1064 | |
1065 case m_nl: { | |
6 | 1066 count = 0; |
16 | 1067 st = m_init; |
6 | 1068 } break; |
1069 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1070 |
6 | 1071 ////////////////////////////// |
1072 // base64 decoder | |
1073 case b_lf2: { | |
1074 count--; | |
1075 } break; | |
1076 | |
1077 case b_cr: { | |
1078 int cnt = 0; | |
1079 if ((count % 4) == 1) { | |
1080 count--; | |
1081 // might have proper b64 data | |
1082 for (int i=0; i<count; i+=4) { | |
1083 unsigned long a1 = b64_decode[pending[i]]; | |
1084 unsigned long a2 = b64_decode[pending[i+1]]; | |
1085 unsigned long a3 = b64_decode[pending[i+2]]; | |
1086 unsigned long a4 = b64_decode[pending[i+3]]; | |
1087 unsigned long a = (a1 << 18) | (a2 << 12) | (a3 << 6) | a4; | |
1088 pending[cnt++] = (a & 0x00ff0000) >> 16; | |
1089 pending[cnt++] = (a & 0x0000ff00) >> 8; | |
1090 pending[cnt++] = (a & 0x000000ff); | |
1091 if ((char)pending[i+3] == '=') cnt--; | |
1092 if ((char)pending[i+2] == '=') cnt--; | |
1093 } | |
1094 count = cnt; | |
1095 st = b_lf2; | |
1096 } | |
1097 else st = b_init; | |
1098 } // fall thru | |
1099 | |
1100 case b_lf: | |
1101 case b_init: { | |
16 | 1102 pusher(); |
6 | 1103 } break; |
1104 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1105 |
6 | 1106 ////////////////////////////// |
1107 // states that just accumulate characters in the pending buffer | |
16 | 1108 case h_host: |
24 | 1109 case t_tag: |
7 | 1110 case u_http: |
1111 case u_url: | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1112 case u_sla: |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1113 case d_pcnt: |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1114 case d_1: |
6 | 1115 case e_amp: |
1116 case e_num: | |
1117 case m_eq: | |
1118 case m_1: | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1119 case b_64: |
6 | 1120 default: { |
1121 } break; | |
1122 } | |
1123 } | |
1124 } | |
1125 | |
8 | 1126 struct url_scanner { |
16 | 1127 fsa *host_parser; |
1128 fsa *tags_parser; | |
8 | 1129 fsa *urls_parser; |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1130 fsa *urld_parser; |
8 | 1131 fsa *html_parser; |
1132 fsa *mime_parser; | |
1133 fsa *b64_parser; | |
6 | 1134 |
24 | 1135 url_scanner(recorder *memory); |
8 | 1136 ~url_scanner(); |
1137 void scan(u_char *buffer, size_t length); | |
1138 }; | |
1139 | |
24 | 1140 url_scanner::url_scanner(recorder *memory) { |
1141 host_parser = new fsa(h_init, NULL, NULL, memory); | |
1142 tags_parser = new fsa(t_init, host_parser, NULL, memory); | |
1143 urls_parser = new fsa(u_init, NULL, NULL, memory); | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1144 urld_parser = new fsa(d_init, urls_parser, tags_parser, NULL); |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1145 html_parser = new fsa(e_init, urld_parser, NULL, NULL); |
16 | 1146 mime_parser = new fsa(m_init, html_parser, NULL, NULL); |
1147 b64_parser = new fsa(b_init, mime_parser, NULL, NULL); | |
6 | 1148 } |
1149 | |
8 | 1150 url_scanner::~url_scanner() { |
16 | 1151 delete host_parser; |
1152 delete tags_parser; | |
8 | 1153 delete urls_parser; |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1154 delete urld_parser; |
8 | 1155 delete html_parser; |
1156 delete mime_parser; | |
1157 delete b64_parser; | |
6 | 1158 } |
1159 | |
8 | 1160 void url_scanner::scan(u_char *buffer, size_t length) { |
1161 b64_parser->push(buffer, length); | |
6 | 1162 } |
1163 |