Mercurial > dnsbl
annotate src/scanner.cpp @ 25:6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
author | carl |
---|---|
date | Fri, 21 May 2004 21:45:36 -0700 |
parents | 2e23b7184d2b |
children | 43a4f6b3e668 |
rev | line source |
---|---|
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1 /* |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
2 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
3 Copyright (c) 2004 Carl Byington - 510 Software Group, released under |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
4 the GPL version 2 or any later version at your choice available at |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
5 http://www.fsf.org/licenses/gpl.txt |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
6 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
7 */ |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
8 |
// Version identification string for this translation unit.
// NOTE(review): "$Id$" looks like a revision-control keyword placeholder
// that is expanded on checkout - confirm against the build/VCS setup.
// Declared const because a string literal must not be bound to a
// non-const char* (deprecated in C++98, ill-formed since C++11).
static const char* scanner_version = "$Id$";
6 | 10 |
11 using namespace std; | |
12 | |
24 | 13 |
14 // object to record things we see in the body content | |
15 struct recorder | |
16 { | |
17 string_set *html_tags; // valid tags | |
18 string_set hosts; | |
19 int bad_html_tags; | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
20 int binary_tags; |
24 | 21 recorder(string_set *html_tags_); |
22 ~recorder(); | |
23 void empty(); | |
24 void new_url(char *host); | |
25 void new_tag(char *tag); | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
26 void binary(); |
24 | 27 }; |
28 recorder::recorder(string_set *html_tags_) { | |
29 html_tags = html_tags_; | |
30 bad_html_tags = 0; | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
31 binary_tags = 0; |
24 | 32 } |
33 recorder::~recorder() { | |
34 empty(); | |
35 } | |
36 void recorder::empty() { | |
37 bad_html_tags = 0; | |
38 discard(hosts); | |
39 } | |
40 void recorder::new_url(char *host) { | |
41 register_string(hosts, host); | |
42 } | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
43 void recorder::binary() { |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
44 binary_tags++; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
45 } |
24 | 46 void recorder::new_tag(char *tag) { |
47 string_set::iterator i = html_tags->find(tag); | |
48 if (i == html_tags->end()) { | |
49 bad_html_tags++; | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
50 if (debug_syslog && (bad_html_tags < 10)) { |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
51 // only log the first 10 bad tags |
24 | 52 char buf[200]; |
53 snprintf(buf, sizeof(buf), "bad html tag %s", tag); | |
54 my_syslog(buf); | |
55 } | |
56 } | |
57 } | |
58 | |
59 | |
60 | |
// States for the table-driven recognizers / decoders.
//
// The enumerators before end_state are the persistent states; end_state
// itself is the number of columns in a PARSE row (22). The enumerators
// after end_state are "temporary" states: they have no column of their
// own in the table and only occur as table entries.
//
// The order of the enumerators fixes their numeric values and therefore
// the column layout of the parse table, so it must not be changed.
enum state {
    // host name recognizer states
    h_init,
    h_host,

    // html tag discarder states
    t_init,
    t_tag,
    t_disc,

    // url recognizer states
    u_init,
    u_http,
    u_sla,
    u_url,

    // url decoder states %xx
    d_init,
    d_pcnt,
    d_1,

    // html entity decoder states &#nnn;
    e_init,
    e_amp,
    e_num,

    // mime decoder states =xx
    m_init,
    m_eq,
    m_1,

    // base64 decoder states
    b_init,
    b_lf,
    b_lf2,
    b_64,

    // counter for number of columns in the table
    end_state,

    // temporary states
    h_end,
    t_bin,
    t_end,
    u_reco,
    d_2,
    e_semi,
    m_2,
    m_cr,
    m_nl,
    b_cr        // no trailing comma: not permitted in an enumerator list before C++11
};
112 | |
// One row of the parse table: an array of end_state entries, one per
// persistent state, each holding a state value.
// NOTE(review): presumably a row is selected by the input byte and then
// indexed by the current state to obtain the next state - confirm
// against the code that consumes parse_table.
typedef state PARSE[end_state];
114 | |
115 static PARSE parse_table[256] = { | |
24 | 116 // h_init, h_host, t_init, t_tag, t_disc, u_init, u_http, u_sla , u_url, d_init, d_pcnt, d_1, e_init, e_amp, e_num, m_init, m_eq, m_1, b_init, b_lf, b_lf2, b_64 |
6 | 117 |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
118 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x00 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
119 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x01 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
120 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x02 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
121 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x03 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
122 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x04 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
123 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x05 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
124 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x06 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
125 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x07 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
126 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x08 |
24 | 127 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x09 <tab> |
128 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_nl, m_init, b_lf, b_init, b_lf2, b_init, }, // 0x0a <lf> | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
129 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
130 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0c |
24 | 131 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_cr, m_init, b_init, b_init, b_init, b_cr, }, // 0x0d <cr> |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
132 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
133 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
134 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x10 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
135 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x11 xon char |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
136 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x12 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
137 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x13 xoff char |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
138 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x14 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
139 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x15 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
140 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x16 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
141 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x17 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
142 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x18 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
143 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x19 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
144 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1a |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
145 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
146 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1c |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
147 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1d |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
148 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
149 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
150 {h_init, h_end, t_init, t_end, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x20 space |
24 | 151 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x21 ! |
152 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x22 "" | |
153 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_num, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x23 # | |
154 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x24 $ | |
155 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_pcnt, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x25 % | |
156 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_amp, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x26 & | |
157 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x27 ' | |
158 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x28 ( | |
159 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x29 ) | |
160 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2A * | |
161 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2B + | |
162 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2C , | |
163 {h_host, h_host, t_init, t_disc, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2D - | |
164 {h_host, h_host, t_init, t_disc, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2E . | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
165 {h_init, h_end, t_init, t_tag, t_disc, u_init, u_sla, u_sla, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2F / |
24 | 166 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x30 0 |
167 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x31 1 | |
168 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x32 2 | |
169 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x33 3 | |
170 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x34 4 | |
171 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x35 5 | |
172 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x36 6 | |
173 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x37 7 | |
174 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x38 8 | |
175 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x39 9 | |
176 {h_init, h_end, t_init, t_disc, t_disc, u_http, u_http, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3A : | |
177 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_semi, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3B ; | |
178 {h_init, h_end, t_tag, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3C < | |
179 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_eq, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x3D = | |
180 {h_init, h_end, t_init, t_end, t_end, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3E > | |
181 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3F ? | |
182 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x40 @ | |
183 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x41 A | |
184 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x42 B | |
185 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x43 C | |
186 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x44 D | |
187 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x45 E | |
188 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x46 F | |
189 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x47 G | |
190 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x48 H | |
191 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x49 I | |
192 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4A J | |
193 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4B K | |
194 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4C L | |
195 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4D M | |
196 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4E N | |
197 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4F O | |
198 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x50 P | |
199 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x51 Q | |
200 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x52 R | |
201 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x53 S | |
202 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x54 T | |
203 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x55 U | |
204 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x56 V | |
205 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x57 W | |
206 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x58 X | |
207 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x59 Y | |
208 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x5A Z | |
209 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5B [ | |
210 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5C brace | |
211 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5D ] | |
212 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5E ^ | |
213 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5F _ | |
214 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x60 ` | |
215 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x61 a | |
216 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x62 b | |
217 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x63 c | |
218 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x64 d | |
219 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x65 e | |
220 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x66 f | |
221 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x67 g | |
222 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x68 h | |
223 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x69 i | |
224 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6A j | |
225 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6B k | |
226 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6C l | |
227 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6D m | |
228 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6E n | |
229 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6F o | |
230 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x70 p | |
231 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x71 q | |
232 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x72 r | |
233 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x73 s | |
234 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x74 t | |
235 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x75 u | |
236 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x76 v | |
237 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x77 w | |
238 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x78 x | |
239 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x79 y | |
240 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x7A z | |
241 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7B { | |
242 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7C | | |
243 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7D } | |
244 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7E ~ | |
245 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7f | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
246 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x80 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
247 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x81 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
248 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x82 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
249 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x83 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
250 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x84 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
251 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x85 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
252 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x86 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
253 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x87 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
254 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x88 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
255 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x89 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
256 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8a |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
257 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
258 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8c |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
259 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8d |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
260 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
261 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
262 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x90 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
263 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x91 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
264 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x92 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
265 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x93 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
266 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x94 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
267 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x95 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
268 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x96 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
269 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x97 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
270 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x98 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
271 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x99 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
272 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9a |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
273 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
274 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9c |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
275 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9d |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
276 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
277 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
278 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
279 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
280 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
281 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
282 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
283 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
284 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
285 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
286 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
287 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
288 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaa |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
289 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xab |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
290 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xac |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
291 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xad |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
292 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xae |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
293 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
294 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
295 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
296 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
297 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
298 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
299 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
300 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
301 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
302 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
303 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
304 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xba |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
305 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
306 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
307 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
308 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbe |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
309 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
310 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
311 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
312 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
313 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
314 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
315 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
316 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
317 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
318 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
319 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
320 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xca |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
321 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
322 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
323 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
324 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xce |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
325 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
326 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
327 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
328 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
329 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
330 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
331 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
332 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
333 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
334 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
335 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
336 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xda |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
337 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
338 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
339 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
340 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xde |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
341 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
342 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
343 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
344 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
345 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
346 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
347 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
348 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
349 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
350 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
351 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
352 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xea |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
353 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xeb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
354 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xec |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
355 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xed |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
356 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xee |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
357 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xef |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
358 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
359 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
360 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
361 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
362 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
363 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
364 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
365 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
366 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
367 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
368 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfa |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
369 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
370 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
371 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
372 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfe |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
373 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xff |
6 | 374 }; |
375 | |
376 | |
// Top level domain suffixes that the bare host name recognizer accepts.
// Each entry includes its leading dot; the list is terminated by NULL so
// callers can walk it without knowing its length.
char *tlds[] = { ".com", ".net", ".org", ".biz", NULL };
384 | |
// Numeric value of each byte when it is read as a hexadecimal digit:
// '0'-'9' give 0-9, and both 'A'-'F' and 'a'-'f' give 10-15.  Every other
// byte maps to 0, so the table by itself cannot tell '0' apart from a
// character that is not a hex digit.
// Layout: one row per high nibble, one column per low nibble.
u_char hex_decode[256] = {
//  x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x00 - 0x0f  control characters
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x10 - 0x1f  control characters
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x20 - 0x2f  space and punctuation
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,  // 0x30 - 0x3f  digits 0-9
     0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x40 - 0x4f  upper case A-F
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x50 - 0x5f
     0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x60 - 0x6f  lower case a-f
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x70 - 0x7f
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x80 - 0x8f
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x90 - 0x9f
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xa0 - 0xaf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xb0 - 0xbf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xc0 - 0xcf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xd0 - 0xdf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xe0 - 0xef
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xf0 - 0xff
};
7 | 643 |
// Six bit value of each byte when it is read as a base64 digit:
// 'A'-'Z' give 0-25, 'a'-'z' give 26-51, '0'-'9' give 52-61, '+' gives 62
// and '/' gives 63.  Every other byte (including the '=' pad character)
// maps to 0, so the table by itself cannot tell 'A' apart from a
// character outside the base64 alphabet.
// Layout: one row per high nibble, one column per low nibble.
u_char b64_decode[256] = {
//  x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x00 - 0x0f  control characters
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x10 - 0x1f  control characters
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 62,  0,  0,  0, 63,  // 0x20 - 0x2f  '+' and '/'
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  0,  0,  0,  0,  0,  0,  // 0x30 - 0x3f  digits 0-9
     0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  // 0x40 - 0x4f  upper case A-O
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  0,  0,  0,  0,  0,  // 0x50 - 0x5f  upper case P-Z
     0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,  // 0x60 - 0x6f  lower case a-o
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  0,  0,  0,  0,  0,  // 0x70 - 0x7f  lower case p-z
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x80 - 0x8f
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x90 - 0x9f
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xa0 - 0xaf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xb0 - 0xbf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xc0 - 0xcf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xd0 - 0xdf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xe0 - 0xef
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xf0 - 0xff
};
902 | |
8 | 903 #define PENDING_LIMIT 100 |
6 | 904 struct fsa { |
12 | 905 u_char pending[PENDING_LIMIT]; |
906 int count; | |
907 state st; | |
908 state init; | |
16 | 909 fsa* next1; |
910 fsa* next2; | |
24 | 911 recorder *memory; |
6 | 912 |
24 | 913 fsa(state init, fsa* next1_, fsa* next2_, recorder *memory_); |
6 | 914 void push(u_char *buf, int len); |
16 | 915 void pusher(); |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
916 void error(char *err); |
6 | 917 }; |
918 | |
24 | 919 fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) { |
920 count = 0; | |
921 st = init_; | |
922 init = init_; | |
923 next1 = next1_; | |
924 next2 = next2_; | |
925 memory = memory_; | |
6 | 926 } |
927 | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
928 void fsa::error(char *err) { |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
929 count = 0; |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
930 st = init; |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
931 if (err) my_syslog(err); |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
932 } |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
933 |
16 | 934 void fsa::pusher() { |
935 if (next1) next1->push(pending, count); | |
936 if (next2) next2->push(pending, count); | |
937 count = 0; | |
938 } | |
939 | |
6 | 940 void fsa::push(u_char *buf, int len) { |
941 for (int i=0; i<len; i++) { | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
942 if (count == (PENDING_LIMIT-1)) error(NULL); |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
943 if (st >= end_state) error("finite state machine impossible state"); |
6 | 944 u_char c = buf[i]; |
945 pending[count++] = c; | |
946 st = parse_table[c][st]; | |
947 switch (st) { | |
948 | |
949 ////////////////////////////// | |
16 | 950 // host name recognizer |
951 case h_end: { | |
18 | 952 pending[--count] = '\0'; // null terminate host name by overwriting the terminator |
16 | 953 char *tld; |
954 for (int i=0; (tld = tlds[i]); i++) { | |
955 int n = strlen(tld); | |
956 if (count > n) { | |
957 if (strncasecmp((const char *)(pending+count-n), tld, n) == 0) { | |
24 | 958 memory->new_url((char*)pending); |
17 | 959 break; |
16 | 960 } |
961 } | |
962 } | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
963 st = h_init; |
16 | 964 } // fall thru |
965 | |
966 case h_init: { | |
18 | 967 count = 0; |
16 | 968 } break; |
969 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
970 |
16 | 971 ////////////////////////////// |
18 | 972 // html tag discarder |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
973 case t_bin: { |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
974 memory->binary(); |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
975 st = t_disc; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
976 count = 0; // discard all characters |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
977 } break; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
978 |
18 | 979 case t_end: { |
24 | 980 if (count > 2) { |
981 pending[--count] = '\0'; // null terminate html tag | |
982 for (int i=1; i<count; i++) pending[i] = tolower(pending[i]); | |
983 memory->new_tag((char*)pending+1); | |
984 } | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
985 st = (c == ' ') ? t_disc : t_init; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
986 } // fall thru |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
987 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
988 case t_disc: { |
18 | 989 count = 0; // discard all characters |
16 | 990 } break; |
991 | |
992 case t_init: { | |
993 pusher(); | |
994 } break; | |
995 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
996 |
16 | 997 ////////////////////////////// |
7 | 998 // url recognizer |
999 case u_reco: { | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
1000 if (count > 13) { // need some minimal length host name after the protocol |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
1001 pending[--count] = '\0'; // null terminate host name by overwriting the terminator |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1002 char *p = strrchr((const char *)pending, '/'); |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1003 if (p && // have a leading / |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1004 strchr(p, '.') && // require at least one . in a dns name |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1005 (strncasecmp((const char *)pending, "http", 4) == 0)) { // must start with protocol |
24 | 1006 memory->new_url(++p); // we seem to have a host name, skip the last / |
9 | 1007 } |
8 | 1008 } |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
1009 st = u_init; |
16 | 1010 } // fall thru |
7 | 1011 |
1012 case u_init: { | |
1013 count = 0; // discard all characters | |
1014 } break; | |
1015 | |
1016 | |
1017 ////////////////////////////// | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1018 // url decoder %xx |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1019 case d_2: { |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1020 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]]; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1021 count = 1; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1022 st = d_init; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1023 } // fall thru |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1024 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1025 case d_init: { |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1026 pusher(); |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1027 } break; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1028 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1029 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1030 ////////////////////////////// |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1031 // html entity decoder &#nnn; |
6 | 1032 case e_semi: { |
1033 pending[--count] = '\0'; // null terminate the digit string by overwriting the semicolon | |
1034 pending[0] = atoi((const char *)pending+2); | |
1035 count = 1; | |
1036 st = e_init; | |
1037 } // fall thru | |
1038 | |
1039 case e_init: { | |
16 | 1040 pusher(); |
1041 } break; | |
1042 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1043 |
16 | 1044 ////////////////////////////// |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1045 // mime decoder =xx |
16 | 1046 case m_2: { |
1047 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]]; | |
1048 count = 1; | |
1049 st = m_init; | |
1050 } // fall thru | |
1051 | |
1052 case m_init: { | |
1053 pusher(); | |
1054 } break; | |
1055 | |
1056 case m_cr: { | |
1057 count = 1; | |
1058 st = m_eq; | |
1059 } break; | |
1060 | |
1061 case m_nl: { | |
6 | 1062 count = 0; |
16 | 1063 st = m_init; |
6 | 1064 } break; |
1065 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1066 |
6 | 1067 ////////////////////////////// |
1068 // base64 decoder | |
1069 case b_lf2: { | |
1070 count--; | |
1071 } break; | |
1072 | |
1073 case b_cr: { | |
1074 int cnt = 0; | |
1075 if ((count % 4) == 1) { | |
1076 count--; | |
1077 // might have proper b64 data | |
1078 for (int i=0; i<count; i+=4) { | |
1079 unsigned long a1 = b64_decode[pending[i]]; | |
1080 unsigned long a2 = b64_decode[pending[i+1]]; | |
1081 unsigned long a3 = b64_decode[pending[i+2]]; | |
1082 unsigned long a4 = b64_decode[pending[i+3]]; | |
1083 unsigned long a = (a1 << 18) | (a2 << 12) | (a3 << 6) | a4; | |
1084 pending[cnt++] = (a & 0x00ff0000) >> 16; | |
1085 pending[cnt++] = (a & 0x0000ff00) >> 8; | |
1086 pending[cnt++] = (a & 0x000000ff); | |
1087 if ((char)pending[i+3] == '=') cnt--; | |
1088 if ((char)pending[i+2] == '=') cnt--; | |
1089 } | |
1090 count = cnt; | |
1091 st = b_lf2; | |
1092 } | |
1093 else st = b_init; | |
1094 } // fall thru | |
1095 | |
1096 case b_lf: | |
1097 case b_init: { | |
16 | 1098 pusher(); |
6 | 1099 } break; |
1100 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1101 |
6 | 1102 ////////////////////////////// |
1103 // states that just accumulate characters in the pending buffer | |
16 | 1104 case h_host: |
24 | 1105 case t_tag: |
7 | 1106 case u_http: |
1107 case u_url: | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1108 case u_sla: |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1109 case d_pcnt: |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1110 case d_1: |
6 | 1111 case e_amp: |
1112 case e_num: | |
1113 case m_eq: | |
1114 case m_1: | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1115 case b_64: |
6 | 1116 default: { |
1117 } break; | |
1118 } | |
1119 } | |
1120 } | |
1121 | |
8 | 1122 struct url_scanner { |
16 | 1123 fsa *host_parser; |
1124 fsa *tags_parser; | |
8 | 1125 fsa *urls_parser; |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1126 fsa *urld_parser; |
8 | 1127 fsa *html_parser; |
1128 fsa *mime_parser; | |
1129 fsa *b64_parser; | |
6 | 1130 |
24 | 1131 url_scanner(recorder *memory); |
8 | 1132 ~url_scanner(); |
1133 void scan(u_char *buffer, size_t length); | |
1134 }; | |
1135 | |
24 | 1136 url_scanner::url_scanner(recorder *memory) { |
1137 host_parser = new fsa(h_init, NULL, NULL, memory); | |
1138 tags_parser = new fsa(t_init, host_parser, NULL, memory); | |
1139 urls_parser = new fsa(u_init, NULL, NULL, memory); | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1140 urld_parser = new fsa(d_init, urls_parser, tags_parser, NULL); |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1141 html_parser = new fsa(e_init, urld_parser, NULL, NULL); |
16 | 1142 mime_parser = new fsa(m_init, html_parser, NULL, NULL); |
1143 b64_parser = new fsa(b_init, mime_parser, NULL, NULL); | |
6 | 1144 } |
1145 | |
8 | 1146 url_scanner::~url_scanner() { |
16 | 1147 delete host_parser; |
1148 delete tags_parser; | |
8 | 1149 delete urls_parser; |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1150 delete urld_parser; |
8 | 1151 delete html_parser; |
1152 delete mime_parser; | |
1153 delete b64_parser; | |
6 | 1154 } |
1155 | |
8 | 1156 void url_scanner::scan(u_char *buffer, size_t length) { |
1157 b64_parser->push(buffer, length); | |
6 | 1158 } |
1159 |