Mercurial > dnsbl
annotate src/scanner.cpp @ 28:33e1e3910506
add configurable list of tlds
author | carl |
---|---|
date | Thu, 27 May 2004 10:08:51 -0700 |
parents | 43a4f6b3e668 |
children | 4dfdf33f1db0 |
rev | line source |
---|---|
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1 /* |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
2 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
3 Copyright (c) 2004 Carl Byington - 510 Software Group, released under |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
4 the GPL version 2 or any later version at your choice available at |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
5 http://www.fsf.org/licenses/gpl.txt |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
6 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
7 */ |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
8 |
// Version tag for this translation unit. "$Id$" looks like a version-control
// keyword placeholder (presumably expanded at checkout - confirm).
// Points at a string literal, so the pointee must be const: binding a literal
// to a plain char* is deprecated in C++03 and ill-formed from C++11 onwards.
static const char* scanner_version="$Id$";
6 | 10 |
11 using namespace std; | |
12 | |
24 | 13 |
14 // object to record things we see in the body content | |
15 struct recorder | |
16 { | |
17 string_set *html_tags; // valid tags | |
28 | 18 string_set *tlds; // valid tlds |
24 | 19 string_set hosts; |
20 int bad_html_tags; | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
21 int binary_tags; |
28 | 22 recorder(string_set *html_tags_, string_set *tlds_); |
24 | 23 ~recorder(); |
24 void empty(); | |
25 void new_url(char *host); | |
26 void new_tag(char *tag); | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
27 void binary(); |
24 | 28 }; |
28 | 29 recorder::recorder(string_set *html_tags_, string_set *tlds_) { |
24 | 30 html_tags = html_tags_; |
28 | 31 tlds = tlds_; |
24 | 32 bad_html_tags = 0; |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
33 binary_tags = 0; |
24 | 34 } |
35 recorder::~recorder() { | |
36 empty(); | |
37 } | |
38 void recorder::empty() { | |
39 bad_html_tags = 0; | |
28 | 40 binary_tags = 0; |
24 | 41 discard(hosts); |
42 } | |
43 void recorder::new_url(char *host) { | |
44 register_string(hosts, host); | |
45 } | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
46 void recorder::binary() { |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
47 binary_tags++; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
48 } |
24 | 49 void recorder::new_tag(char *tag) { |
50 string_set::iterator i = html_tags->find(tag); | |
51 if (i == html_tags->end()) { | |
52 bad_html_tags++; | |
28 | 53 if (debug_syslog && (bad_html_tags < 10) && (binary_tags < 10)) { |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
54 // only log the first 10 bad tags |
24 | 55 char buf[200]; |
56 snprintf(buf, sizeof(buf), "bad html tag %s", tag); | |
57 my_syslog(buf); | |
58 } | |
59 } | |
60 } | |
61 | |
62 | |
63 | |
// States for the scanner's table-driven recognizers and decoders.
// Enumerator order is significant: everything before end_state is a column
// of a PARSE row, and end_state itself is the column count.
enum state {
    // host name recognizer states
    h_init, h_host,

    // html tag discarder states
    t_init, t_tag, t_disc,

    // url recognizer states
    u_init, u_http, u_sla, u_url,

    // url decoder states %xx
    d_init, d_pcnt, d_1,

    // html entity decoder states &#nnn;
    e_init, e_amp, e_num,

    // mime decoder states =xx
    m_init, m_eq, m_1,

    // base64 decoder states
    b_init, b_lf, b_lf2, b_64,

    // counter for number of columns in the table
    end_state,

    // temporary states; these sit after end_state, so they have no column of
    // their own in a PARSE row
    h_end,
    t_bin, t_end,
    u_reco,
    d_2,
    e_semi,
    m_2, m_cr, m_nl,
    b_cr
};

// One row of the parse table: an entry for every state that precedes end_state.
typedef state PARSE[end_state];
117 | |
118 static PARSE parse_table[256] = { | |
24 | 119 // h_init, h_host, t_init, t_tag, t_disc, u_init, u_http, u_sla , u_url, d_init, d_pcnt, d_1, e_init, e_amp, e_num, m_init, m_eq, m_1, b_init, b_lf, b_lf2, b_64 |
6 | 120 |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
121 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x00 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
122 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x01 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
123 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x02 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
124 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x03 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
125 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x04 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
126 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x05 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
127 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x06 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
128 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x07 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
129 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x08 |
24 | 130 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x09 <tab> |
131 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_nl, m_init, b_lf, b_init, b_lf2, b_init, }, // 0x0a <lf> | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
132 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
133 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0c |
24 | 134 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_cr, m_init, b_init, b_init, b_init, b_cr, }, // 0x0d <cr> |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
135 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
136 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
137 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x10 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
138 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x11 xon char |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
139 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x12 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
140 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x13 xoff char |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
141 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x14 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
142 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x15 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
143 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x16 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
144 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x17 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
145 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x18 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
146 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x19 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
147 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1a |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
148 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
149 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1c |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
150 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1d |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
151 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
152 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
153 {h_init, h_end, t_init, t_end, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x20 space |
24 | 154 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x21 ! |
155 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x22 "" | |
156 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_num, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x23 # | |
157 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x24 $ | |
158 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_pcnt, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x25 % | |
159 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_amp, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x26 & | |
160 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x27 ' | |
161 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x28 ( | |
162 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x29 ) | |
163 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2A * | |
164 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2B + | |
165 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2C , | |
166 {h_host, h_host, t_init, t_disc, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2D - | |
167 {h_host, h_host, t_init, t_disc, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2E . | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
168 {h_init, h_end, t_init, t_tag, t_disc, u_init, u_sla, u_sla, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2F / |
24 | 169 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x30 0 |
170 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x31 1 | |
171 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x32 2 | |
172 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x33 3 | |
173 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x34 4 | |
174 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x35 5 | |
175 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x36 6 | |
176 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x37 7 | |
177 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x38 8 | |
178 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x39 9 | |
179 {h_init, h_end, t_init, t_disc, t_disc, u_http, u_http, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3A : | |
180 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_semi, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3B ; | |
181 {h_init, h_end, t_tag, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3C < | |
182 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_eq, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x3D = | |
183 {h_init, h_end, t_init, t_end, t_end, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3E > | |
184 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3F ? | |
27
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
185 {h_init, h_host, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x40 @ |
24 | 186 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x41 A |
187 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x42 B | |
188 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x43 C | |
189 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x44 D | |
190 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x45 E | |
191 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x46 F | |
192 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x47 G | |
193 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x48 H | |
194 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x49 I | |
195 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4A J | |
196 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4B K | |
197 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4C L | |
198 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4D M | |
199 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4E N | |
200 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4F O | |
201 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x50 P | |
202 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x51 Q | |
203 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x52 R | |
204 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x53 S | |
205 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x54 T | |
206 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x55 U | |
207 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x56 V | |
208 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x57 W | |
209 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x58 X | |
210 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x59 Y | |
211 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x5A Z | |
212 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5B [ | |
213 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5C brace | |
214 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5D ] | |
215 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5E ^ | |
216 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5F _ | |
217 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x60 ` | |
218 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x61 a | |
219 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x62 b | |
220 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x63 c | |
221 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x64 d | |
222 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x65 e | |
223 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x66 f | |
224 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x67 g | |
225 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x68 h | |
226 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x69 i | |
227 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6A j | |
228 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6B k | |
229 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6C l | |
230 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6D m | |
231 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6E n | |
232 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6F o | |
233 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x70 p | |
234 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x71 q | |
235 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x72 r | |
236 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x73 s | |
237 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x74 t | |
238 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x75 u | |
239 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x76 v | |
240 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x77 w | |
241 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x78 x | |
242 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x79 y | |
243 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x7A z | |
244 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7B { | |
245 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7C | | |
246 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7D } | |
247 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7E ~ | |
248 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7f | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
249 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x80 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
250 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x81 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
251 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x82 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
252 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x83 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
253 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x84 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
254 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x85 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
255 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x86 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
256 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x87 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
257 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x88 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
258 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x89 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
259 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8a |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
260 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
261 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8c |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
262 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8d |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
263 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
264 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
265 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x90 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
266 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x91 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
267 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x92 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
268 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x93 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
269 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x94 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
270 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x95 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
271 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x96 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
272 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x97 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
273 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x98 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
274 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x99 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
275 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9a |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
276 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9b |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
277 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9c |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
278 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9d |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
279 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9e |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
280 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9f |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
281 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
282 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
283 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
284 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
285 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
286 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
287 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
288 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
289 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
290 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
291 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaa |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
292 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xab |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
293 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xac |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
294 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xad |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
295 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xae |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
296 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
297 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
298 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
299 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
300 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
301 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
302 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
303 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
304 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
305 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
306 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
307 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xba |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
308 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
309 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
310 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
311 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbe |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
312 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
313 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
314 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
315 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
316 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
317 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
318 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
319 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
320 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
321 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
322 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
323 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xca |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
324 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
325 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
326 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
327 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xce |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
328 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
329 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
330 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
331 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
332 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
333 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
334 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
335 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
336 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
337 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
338 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
339 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xda |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
340 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
341 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
342 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
343 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xde |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
344 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdf |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
345 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
346 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
347 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
348 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
349 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
350 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
351 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
352 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
353 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
354 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
355 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xea |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
356 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xeb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
357 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xec |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
358 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xed |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
359 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xee |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
360 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xef |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
361 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf0 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
362 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf1 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
363 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf2 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
364 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf3 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
365 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf4 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
366 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf5 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
367 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf6 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
368 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf7 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
369 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf8 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
370 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf9 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
371 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfa |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
372 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfb |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
373 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfc |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
374 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfd |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
375 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfe |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
376 {h_init, h_end, t_init, t_bin, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xff |
6 | 377 }; |
378 | |
379 | |
// Lookup table used by the %xx url decoder: indexed by a character code, it
// yields the value (0..15) of that character read as a hexadecimal digit.
// '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' both map to 10-15. Every other
// character (including 0x5C, the backslash) maps to 0, so a non-hex
// character is indistinguishable from the digit '0'.
u_char hex_decode[256] = {
    //  x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xA  xB  xC  xD  xE  xF
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x00-0x0f  control characters
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x10-0x1f  control characters
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x20-0x2f  space and punctuation
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,   // 0x30-0x3f  digits 0-9, then : ; < = > ?
         0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x40-0x4f  @, A-F, then G-O
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x50-0x5f  P-Z, [ backslash ] ^ _
         0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x60-0x6f  `, a-f, then g-o
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x70-0x7f  p-z, { | } ~ and 0x7f
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x80-0x8f
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x90-0x9f
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xa0-0xaf
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xb0-0xbf
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xc0-0xcf
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xd0-0xdf
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xe0-0xef
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xf0-0xff
};
7 | 638 |
// Lookup table indexed by a character code, yielding the 6-bit value of that
// character in the base64 alphabet:
//   'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61, '+' -> 62, '/' -> 63.
// Every character outside the alphabet (including the '=' pad character)
// maps to 0, the same value as 'A'.
u_char b64_decode[256] = {
    //  x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xA  xB  xC  xD  xE  xF
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x00-0x0f  control characters
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x10-0x1f  control characters
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 62,  0,  0,  0, 63,   // 0x20-0x2f  '+' at 0x2B, '/' at 0x2F
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  0,  0,  0,  0,  0,  0,   // 0x30-0x3f  digits 0-9, then : ; < = > ?
         0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,   // 0x40-0x4f  @, then A-O
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  0,  0,  0,  0,  0,   // 0x50-0x5f  P-Z, then [ backslash ] ^ _
         0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,   // 0x60-0x6f  `, then a-o
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  0,  0,  0,  0,  0,   // 0x70-0x7f  p-z, then { | } ~ and 0x7f
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x80-0x8f
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x90-0x9f
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xa0-0xaf
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xb0-0xbf
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xc0-0xcf
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xd0-0xdf
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xe0-0xef
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0xf0-0xff
};
897 | |
8 | 898 #define PENDING_LIMIT 100 |
6 | 899 struct fsa { |
12 | 900 u_char pending[PENDING_LIMIT]; |
901 int count; | |
902 state st; | |
903 state init; | |
16 | 904 fsa* next1; |
905 fsa* next2; | |
24 | 906 recorder *memory; |
6 | 907 |
24 | 908 fsa(state init, fsa* next1_, fsa* next2_, recorder *memory_); |
6 | 909 void push(u_char *buf, int len); |
16 | 910 void pusher(); |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
911 void error(char *err); |
6 | 912 }; |
913 | |
24 | 914 fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) { |
915 count = 0; | |
916 st = init_; | |
917 init = init_; | |
918 next1 = next1_; | |
919 next2 = next2_; | |
920 memory = memory_; | |
6 | 921 } |
922 | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
923 void fsa::error(char *err) { |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
924 count = 0; |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
925 st = init; |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
926 if (err) my_syslog(err); |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
927 } |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
928 |
16 | 929 void fsa::pusher() { |
930 if (next1) next1->push(pending, count); | |
931 if (next2) next2->push(pending, count); | |
932 count = 0; | |
933 } | |
934 | |
6 | 935 void fsa::push(u_char *buf, int len) { |
936 for (int i=0; i<len; i++) { | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
937 if (count == (PENDING_LIMIT-1)) error(NULL); |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
938 if (st >= end_state) error("finite state machine impossible state"); |
6 | 939 u_char c = buf[i]; |
940 pending[count++] = c; | |
941 st = parse_table[c][st]; | |
942 switch (st) { | |
943 | |
944 ////////////////////////////// | |
16 | 945 // host name recognizer |
946 case h_end: { | |
18 | 947 pending[--count] = '\0'; // null terminate host name by overwriting the terminator |
27
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
948 if (!strchr((const char *)pending, '@')) { |
43a4f6b3e668
add configurable host name limit and bad html tag limits.
carl
parents:
25
diff
changeset
|
949 // not an email address or message id |
28 | 950 char *p1 = strchr((const char *)pending, '.'); |
951 char *p2 = strrchr((const char *)pending, '.'); | |
952 if (p1 && (p1 != p2)) { | |
953 // have two periods, so three components | |
954 for (int i=1; i<count; i++) pending[i] = tolower(pending[i]); | |
955 // is last component a tld? | |
956 string_set::iterator i = memory->tlds->find(p2); | |
957 if (i != memory->tlds->end()) memory->new_url((char*)pending); | |
16 | 958 } |
959 } | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
960 st = h_init; |
16 | 961 } // fall thru |
962 | |
963 case h_init: { | |
18 | 964 count = 0; |
16 | 965 } break; |
966 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
967 |
16 | 968 ////////////////////////////// |
18 | 969 // html tag discarder |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
970 case t_bin: { |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
971 memory->binary(); |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
972 st = t_disc; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
973 count = 0; // discard all characters |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
974 } break; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
975 |
18 | 976 case t_end: { |
24 | 977 if (count > 2) { |
978 pending[--count] = '\0'; // null terminate html tag | |
979 for (int i=1; i<count; i++) pending[i] = tolower(pending[i]); | |
980 memory->new_tag((char*)pending+1); | |
981 } | |
25
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
982 st = (c == ' ') ? t_disc : t_init; |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
983 } // fall thru |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
984 |
6176e7b2e8af
better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents:
24
diff
changeset
|
985 case t_disc: { |
18 | 986 count = 0; // discard all characters |
16 | 987 } break; |
988 | |
989 case t_init: { | |
990 pusher(); | |
991 } break; | |
992 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
993 |
16 | 994 ////////////////////////////// |
7 | 995 // url recognizer |
996 case u_reco: { | |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
997 if (count > 13) { // need some minimal length host name after the protocol |
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
998 pending[--count] = '\0'; // null terminate host name by overwriting the terminator |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
999 char *p = strrchr((const char *)pending, '/'); |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1000 if (p && // have a leading / |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1001 strchr(p, '.') && // require at least one . in a dns name |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1002 (strncasecmp((const char *)pending, "http", 4) == 0)) { // must start with protocol |
24 | 1003 memory->new_url(++p); // we seem to have a host name, skip the last / |
9 | 1004 } |
8 | 1005 } |
19
b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
carl
parents:
18
diff
changeset
|
1006 st = u_init; |
16 | 1007 } // fall thru |
7 | 1008 |
1009 case u_init: { | |
1010 count = 0; // discard all characters | |
1011 } break; | |
1012 | |
1013 | |
1014 ////////////////////////////// | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1015 // url decoder %xx |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1016 case d_2: { |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1017 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]]; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1018 count = 1; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1019 st = d_init; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1020 } // fall thru |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1021 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1022 case d_init: { |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1023 pusher(); |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1024 } break; |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1025 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1026 |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1027 ////////////////////////////// |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1028 // html entity decoder &#nnn; |
6 | 1029 case e_semi: { |
1030 pending[--count] = '\0'; // null terminate the digit string by overwriting the semicolon | |
1031 pending[0] = atoi((const char *)pending+2); | |
1032 count = 1; | |
1033 st = e_init; | |
1034 } // fall thru | |
1035 | |
1036 case e_init: { | |
16 | 1037 pusher(); |
1038 } break; | |
1039 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1040 |
16 | 1041 ////////////////////////////// |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1042 // mime decoder =xx |
16 | 1043 case m_2: { |
1044 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]]; | |
1045 count = 1; | |
1046 st = m_init; | |
1047 } // fall thru | |
1048 | |
1049 case m_init: { | |
1050 pusher(); | |
1051 } break; | |
1052 | |
1053 case m_cr: { | |
1054 count = 1; | |
1055 st = m_eq; | |
1056 } break; | |
1057 | |
1058 case m_nl: { | |
6 | 1059 count = 0; |
16 | 1060 st = m_init; |
6 | 1061 } break; |
1062 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1063 |
6 | 1064 ////////////////////////////// |
1065 // base64 decoder | |
1066 case b_lf2: { | |
1067 count--; | |
1068 } break; | |
1069 | |
1070 case b_cr: { | |
1071 int cnt = 0; | |
1072 if ((count % 4) == 1) { | |
1073 count--; | |
1074 // might have proper b64 data | |
1075 for (int i=0; i<count; i+=4) { | |
1076 unsigned long a1 = b64_decode[pending[i]]; | |
1077 unsigned long a2 = b64_decode[pending[i+1]]; | |
1078 unsigned long a3 = b64_decode[pending[i+2]]; | |
1079 unsigned long a4 = b64_decode[pending[i+3]]; | |
1080 unsigned long a = (a1 << 18) | (a2 << 12) | (a3 << 6) | a4; | |
1081 pending[cnt++] = (a & 0x00ff0000) >> 16; | |
1082 pending[cnt++] = (a & 0x0000ff00) >> 8; | |
1083 pending[cnt++] = (a & 0x000000ff); | |
1084 if ((char)pending[i+3] == '=') cnt--; | |
1085 if ((char)pending[i+2] == '=') cnt--; | |
1086 } | |
1087 count = cnt; | |
1088 st = b_lf2; | |
1089 } | |
1090 else st = b_init; | |
1091 } // fall thru | |
1092 | |
1093 case b_lf: | |
1094 case b_init: { | |
16 | 1095 pusher(); |
6 | 1096 } break; |
1097 | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1098 |
6 | 1099 ////////////////////////////// |
1100 // states that just accumulate characters in the pending buffer | |
16 | 1101 case h_host: |
24 | 1102 case t_tag: |
7 | 1103 case u_http: |
1104 case u_url: | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1105 case u_sla: |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1106 case d_pcnt: |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1107 case d_1: |
6 | 1108 case e_amp: |
1109 case e_num: | |
1110 case m_eq: | |
1111 case m_1: | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1112 case b_64: |
6 | 1113 default: { |
1114 } break; | |
1115 } | |
1116 } | |
1117 } | |
1118 | |
8 | 1119 struct url_scanner { |
16 | 1120 fsa *host_parser; |
1121 fsa *tags_parser; | |
8 | 1122 fsa *urls_parser; |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1123 fsa *urld_parser; |
8 | 1124 fsa *html_parser; |
1125 fsa *mime_parser; | |
1126 fsa *b64_parser; | |
6 | 1127 |
24 | 1128 url_scanner(recorder *memory); |
8 | 1129 ~url_scanner(); |
1130 void scan(u_char *buffer, size_t length); | |
1131 }; | |
1132 | |
24 | 1133 url_scanner::url_scanner(recorder *memory) { |
1134 host_parser = new fsa(h_init, NULL, NULL, memory); | |
1135 tags_parser = new fsa(t_init, host_parser, NULL, memory); | |
1136 urls_parser = new fsa(u_init, NULL, NULL, memory); | |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1137 urld_parser = new fsa(d_init, urls_parser, tags_parser, NULL); |
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1138 html_parser = new fsa(e_init, urld_parser, NULL, NULL); |
16 | 1139 mime_parser = new fsa(m_init, html_parser, NULL, NULL); |
1140 b64_parser = new fsa(b_init, mime_parser, NULL, NULL); | |
6 | 1141 } |
1142 | |
8 | 1143 url_scanner::~url_scanner() { |
16 | 1144 delete host_parser; |
1145 delete tags_parser; | |
8 | 1146 delete urls_parser; |
23
06de5ab6a232
add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents:
19
diff
changeset
|
1147 delete urld_parser; |
8 | 1148 delete html_parser; |
1149 delete mime_parser; | |
1150 delete b64_parser; | |
6 | 1151 } |
1152 | |
8 | 1153 void url_scanner::scan(u_char *buffer, size_t length) { |
1154 b64_parser->push(buffer, length); | |
6 | 1155 } |
1156 |