annotate src/scanner.cpp @ 32:ccb02fc678aa

new version
author carl
date Sat, 29 May 2004 15:05:45 -0700
parents d16b27371533
children ce229348cdbe
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1 /*
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
2
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
3 Copyright (c) 2004 Carl Byington - 510 Software Group, released under
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
4 the GPL version 2 or any later version at your choice available at
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
5 http://www.fsf.org/licenses/gpl.txt
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
6
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
7 */
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
8
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
// Version-control keyword string ($Id$) identifying this source file.
// A string literal is immutable, so it is held through a pointer-to-const;
// binding it to a plain char* is ill-formed in C++11 and later.
static const char* scanner_version = "$Id$";
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
10
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
11 using namespace std;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
12
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
13
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
14 // object to record things we see in the body content
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
15 struct recorder
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
16 {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
17 string_set *html_tags; // valid tags
28
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
18 string_set *tlds; // valid tlds
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
19 string_set hosts;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
20 int bad_html_tags;
25
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
21 int binary_tags;
28
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
22 recorder(string_set *html_tags_, string_set *tlds_);
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
23 ~recorder();
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
24 void empty();
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
25 void new_url(char *host);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
26 void new_tag(char *tag);
25
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
27 void binary();
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
28 };
28
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
29 recorder::recorder(string_set *html_tags_, string_set *tlds_) {
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
30 html_tags = html_tags_;
28
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
31 tlds = tlds_;
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
32 bad_html_tags = 0;
25
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
33 binary_tags = 0;
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
34 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
35 recorder::~recorder() {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
36 empty();
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
37 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
38 void recorder::empty() {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
39 bad_html_tags = 0;
28
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
40 binary_tags = 0;
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
41 discard(hosts);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
42 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
43 void recorder::new_url(char *host) {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
44 register_string(hosts, host);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
45 }
25
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
46 void recorder::binary() {
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
47 binary_tags++;
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
48 }
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
49 void recorder::new_tag(char *tag) {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
50 string_set::iterator i = html_tags->find(tag);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
51 if (i == html_tags->end()) {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
52 bad_html_tags++;
28
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
53 if (debug_syslog && (bad_html_tags < 10) && (binary_tags < 10)) {
25
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
54 // only log the first 10 bad tags
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
55 char buf[200];
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
56 snprintf(buf, sizeof(buf), "bad html tag %s", tag);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
57 my_syslog(buf);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
58 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
59 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
60 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
61
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
62
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
63
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
// States for every recognizer/decoder stage of the scanner, kept in one
// enum.  Each enumerator declared before end_state is a column of the
// parse table, so the order of those enumerators must stay exactly as is.
enum state {
    // ---- host name recognizer ----
    h_init,
    h_host,

    // ---- html tag discarder ----
    t_init,
    t_tag1,     // seen opening <
    t_tag2,     // not comment
    t_com1,     // seen !
    t_com2,     // seen first -
    t_com3,     // seen second -, looking for -->
    t_com4,     // seen first - (while looking for -->)
    t_com5,     // seen second - (while looking for -->)
    t_disc,     // looking for closing >

    // ---- url recognizer ----
    u_init,
    u_http,
    u_sla,
    u_url,

    // ---- url decoder, %xx ----
    d_init,
    d_pcnt,
    d_1,

    // ---- html entity decoder, &#nnn; ----
    e_init,
    e_amp,
    e_num,

    // ---- mime decoder, =xx ----
    m_init,
    m_eq,
    m_1,

    // ---- base64 decoder ----
    b_init,
    b_lf,
    b_lf2,
    b_64,

    // number of columns in the table (not a real state)
    end_state,

    // ---- temporary states ----
    // These occur as entries inside the table but have no column of
    // their own.
    h_end,
    t_bin,
    t_end,
    u_reco,
    d_2,
    e_semi,
    m_2,
    m_cr,
    m_nl,
    b_cr,
};

// One row of the parse table: the next state for each of the end_state
// columns.
typedef state PARSE[end_state];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
123
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
124 static PARSE parse_table[256] = {
30
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
125 // h_init, h_host, t_init, t_tag1, t_tag2, t_com1, t_com2, t_com3, t_com4, t_com5, t_disc, u_init, u_http, u_sla , u_url, d_init, d_pcnt, d_1, e_init, e_amp, e_num, m_init, m_eq, m_1, b_init, b_lf, b_lf2, b_64
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
126
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
127 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x00
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
128 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x01
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
129 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x02
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
130 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x03
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
131 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x04
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
132 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x05
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
133 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x06
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
134 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x07
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
135 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x08
30
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
136 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com5, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x09 <tab>
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
137 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com5, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_nl, m_init, b_lf, b_init, b_lf2, b_init, }, // 0x0a <lf>
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
138 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0b
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
139 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0c
30
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
140 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com5, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_cr, m_init, b_init, b_init, b_init, b_cr, }, // 0x0d <cr>
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
141 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0e
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
142 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0f
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
143 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x10
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
144 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x11 xon char
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
145 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x12
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
146 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x13 xoff char
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
147 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x14
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
148 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x15
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
149 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x16
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
150 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x17
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
151 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x18
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
152 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x19
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
153 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1a
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
154 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1b
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
155 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1c
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
156 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1d
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
157 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1e
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
158 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1f
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
159 {h_init, h_end, t_init, t_end, t_end, t_end, t_end, t_com3, t_com3, t_com5, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x20 space
30
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
160 {h_init, h_end, t_init, t_com1, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x21 !
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
161 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x22 ""
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
162 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_num, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x23 #
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
163 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x24 $
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
164 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_pcnt, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x25 %
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
165 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_amp, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x26 &
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
166 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x27 '
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
167 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x28 (
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
168 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x29 )
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
169 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2A *
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
170 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2B +
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
171 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2C ,
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
172 {h_host, h_host, t_init, t_disc, t_disc, t_com2, t_com3, t_com4, t_com5, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2D -
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
173 {h_host, h_host, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2E .
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
174 {h_init, h_end, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_sla, u_sla, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2F /
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
175 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x30 0
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
176 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x31 1
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
177 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x32 2
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
178 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x33 3
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
179 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x34 4
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
180 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x35 5
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
181 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x36 6
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
182 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x37 7
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
183 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x38 8
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
184 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x39 9
30
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
185 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3A :
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
186 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_semi, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3B ;
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
187 {h_init, h_end, t_tag1, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3C <
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
188 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_eq, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x3D =
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
189 {h_init, h_end, t_init, t_end, t_end, t_end, t_end, t_end, t_end, t_end, t_end, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3E >
30
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
190 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3F ?
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
191 {h_init, h_host, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x40 @
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
192 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x41 A
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
193 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x42 B
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
194 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x43 C
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
195 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x44 D
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
196 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x45 E
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
197 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x46 F
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
198 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x47 G
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
199 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x48 H
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
200 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x49 I
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
201 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4A J
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
202 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4B K
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
203 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4C L
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
204 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4D M
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
205 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4E N
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
206 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4F O
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
207 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x50 P
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
208 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x51 Q
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
209 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x52 R
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
210 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x53 S
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
211 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x54 T
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
212 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x55 U
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
213 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x56 V
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
214 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x57 W
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
215 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x58 X
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
216 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x59 Y
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
217 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x5A Z
30
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
218 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5B [
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
219 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5C backslash
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
220 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5D ]
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
221 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5E ^
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
222 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5F _
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
223 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x60 `
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
224 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x61 a
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
225 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x62 b
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
226 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x63 c
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
227 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x64 d
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
228 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x65 e
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
229 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x66 f
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
230 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x67 g
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
231 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x68 h
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
232 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x69 i
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
233 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6A j
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
234 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6B k
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
235 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6C l
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
236 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6D m
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
237 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6E n
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
238 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6F o
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
239 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x70 p
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
240 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x71 q
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
241 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x72 r
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
242 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x73 s
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
243 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x74 t
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
244 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x75 u
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
245 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x76 v
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
246 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x77 w
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
247 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x78 x
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
248 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x79 y
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
249 {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x7A z
30
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
250 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7B {
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
251 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7C |
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
252 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7D }
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
253 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7E ~
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
254 {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7f
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
255 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x80
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
256 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x81
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
257 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x82
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
258 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x83
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
259 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x84
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
260 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x85
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
261 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x86
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
262 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x87
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
263 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x88
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
264 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x89
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
265 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8a
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
266 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8b
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
267 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8c
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
268 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8d
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
269 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8e
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
270 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8f
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
271 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x90
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
272 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x91
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
273 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x92
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
274 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x93
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
275 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x94
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
276 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x95
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
277 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x96
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
278 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x97
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
279 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x98
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
280 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x99
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
281 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9a
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
282 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9b
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
283 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9c
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
284 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9d
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
285 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9e
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
286 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9f
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
287 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa0
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
288 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa1
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
289 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa2
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
290 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa3
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
291 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa4
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
292 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa5
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
293 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa6
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
294 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa7
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
295 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa8
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
296 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa9
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
297 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaa
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
298 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xab
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
299 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xac
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
300 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xad
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
301 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xae
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
302 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaf
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
303 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb0
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
304 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb1
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
305 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb2
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
306 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb3
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
307 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb4
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
308 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb5
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
309 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb6
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
310 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb7
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
311 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb8
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
312 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb9
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
313 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xba
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
314 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbb
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
315 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbc
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
316 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbd
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
317 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbe
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
318 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbf
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
319 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc0
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
320 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc1
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
321 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc2
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
322 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc3
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
323 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc4
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
324 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc5
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
325 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc6
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
326 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc7
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
327 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc8
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
328 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc9
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
329 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xca
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
330 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcb
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
331 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcc
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
332 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcd
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
333 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xce
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
334 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcf
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
335 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd0
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
336 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd1
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
337 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd2
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
338 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd3
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
339 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd4
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
340 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd5
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
341 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd6
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
342 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd7
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
343 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd8
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
344 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd9
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
345 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xda
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
346 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdb
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
347 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdc
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
348 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdd
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
349 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xde
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
350 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdf
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
351 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe0
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
352 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe1
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
353 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe2
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
354 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe3
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
355 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe4
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
356 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe5
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
357 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe6
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
358 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe7
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
359 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe8
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
360 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe9
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
361 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xea
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
362 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xeb
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
363 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xec
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
364 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xed
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
365 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xee
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
366 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xef
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
367 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf0
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
368 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf1
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
369 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf2
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
370 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf3
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
371 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf4
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
372 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf5
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
373 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf6
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
374 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf7
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
375 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf8
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
376 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf9
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
377 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfa
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
378 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfb
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
379 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfc
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
380 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfd
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
381 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfe
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
382 {h_init, h_end, t_init, t_bin, t_bin, t_bin, t_bin, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xff
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
383 };
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
384
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
385
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
386 u_char hex_decode[256] = {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
387 0, // 0x00
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
388 0, // 0x01
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
389 0, // 0x02
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
390 0, // 0x03
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
391 0, // 0x04
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
392 0, // 0x05
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
393 0, // 0x06
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
394 0, // 0x07
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
395 0, // 0x08
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
396 0, // 0x09 <tab>
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
397 0, // 0x0a <lf>
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
398 0, // 0x0b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
399 0, // 0x0c
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
400 0, // 0x0d <cr>
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
401 0, // 0x0e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
402 0, // 0x0f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
403 0, // 0x10
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
404 0, // 0x11 xon char
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
405 0, // 0x12
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
406 0, // 0x13 xoff char
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
407 0, // 0x14
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
408 0, // 0x15
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
409 0, // 0x16
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
410 0, // 0x17
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
411 0, // 0x18
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
412 0, // 0x19
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
413 0, // 0x1a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
414 0, // 0x1b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
415 0, // 0x1c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
416 0, // 0x1d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
417 0, // 0x1e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
418 0, // 0x1f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
419 0, // 0x20 space
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
420 0, // 0x21 !
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
421 0, // 0x22 ""
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
422 0, // 0x23 #
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
423 0, // 0x24 $
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
424 0, // 0x25 %
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
425 0, // 0x26 &
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
426 0, // 0x27 '
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
427 0, // 0x28 (
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
428 0, // 0x29 )
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
429 0, // 0x2A *
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
430 0, // 0x2B +
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
431 0, // 0x2C ,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
432 0, // 0x2D -
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
433 0, // 0x2E .
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
434 0, // 0x2F /
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
435 0, // 0x30 0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
436 1, // 0x31 1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
437 2, // 0x32 2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
438 3, // 0x33 3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
439 4, // 0x34 4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
440 5, // 0x35 5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
441 6, // 0x36 6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
442 7, // 0x37 7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
443 8, // 0x38 8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
444 9, // 0x39 9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
445 0, // 0x3A :
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
446 0, // 0x3B ;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
447 0, // 0x3C <
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
448 0, // 0x3D =
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
449 0, // 0x3E >
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
450 0, // 0x3F ?
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
451 0, // 0x40 @
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
452 10, // 0x41 A
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
453 11, // 0x42 B
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
454 12, // 0x43 C
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
455 13, // 0x44 D
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
456 14, // 0x45 E
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
457 15, // 0x46 F
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
458 0, // 0x47 G
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
459 0, // 0x48 H
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
460 0, // 0x49 I
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
461 0, // 0x4A J
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
462 0, // 0x4B K
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
463 0, // 0x4C L
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
464 0, // 0x4D M
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
465 0, // 0x4E N
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
466 0, // 0x4F O
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
467 0, // 0x50 P
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
468 0, // 0x51 Q
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
469 0, // 0x52 R
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
470 0, // 0x53 S
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
471 0, // 0x54 T
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
472 0, // 0x55 U
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
473 0, // 0x56 V
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
474 0, // 0x57 W
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
475 0, // 0x58 X
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
476 0, // 0x59 Y
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
477 0, // 0x5A Z
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
478 0, // 0x5B [
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
479 0, // 0x5C brace
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
480 0, // 0x5D ]
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
481 0, // 0x5E ^
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
482 0, // 0x5F _
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
483 0, // 0x60 `
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
484 10, // 0x61 a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
485 11, // 0x62 b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
486 12, // 0x63 c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
487 13, // 0x64 d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
488 14, // 0x65 e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
489 15, // 0x66 f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
490 0, // 0x67 g
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
491 0, // 0x68 h
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
492 0, // 0x69 i
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
493 0, // 0x6A j
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
494 0, // 0x6B k
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
495 0, // 0x6C l
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
496 0, // 0x6D m
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
497 0, // 0x6E n
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
498 0, // 0x6F o
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
499 0, // 0x70 p
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
500 0, // 0x71 q
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
501 0, // 0x72 r
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
502 0, // 0x73 s
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
503 0, // 0x74 t
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
504 0, // 0x75 u
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
505 0, // 0x76 v
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
506 0, // 0x77 w
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
507 0, // 0x78 x
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
508 0, // 0x79 y
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
509 0, // 0x7A z
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
510 0, // 0x7B {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
511 0, // 0x7C |
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
512 0, // 0x7D }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
513 0, // 0x7E ~
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
514 0, // 0x7f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
515 0, // 0x80
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
516 0, // 0x81
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
517 0, // 0x82
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
518 0, // 0x83
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
519 0, // 0x84
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
520 0, // 0x85
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
521 0, // 0x86
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
522 0, // 0x87
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
523 0, // 0x88
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
524 0, // 0x89
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
525 0, // 0x8a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
526 0, // 0x8b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
527 0, // 0x8c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
528 0, // 0x8d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
529 0, // 0x8e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
530 0, // 0x8f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
531 0, // 0x90
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
532 0, // 0x91
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
533 0, // 0x92
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
534 0, // 0x93
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
535 0, // 0x94
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
536 0, // 0x95
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
537 0, // 0x96
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
538 0, // 0x97
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
539 0, // 0x98
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
540 0, // 0x99
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
541 0, // 0x9a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
542 0, // 0x9b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
543 0, // 0x9c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
544 0, // 0x9d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
545 0, // 0x9e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
546 0, // 0x9f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
547 0, // 0xa0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
548 0, // 0xa1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
549 0, // 0xa2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
550 0, // 0xa3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
551 0, // 0xa4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
552 0, // 0xa5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
553 0, // 0xa6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
554 0, // 0xa7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
555 0, // 0xa8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
556 0, // 0xa9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
557 0, // 0xaa
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
558 0, // 0xab
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
559 0, // 0xac
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
560 0, // 0xad
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
561 0, // 0xae
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
562 0, // 0xaf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
563 0, // 0xb0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
564 0, // 0xb1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
565 0, // 0xb2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
566 0, // 0xb3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
567 0, // 0xb4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
568 0, // 0xb5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
569 0, // 0xb6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
570 0, // 0xb7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
571 0, // 0xb8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
572 0, // 0xb9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
573 0, // 0xba
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
574 0, // 0xbb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
575 0, // 0xbc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
576 0, // 0xbd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
577 0, // 0xbe
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
578 0, // 0xbf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
579 0, // 0xc0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
580 0, // 0xc1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
581 0, // 0xc2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
582 0, // 0xc3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
583 0, // 0xc4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
584 0, // 0xc5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
585 0, // 0xc6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
586 0, // 0xc7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
587 0, // 0xc8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
588 0, // 0xc9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
589 0, // 0xca
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
590 0, // 0xcb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
591 0, // 0xcc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
592 0, // 0xcd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
593 0, // 0xce
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
594 0, // 0xcf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
595 0, // 0xd0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
596 0, // 0xd1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
597 0, // 0xd2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
598 0, // 0xd3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
599 0, // 0xd4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
600 0, // 0xd5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
601 0, // 0xd6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
602 0, // 0xd7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
603 0, // 0xd8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
604 0, // 0xd9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
605 0, // 0xda
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
606 0, // 0xdb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
607 0, // 0xdc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
608 0, // 0xdd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
609 0, // 0xde
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
610 0, // 0xdf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
611 0, // 0xe0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
612 0, // 0xe1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
613 0, // 0xe2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
614 0, // 0xe3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
615 0, // 0xe4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
616 0, // 0xe5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
617 0, // 0xe6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
618 0, // 0xe7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
619 0, // 0xe8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
620 0, // 0xe9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
621 0, // 0xea
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
622 0, // 0xeb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
623 0, // 0xec
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
624 0, // 0xed
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
625 0, // 0xee
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
626 0, // 0xef
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
627 0, // 0xf0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
628 0, // 0xf1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
629 0, // 0xf2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
630 0, // 0xf3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
631 0, // 0xf4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
632 0, // 0xf5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
633 0, // 0xf6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
634 0, // 0xf7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
635 0, // 0xf8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
636 0, // 0xf9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
637 0, // 0xfa
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
638 0, // 0xfb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
639 0, // 0xfc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
640 0, // 0xfd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
641 0, // 0xfe
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
642 0, // 0xff
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
643 };
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
644
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
645 u_char b64_decode[256] = {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
646 0, // 0x00
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
647 0, // 0x01
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
648 0, // 0x02
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
649 0, // 0x03
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
650 0, // 0x04
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
651 0, // 0x05
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
652 0, // 0x06
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
653 0, // 0x07
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
654 0, // 0x08
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
655 0, // 0x09 <tab>
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
656 0, // 0x0a <lf>
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
657 0, // 0x0b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
658 0, // 0x0c
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
659 0, // 0x0d <cr>
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
660 0, // 0x0e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
661 0, // 0x0f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
662 0, // 0x10
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
663 0, // 0x11 xon char
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
664 0, // 0x12
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
665 0, // 0x13 xoff char
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
666 0, // 0x14
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
667 0, // 0x15
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
668 0, // 0x16
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
669 0, // 0x17
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
670 0, // 0x18
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
671 0, // 0x19
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
672 0, // 0x1a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
673 0, // 0x1b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
674 0, // 0x1c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
675 0, // 0x1d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
676 0, // 0x1e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
677 0, // 0x1f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
678 0, // 0x20 space
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
679 0, // 0x21 !
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
680 0, // 0x22 ""
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
681 0, // 0x23 #
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
682 0, // 0x24 $
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
683 0, // 0x25 %
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
684 0, // 0x26 &
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
685 0, // 0x27 '
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
686 0, // 0x28 (
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
687 0, // 0x29 )
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
688 0, // 0x2A *
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
689 62, // 0x2B +
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
690 0, // 0x2C ,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
691 0, // 0x2D -
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
692 0, // 0x2E .
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
693 63, // 0x2F /
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
694 52, // 0x30 0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
695 53, // 0x31 1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
696 54, // 0x32 2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
697 55, // 0x33 3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
698 56, // 0x34 4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
699 57, // 0x35 5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
700 58, // 0x36 6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
701 59, // 0x37 7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
702 60, // 0x38 8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
703 61, // 0x39 9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
704 0, // 0x3A :
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
705 0, // 0x3B ;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
706 0, // 0x3C <
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
707 0, // 0x3D =
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
708 0, // 0x3E >
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
709 0, // 0x3F ?
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
710 0, // 0x40 @
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
711 0, // 0x41 A
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
712 1, // 0x42 B
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
713 2, // 0x43 C
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
714 3, // 0x44 D
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
715 4, // 0x45 E
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
716 5, // 0x46 F
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
717 6, // 0x47 G
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
718 7, // 0x48 H
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
719 8, // 0x49 I
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
720 9, // 0x4A J
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
721 10, // 0x4B K
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
722 11, // 0x4C L
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
723 12, // 0x4D M
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
724 13, // 0x4E N
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
725 14, // 0x4F O
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
726 15, // 0x50 P
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
727 16, // 0x51 Q
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
728 17, // 0x52 R
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
729 18, // 0x53 S
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
730 19, // 0x54 T
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
731 20, // 0x55 U
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
732 21, // 0x56 V
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
733 22, // 0x57 W
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
734 23, // 0x58 X
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
735 24, // 0x59 Y
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
736 25, // 0x5A Z
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
737 0, // 0x5B [
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
738 0, // 0x5C brace
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
739 0, // 0x5D ]
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
740 0, // 0x5E ^
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
741 0, // 0x5F _
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
742 0, // 0x60 `
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
743 26, // 0x61 a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
744 27, // 0x62 b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
745 28, // 0x63 c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
746 29, // 0x64 d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
747 30, // 0x65 e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
748 31, // 0x66 f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
749 32, // 0x67 g
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
750 33, // 0x68 h
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
751 34, // 0x69 i
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
752 35, // 0x6A j
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
753 36, // 0x6B k
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
754 37, // 0x6C l
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
755 38, // 0x6D m
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
756 39, // 0x6E n
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
757 40, // 0x6F o
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
758 41, // 0x70 p
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
759 42, // 0x71 q
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
760 43, // 0x72 r
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
761 44, // 0x73 s
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
762 45, // 0x74 t
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
763 46, // 0x75 u
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
764 47, // 0x76 v
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
765 48, // 0x77 w
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
766 49, // 0x78 x
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
767 50, // 0x79 y
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
768 51, // 0x7A z
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
769 0, // 0x7B {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
770 0, // 0x7C |
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
771 0, // 0x7D }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
772 0, // 0x7E ~
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
773 0, // 0x7f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
774 0, // 0x80
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
775 0, // 0x81
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
776 0, // 0x82
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
777 0, // 0x83
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
778 0, // 0x84
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
779 0, // 0x85
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
780 0, // 0x86
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
781 0, // 0x87
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
782 0, // 0x88
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
783 0, // 0x89
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
784 0, // 0x8a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
785 0, // 0x8b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
786 0, // 0x8c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
787 0, // 0x8d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
788 0, // 0x8e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
789 0, // 0x8f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
790 0, // 0x90
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
791 0, // 0x91
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
792 0, // 0x92
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
793 0, // 0x93
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
794 0, // 0x94
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
795 0, // 0x95
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
796 0, // 0x96
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
797 0, // 0x97
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
798 0, // 0x98
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
799 0, // 0x99
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
800 0, // 0x9a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
801 0, // 0x9b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
802 0, // 0x9c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
803 0, // 0x9d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
804 0, // 0x9e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
805 0, // 0x9f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
806 0, // 0xa0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
807 0, // 0xa1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
808 0, // 0xa2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
809 0, // 0xa3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
810 0, // 0xa4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
811 0, // 0xa5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
812 0, // 0xa6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
813 0, // 0xa7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
814 0, // 0xa8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
815 0, // 0xa9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
816 0, // 0xaa
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
817 0, // 0xab
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
818 0, // 0xac
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
819 0, // 0xad
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
820 0, // 0xae
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
821 0, // 0xaf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
822 0, // 0xb0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
823 0, // 0xb1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
824 0, // 0xb2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
825 0, // 0xb3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
826 0, // 0xb4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
827 0, // 0xb5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
828 0, // 0xb6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
829 0, // 0xb7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
830 0, // 0xb8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
831 0, // 0xb9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
832 0, // 0xba
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
833 0, // 0xbb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
834 0, // 0xbc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
835 0, // 0xbd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
836 0, // 0xbe
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
837 0, // 0xbf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
838 0, // 0xc0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
839 0, // 0xc1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
840 0, // 0xc2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
841 0, // 0xc3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
842 0, // 0xc4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
843 0, // 0xc5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
844 0, // 0xc6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
845 0, // 0xc7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
846 0, // 0xc8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
847 0, // 0xc9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
848 0, // 0xca
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
849 0, // 0xcb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
850 0, // 0xcc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
851 0, // 0xcd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
852 0, // 0xce
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
853 0, // 0xcf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
854 0, // 0xd0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
855 0, // 0xd1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
856 0, // 0xd2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
857 0, // 0xd3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
858 0, // 0xd4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
859 0, // 0xd5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
860 0, // 0xd6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
861 0, // 0xd7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
862 0, // 0xd8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
863 0, // 0xd9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
864 0, // 0xda
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
865 0, // 0xdb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
866 0, // 0xdc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
867 0, // 0xdd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
868 0, // 0xde
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
869 0, // 0xdf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
870 0, // 0xe0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
871 0, // 0xe1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
872 0, // 0xe2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
873 0, // 0xe3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
874 0, // 0xe4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
875 0, // 0xe5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
876 0, // 0xe6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
877 0, // 0xe7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
878 0, // 0xe8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
879 0, // 0xe9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
880 0, // 0xea
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
881 0, // 0xeb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
882 0, // 0xec
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
883 0, // 0xed
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
884 0, // 0xee
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
885 0, // 0xef
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
886 0, // 0xf0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
887 0, // 0xf1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
888 0, // 0xf2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
889 0, // 0xf3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
890 0, // 0xf4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
891 0, // 0xf5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
892 0, // 0xf6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
893 0, // 0xf7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
894 0, // 0xf8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
895 0, // 0xf9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
896 0, // 0xfa
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
897 0, // 0xfb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
898 0, // 0xfc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
899 0, // 0xfd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
900 0, // 0xfe
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
901 0, // 0xff
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
902 };
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
903
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
904 #define PENDING_LIMIT 100
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
905 struct fsa {
12
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
906 u_char pending[PENDING_LIMIT];
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
907 int count;
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
908 state st;
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
909 state init;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
910 fsa* next1;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
911 fsa* next2;
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
912 recorder *memory;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
913
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
914 fsa(state init, fsa* next1_, fsa* next2_, recorder *memory_);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
915 void push(u_char *buf, int len);
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
916 void pusher();
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
917 void error(char *err);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
918 };
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
919
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
920 fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
921 count = 0;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
922 st = init_;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
923 init = init_;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
924 next1 = next1_;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
925 next2 = next2_;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
926 memory = memory_;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
927 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
928
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
929 void fsa::error(char *err) {
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
930 count = 0;
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
931 st = init;
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
932 if (err) my_syslog(err);
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
933 }
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
934
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
935 void fsa::pusher() {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
936 if (next1) next1->push(pending, count);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
937 if (next2) next2->push(pending, count);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
938 count = 0;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
939 }
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
940
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
941 void fsa::push(u_char *buf, int len) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
942 for (int i=0; i<len; i++) {
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
943 if (count == (PENDING_LIMIT-1)) error(NULL);
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
944 if (st >= end_state) error("finite state machine impossible state");
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
945 u_char c = buf[i];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
946 pending[count++] = c;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
947 st = parse_table[c][st];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
948 switch (st) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
949
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
950 //////////////////////////////
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
951 // host name recognizer
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
952 case h_end: {
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
953 pending[--count] = '\0'; // null terminate host name by overwriting the terminator
27
43a4f6b3e668 add configurable host name limit and bad html tag limits.
carl
parents: 25
diff changeset
954 if (!strchr((const char *)pending, '@')) {
43a4f6b3e668 add configurable host name limit and bad html tag limits.
carl
parents: 25
diff changeset
955 // not an email address or message id
28
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
956 char *p1 = strchr((const char *)pending, '.');
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
957 char *p2 = strrchr((const char *)pending, '.');
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
958 if (p1 && (p1 != p2)) {
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
959 // have two periods, so three components
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
960 for (int i=1; i<count; i++) pending[i] = tolower(pending[i]);
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
961 // is last component a tld?
29
4dfdf33f1db0 add syslog msg freeing memory, use bare tld names without leading period
carl
parents: 28
diff changeset
962 string_set::iterator i = memory->tlds->find(p2+1);
28
33e1e3910506 add configurable list of tlds
carl
parents: 27
diff changeset
963 if (i != memory->tlds->end()) memory->new_url((char*)pending);
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
964 }
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
965 }
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
966 st = h_init;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
967 } // fall thru
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
968
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
969 case h_init: {
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
970 count = 0;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
971 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
972
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
973
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
974 //////////////////////////////
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
975 // html tag discarder
25
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
976 case t_bin: {
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
977 memory->binary();
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
978 st = t_disc;
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
979 count = 0; // discard all characters
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
980 } break;
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
981
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
982 case t_end: {
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
983 if (count > 1) {
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
984 pending[--count] = '\0'; // null terminate html tag
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
985 for (int i=1; i<count; i++) pending[i] = tolower(pending[i]);
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
986 memory->new_tag((char*)pending);
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
987 }
25
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
988 st = (c == ' ') ? t_disc : t_init;
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
989 } // fall thru
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
990
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
991 case t_tag1:
30
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
992 case t_com2:
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
993 case t_com3:
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
994 case t_com4:
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
995 case t_com5:
25
6176e7b2e8af better scanning for bad html tags, allow binary zip and gz files with random char sequences
carl
parents: 24
diff changeset
996 case t_disc: {
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
997 count = 0; // discard all characters
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
998 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
999
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1000 case t_init: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1001 pusher();
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1002 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1003
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1004
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1005 //////////////////////////////
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1006 // url recognizer
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1007 case u_reco: {
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
1008 if (count > 13) { // need some minimal length host name after the protocol
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
1009 pending[--count] = '\0'; // null terminate host name by overwriting the terminator
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1010 char *p = strrchr((const char *)pending, '/');
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1011 if (p && // have a leading /
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1012 strchr(p, '.') && // require at least one . in a dns name
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1013 (strncasecmp((const char *)pending, "http", 4) == 0)) { // must start with protocol
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1014 memory->new_url(++p); // we seem to have a host name, skip the last /
9
8c65411cd7ab integration work on url scanner
carl
parents: 8
diff changeset
1015 }
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1016 }
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
1017 st = u_init;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1018 } // fall thru
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1019
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1020 case u_init: {
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1021 count = 0; // discard all characters
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1022 } break;
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1023
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1024
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1025 //////////////////////////////
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1026 // url decoder %xx
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1027 case d_2: {
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1028 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]];
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1029 count = 1;
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1030 st = d_init;
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1031 } // fall thru
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1032
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1033 case d_init: {
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1034 pusher();
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1035 } break;
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1036
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1037
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1038 //////////////////////////////
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1039 // html entity decoder &#nnn;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1040 case e_semi: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1041 pending[--count] = '\0'; // null terminate the digit string by overwriting the semicolon
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1042 pending[0] = atoi((const char *)pending+2);
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1043 count = 1;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1044 st = e_init;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1045 } // fall thru
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1046
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1047 case e_init: {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1048 pusher();
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1049 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1050
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1051
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1052 //////////////////////////////
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1053 // mime decoder =xx
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1054 case m_2: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1055 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]];
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1056 count = 1;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1057 st = m_init;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1058 } // fall thru
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1059
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1060 case m_init: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1061 pusher();
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1062 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1063
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1064 case m_cr: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1065 count = 1;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1066 st = m_eq;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1067 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1068
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1069 case m_nl: {
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1070 count = 0;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1071 st = m_init;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1072 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1073
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1074
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1075 //////////////////////////////
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1076 // base64 decoder
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1077 case b_lf2: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1078 count--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1079 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1080
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1081 case b_cr: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1082 int cnt = 0;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1083 if ((count % 4) == 1) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1084 count--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1085 // might have proper b64 data
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1086 for (int i=0; i<count; i+=4) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1087 unsigned long a1 = b64_decode[pending[i]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1088 unsigned long a2 = b64_decode[pending[i+1]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1089 unsigned long a3 = b64_decode[pending[i+2]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1090 unsigned long a4 = b64_decode[pending[i+3]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1091 unsigned long a = (a1 << 18) | (a2 << 12) | (a3 << 6) | a4;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1092 pending[cnt++] = (a & 0x00ff0000) >> 16;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1093 pending[cnt++] = (a & 0x0000ff00) >> 8;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1094 pending[cnt++] = (a & 0x000000ff);
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1095 if ((char)pending[i+3] == '=') cnt--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1096 if ((char)pending[i+2] == '=') cnt--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1097 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1098 count = cnt;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1099 st = b_lf2;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1100 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1101 else st = b_init;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1102 } // fall thru
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1103
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1104 case b_lf:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1105 case b_init: {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1106 pusher();
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1107 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1108
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1109
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1110 //////////////////////////////
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1111 // states that just accumulate characters in the pending buffer
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1112 case h_host:
30
8f43f8fa1ad7 ignore text including bad html tags inside comments
carl
parents: 29
diff changeset
1113 case t_tag2:
31
d16b27371533 consider <!tag> to be bad html tag
carl
parents: 30
diff changeset
1114 case t_com1:
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1115 case u_http:
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1116 case u_url:
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1117 case u_sla:
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1118 case d_pcnt:
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1119 case d_1:
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1120 case e_amp:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1121 case e_num:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1122 case m_eq:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1123 case m_1:
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1124 case b_64:
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1125 default: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1126 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1127 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1128 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1129 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1130
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1131 struct url_scanner {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1132 fsa *host_parser;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1133 fsa *tags_parser;
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1134 fsa *urls_parser;
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1135 fsa *urld_parser;
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1136 fsa *html_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1137 fsa *mime_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1138 fsa *b64_parser;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1139
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1140 url_scanner(recorder *memory);
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1141 ~url_scanner();
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1142 void scan(u_char *buffer, size_t length);
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1143 };
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1144
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1145 url_scanner::url_scanner(recorder *memory) {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1146 host_parser = new fsa(h_init, NULL, NULL, memory);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1147 tags_parser = new fsa(t_init, host_parser, NULL, memory);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1148 urls_parser = new fsa(u_init, NULL, NULL, memory);
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1149 urld_parser = new fsa(d_init, urls_parser, tags_parser, NULL);
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1150 html_parser = new fsa(e_init, urld_parser, NULL, NULL);
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1151 mime_parser = new fsa(m_init, html_parser, NULL, NULL);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1152 b64_parser = new fsa(b_init, mime_parser, NULL, NULL);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1153 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1154
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1155 url_scanner::~url_scanner() {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1156 delete host_parser;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1157 delete tags_parser;
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1158 delete urls_parser;
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1159 delete urld_parser;
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1160 delete html_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1161 delete mime_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1162 delete b64_parser;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1163 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1164
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1165 void url_scanner::scan(u_char *buffer, size_t length) {
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1166 b64_parser->push(buffer, length);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1167 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1168