annotate src/scanner.cpp @ 24:2e23b7184d2b

start coding for bad html tag detection
author carl
date Wed, 19 May 2004 21:40:50 -0700
parents 06de5ab6a232
children 6176e7b2e8af
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1 /*
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
2
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
3 Copyright (c) 2004 Carl Byington - 510 Software Group, released under
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
4 the GPL version 2 or any later version at your choice available at
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
5 http://www.fsf.org/licenses/gpl.txt
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
6
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
7 */
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
8
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
9 static char* scanner_version="$Id$"; // file-local version string; $Id$ looks like a version-control keyword placeholder - TODO confirm. NOTE(review): a string literal is bound to a non-const char* here; const char* would be safer if no user needs to write through it
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
10
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
11 using namespace std; // NOTE(review): file-scope using-directive; every std name is visible unqualified in the code below
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
12
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
13
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
14 // object to record things we see in the body content: the hosts handed to new_url() and a count of html tags that are not in the valid tag set
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
15 struct recorder
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
16 {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
17 string_set *html_tags; // valid tags; the pointer given to the constructor is stored as-is and is not released by any recorder code shown here
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
18 string_set hosts; // filled through register_string() in new_url(), handed to discard() in empty()
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
19 int bad_html_tags; // number of new_tag() calls whose tag was not found in *html_tags; reset to 0 by empty()
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
20 recorder(string_set *html_tags_); // stores html_tags_ and zeroes bad_html_tags
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
21 ~recorder(); // calls empty()
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
22 void empty(); // zeroes bad_html_tags and calls discard(hosts)
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
23 void new_url(char *host); // calls register_string(hosts, host)
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
24 void new_tag(char *tag); // counts the tag as bad (and logs it when debug_syslog is set) if it is not found in *html_tags
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
25 };
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
26 recorder::recorder(string_set *html_tags_) { // constructor: remember the caller's valid-tag set and start with a zero bad-tag count
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
27 html_tags = html_tags_; // pointer is copied, not the set it points to
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
28 bad_html_tags = 0;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
29 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
30 recorder::~recorder() { // destructor: performs the same cleanup as an explicit empty() call
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
31 empty(); // resets the counter and hands hosts to discard()
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
32 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
33 void recorder::empty() { // reset the recorder: zero the bad-tag count and discard the recorded hosts; html_tags is left untouched
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
34 bad_html_tags = 0;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
35 discard(hosts); // discard() is defined outside this view; presumably it releases and removes the registered strings - TODO confirm
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
36 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
37 void recorder::new_url(char *host) { // record one host name seen in the body content
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
38 register_string(hosts, host); // register_string() is defined outside this view; presumably it stores host in the hosts set - TODO confirm whether it copies the string
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
39 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
40 void recorder::new_tag(char *tag) { // check one html tag against the valid-tag set; a miss bumps bad_html_tags and is logged when debug_syslog is set
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
41 string_set::iterator i = html_tags->find(tag); // NOTE(review): html_tags is dereferenced without a null check - confirm callers always pass a valid set
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
42 if (i == html_tags->end()) { // find() returned end(): tag is not in the valid set
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
43 bad_html_tags++;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
44 if (debug_syslog) { // debug_syslog is declared outside this view; the message is built only when it is true
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
45 char buf[200]; // fixed-size message buffer; snprintf below is bounded by sizeof(buf), so a long tag is truncated rather than overflowing
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
46 snprintf(buf, sizeof(buf), "bad html tag %s", tag);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
47 my_syslog(buf); // my_syslog() is defined outside this view
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
48 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
49 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
50 }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
51
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
52
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
53
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
54 enum state {// states listed before end_state are the columns of a PARSE row, in this order; first group: host name recognizer states
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
55 h_init,
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
56 h_host,
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
57
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
58 // html tag discarder states
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
59 t_init,
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
60 t_tag, // parse_table row 0x3C (<) moves t_init to this state
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
61 t_disc,
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
62
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
63 // url recognizer states
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
64 u_init,
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
65 u_http,
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
66 u_sla,
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
67 u_url,
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
68
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
69 // url decoder states for %xx escapes
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
70 d_init,
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
71 d_pcnt, // parse_table row 0x25 (%) moves d_init to this state
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
72 d_1,
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
73
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
74 // html entity decoder states for numeric entities of the form &#nnn;
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
75 e_init,
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
76 e_amp, // parse_table row 0x26 (&) moves e_init to this state
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
77 e_num, // parse_table row 0x23 (#) moves e_amp to this state
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
78
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
79 // mime decoder states for =xx sequences
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
80 m_init,
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
81 m_eq, // parse_table row 0x3D (=) moves m_init to this state
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
82 m_1,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
83
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
84 // base64 decoder states
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
85 b_init,
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
86 b_lf,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
87 b_lf2,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
88 b_64,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
89
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
90 // not a state of any recognizer: its value equals the number of states listed above and is used as the column count of PARSE (typedef state PARSE[end_state])
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
91 end_state,
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
92
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
93 // temporary states: numbered after end_state, so they have no column of their own in a PARSE row; they show up as entries (transition targets) in parse_table
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
94 h_end,
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
95 t_end, // parse_table row 0x3E (>) moves t_tag and t_disc to this state
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
96 u_reco,
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
97 d_2,
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
98 e_semi, // parse_table row 0x3B (;) moves e_num to this state
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
99 m_2,
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
100 m_cr,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
101 m_nl,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
102 b_cr,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
103 };
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
104
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
105 typedef state PARSE[end_state]; // one parse_table row: an array of end_state entries of type state, one per enumerator listed before end_state; presumably indexed by the current state to give the next state - TODO confirm against the code that reads parse_table
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
106
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
107 static PARSE parse_table[256] = {
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
108 // h_init, h_host, t_init, t_tag, t_disc, u_init, u_http, u_sla , u_url, d_init, d_pcnt, d_1, e_init, e_amp, e_num, m_init, m_eq, m_1, b_init, b_lf, b_lf2, b_64
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
109
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
110 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x00
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
111 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x01
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
112 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x02
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
113 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x03
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
114 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x04
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
115 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x05
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
116 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x06
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
117 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x07
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
118 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x08
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
119 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x09 <tab>
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
120 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_nl, m_init, b_lf, b_init, b_lf2, b_init, }, // 0x0a <lf>
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
121 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0b
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
122 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0c
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
123 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_cr, m_init, b_init, b_init, b_init, b_cr, }, // 0x0d <cr>
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
124 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0e
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
125 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0f
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
126 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x10
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
127 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x11 xon char
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
128 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x12
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
129 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x13 xoff char
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
130 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x14
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
131 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x15
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
132 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x16
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
133 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x17
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
134 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x18
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
135 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x19
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
136 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1a
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
137 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1b
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
138 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1c
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
139 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1d
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
140 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1e
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
141 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1f
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
142 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x20 space
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
143 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x21 !
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
144 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x22 ""
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
145 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_num, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x23 #
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
146 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x24 $
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
147 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_pcnt, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x25 %
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
148 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_amp, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x26 &
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
149 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x27 '
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
150 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x28 (
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
151 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x29 )
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
152 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2A *
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
153 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2B +
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
154 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2C ,
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
155 {h_host, h_host, t_init, t_disc, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2D -
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
156 {h_host, h_host, t_init, t_disc, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2E .
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
157 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_sla, u_sla, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2F /
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
158 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x30 0
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
159 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x31 1
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
160 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x32 2
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
161 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x33 3
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
162 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x34 4
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
163 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x35 5
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
164 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x36 6
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
165 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x37 7
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
166 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x38 8
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
167 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x39 9
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
168 {h_init, h_end, t_init, t_disc, t_disc, u_http, u_http, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3A :
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
169 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_semi, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3B ;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
170 {h_init, h_end, t_tag, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3C <
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
171 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_eq, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x3D =
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
172 {h_init, h_end, t_init, t_end, t_end, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3E >
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
173 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3F ?
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
174 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x40 @
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
175 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x41 A
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
176 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x42 B
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
177 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x43 C
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
178 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x44 D
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
179 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x45 E
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
180 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x46 F
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
181 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x47 G
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
182 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x48 H
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
183 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x49 I
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
184 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4A J
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
185 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4B K
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
186 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4C L
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
187 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4D M
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
188 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4E N
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
189 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4F O
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
190 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x50 P
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
191 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x51 Q
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
192 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x52 R
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
193 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x53 S
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
194 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x54 T
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
195 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x55 U
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
196 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x56 V
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
197 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x57 W
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
198 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x58 X
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
199 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x59 Y
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
200 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x5A Z
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
201 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5B [
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
202 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5C backslash
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
203 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5D ]
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
204 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5E ^
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
205 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5F _
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
206 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x60 `
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
207 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x61 a
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
208 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x62 b
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
209 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x63 c
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
210 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x64 d
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
211 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x65 e
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
212 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x66 f
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
213 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x67 g
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
214 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x68 h
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
215 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x69 i
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
216 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6A j
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
217 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6B k
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
218 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6C l
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
219 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6D m
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
220 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6E n
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
221 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6F o
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
222 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x70 p
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
223 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x71 q
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
224 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x72 r
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
225 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x73 s
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
226 {h_host, h_host, t_init, t_tag, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x74 t
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
227 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x75 u
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
228 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x76 v
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
229 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x77 w
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
230 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x78 x
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
231 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x79 y
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
232 {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x7A z
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
233 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7B {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
234 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7C |
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
235 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7D }
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
236 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7E ~
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
237 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7f
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
238 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x80
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
239 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x81
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
240 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x82
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
241 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x83
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
242 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x84
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
243 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x85
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
244 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x86
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
245 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x87
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
246 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x88
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
247 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x89
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
248 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8a
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
249 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8b
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
250 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8c
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
251 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8d
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
252 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8e
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
253 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8f
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
254 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x90
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
255 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x91
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
256 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x92
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
257 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x93
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
258 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x94
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
259 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x95
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
260 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x96
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
261 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x97
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
262 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x98
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
263 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x99
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
264 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9a
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
265 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9b
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
266 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9c
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
267 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9d
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
268 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9e
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
269 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9f
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
270 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa0
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
271 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa1
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
272 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa2
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
273 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa3
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
274 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa4
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
275 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa5
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
276 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa6
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
277 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa7
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
278 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa8
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
279 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa9
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
280 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaa
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
281 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xab
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
282 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xac
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
283 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xad
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
284 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xae
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
285 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaf
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
286 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb0
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
287 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb1
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
288 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb2
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
289 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb3
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
290 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb4
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
291 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb5
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
292 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb6
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
293 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb7
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
294 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb8
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
295 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb9
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
296 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xba
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
297 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbb
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
298 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbc
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
299 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbd
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
300 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbe
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
301 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbf
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
302 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc0
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
303 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc1
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
304 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc2
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
305 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc3
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
306 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc4
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
307 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc5
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
308 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc6
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
309 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc7
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
310 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc8
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
311 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc9
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
312 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xca
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
313 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcb
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
314 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcc
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
315 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcd
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
316 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xce
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
317 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcf
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
318 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd0
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
319 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd1
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
320 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd2
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
321 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd3
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
322 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd4
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
323 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd5
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
324 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd6
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
325 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd7
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
326 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd8
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
327 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd9
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
328 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xda
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
329 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdb
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
330 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdc
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
331 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdd
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
332 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xde
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
333 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdf
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
334 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe0
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
335 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe1
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
336 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe2
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
337 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe3
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
338 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe4
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
339 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe5
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
340 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe6
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
341 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe7
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
342 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe8
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
343 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe9
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
344 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xea
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
345 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xeb
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
346 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xec
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
347 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xed
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
348 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xee
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
349 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xef
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
350 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf0
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
351 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf1
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
352 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf2
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
353 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf3
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
354 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf4
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
355 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf5
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
356 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf6
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
357 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf7
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
358 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf8
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
359 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf9
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
360 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfa
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
361 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfb
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
362 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfc
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
363 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfd
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
364 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfe
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
365 {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xff
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
366 };
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
367
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
368
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
// tlds: NULL-terminated list of top-level-domain suffixes (each entry
// includes its leading dot). The changeset message recorded for these
// lines is "add scanning for bare hostnames"; the code that walks this
// list is outside this view, so the exact matching rules are not
// documented here.
// NOTE(review): string literals are bound to non-const `char *` here,
// which is deprecated in C++03 and ill-formed since C++11. Consider
// `const char *` once every user/extern declaration of tlds is checked.
369 char *tlds[] = {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
370 ".com",
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
371 ".net",
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
372 ".org",
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
373 ".biz",
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
374 NULL // sentinel: marks the end of the list (no separate length is stored)
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
375 };
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
376
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
377 u_char hex_decode[256] = {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
378 0, // 0x00
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
379 0, // 0x01
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
380 0, // 0x02
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
381 0, // 0x03
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
382 0, // 0x04
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
383 0, // 0x05
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
384 0, // 0x06
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
385 0, // 0x07
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
386 0, // 0x08
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
387 0, // 0x09 <tab>
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
388 0, // 0x0a <lf>
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
389 0, // 0x0b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
390 0, // 0x0c
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
391 0, // 0x0d <cr>
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
392 0, // 0x0e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
393 0, // 0x0f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
394 0, // 0x10
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
395 0, // 0x11 xon char
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
396 0, // 0x12
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
397 0, // 0x13 xoff char
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
398 0, // 0x14
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
399 0, // 0x15
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
400 0, // 0x16
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
401 0, // 0x17
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
402 0, // 0x18
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
403 0, // 0x19
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
404 0, // 0x1a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
405 0, // 0x1b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
406 0, // 0x1c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
407 0, // 0x1d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
408 0, // 0x1e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
409 0, // 0x1f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
410 0, // 0x20 space
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
411 0, // 0x21 !
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
412 0, // 0x22 ""
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
413 0, // 0x23 #
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
414 0, // 0x24 $
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
415 0, // 0x25 %
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
416 0, // 0x26 &
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
417 0, // 0x27 '
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
418 0, // 0x28 (
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
419 0, // 0x29 )
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
420 0, // 0x2A *
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
421 0, // 0x2B +
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
422 0, // 0x2C ,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
423 0, // 0x2D -
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
424 0, // 0x2E .
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
425 0, // 0x2F /
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
426 0, // 0x30 0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
427 1, // 0x31 1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
428 2, // 0x32 2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
429 3, // 0x33 3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
430 4, // 0x34 4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
431 5, // 0x35 5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
432 6, // 0x36 6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
433 7, // 0x37 7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
434 8, // 0x38 8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
435 9, // 0x39 9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
436 0, // 0x3A :
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
437 0, // 0x3B ;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
438 0, // 0x3C <
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
439 0, // 0x3D =
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
440 0, // 0x3E >
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
441 0, // 0x3F ?
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
442 0, // 0x40 @
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
443 10, // 0x41 A
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
444 11, // 0x42 B
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
445 12, // 0x43 C
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
446 13, // 0x44 D
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
447 14, // 0x45 E
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
448 15, // 0x46 F
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
449 0, // 0x47 G
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
450 0, // 0x48 H
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
451 0, // 0x49 I
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
452 0, // 0x4A J
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
453 0, // 0x4B K
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
454 0, // 0x4C L
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
455 0, // 0x4D M
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
456 0, // 0x4E N
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
457 0, // 0x4F O
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
458 0, // 0x50 P
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
459 0, // 0x51 Q
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
460 0, // 0x52 R
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
461 0, // 0x53 S
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
462 0, // 0x54 T
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
463 0, // 0x55 U
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
464 0, // 0x56 V
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
465 0, // 0x57 W
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
466 0, // 0x58 X
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
467 0, // 0x59 Y
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
468 0, // 0x5A Z
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
469 0, // 0x5B [
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
470 0, // 0x5C backslash
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
471 0, // 0x5D ]
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
472 0, // 0x5E ^
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
473 0, // 0x5F _
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
474 0, // 0x60 `
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
475 10, // 0x61 a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
476 11, // 0x62 b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
477 12, // 0x63 c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
478 13, // 0x64 d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
479 14, // 0x65 e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
480 15, // 0x66 f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
481 0, // 0x67 g
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
482 0, // 0x68 h
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
483 0, // 0x69 i
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
484 0, // 0x6A j
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
485 0, // 0x6B k
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
486 0, // 0x6C l
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
487 0, // 0x6D m
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
488 0, // 0x6E n
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
489 0, // 0x6F o
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
490 0, // 0x70 p
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
491 0, // 0x71 q
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
492 0, // 0x72 r
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
493 0, // 0x73 s
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
494 0, // 0x74 t
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
495 0, // 0x75 u
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
496 0, // 0x76 v
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
497 0, // 0x77 w
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
498 0, // 0x78 x
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
499 0, // 0x79 y
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
500 0, // 0x7A z
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
501 0, // 0x7B {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
502 0, // 0x7C |
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
503 0, // 0x7D }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
504 0, // 0x7E ~
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
505 0, // 0x7f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
506 0, // 0x80
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
507 0, // 0x81
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
508 0, // 0x82
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
509 0, // 0x83
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
510 0, // 0x84
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
511 0, // 0x85
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
512 0, // 0x86
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
513 0, // 0x87
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
514 0, // 0x88
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
515 0, // 0x89
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
516 0, // 0x8a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
517 0, // 0x8b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
518 0, // 0x8c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
519 0, // 0x8d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
520 0, // 0x8e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
521 0, // 0x8f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
522 0, // 0x90
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
523 0, // 0x91
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
524 0, // 0x92
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
525 0, // 0x93
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
526 0, // 0x94
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
527 0, // 0x95
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
528 0, // 0x96
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
529 0, // 0x97
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
530 0, // 0x98
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
531 0, // 0x99
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
532 0, // 0x9a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
533 0, // 0x9b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
534 0, // 0x9c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
535 0, // 0x9d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
536 0, // 0x9e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
537 0, // 0x9f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
538 0, // 0xa0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
539 0, // 0xa1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
540 0, // 0xa2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
541 0, // 0xa3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
542 0, // 0xa4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
543 0, // 0xa5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
544 0, // 0xa6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
545 0, // 0xa7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
546 0, // 0xa8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
547 0, // 0xa9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
548 0, // 0xaa
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
549 0, // 0xab
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
550 0, // 0xac
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
551 0, // 0xad
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
552 0, // 0xae
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
553 0, // 0xaf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
554 0, // 0xb0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
555 0, // 0xb1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
556 0, // 0xb2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
557 0, // 0xb3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
558 0, // 0xb4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
559 0, // 0xb5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
560 0, // 0xb6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
561 0, // 0xb7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
562 0, // 0xb8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
563 0, // 0xb9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
564 0, // 0xba
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
565 0, // 0xbb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
566 0, // 0xbc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
567 0, // 0xbd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
568 0, // 0xbe
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
569 0, // 0xbf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
570 0, // 0xc0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
571 0, // 0xc1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
572 0, // 0xc2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
573 0, // 0xc3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
574 0, // 0xc4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
575 0, // 0xc5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
576 0, // 0xc6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
577 0, // 0xc7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
578 0, // 0xc8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
579 0, // 0xc9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
580 0, // 0xca
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
581 0, // 0xcb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
582 0, // 0xcc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
583 0, // 0xcd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
584 0, // 0xce
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
585 0, // 0xcf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
586 0, // 0xd0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
587 0, // 0xd1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
588 0, // 0xd2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
589 0, // 0xd3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
590 0, // 0xd4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
591 0, // 0xd5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
592 0, // 0xd6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
593 0, // 0xd7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
594 0, // 0xd8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
595 0, // 0xd9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
596 0, // 0xda
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
597 0, // 0xdb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
598 0, // 0xdc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
599 0, // 0xdd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
600 0, // 0xde
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
601 0, // 0xdf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
602 0, // 0xe0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
603 0, // 0xe1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
604 0, // 0xe2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
605 0, // 0xe3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
606 0, // 0xe4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
607 0, // 0xe5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
608 0, // 0xe6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
609 0, // 0xe7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
610 0, // 0xe8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
611 0, // 0xe9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
612 0, // 0xea
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
613 0, // 0xeb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
614 0, // 0xec
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
615 0, // 0xed
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
616 0, // 0xee
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
617 0, // 0xef
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
618 0, // 0xf0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
619 0, // 0xf1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
620 0, // 0xf2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
621 0, // 0xf3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
622 0, // 0xf4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
623 0, // 0xf5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
624 0, // 0xf6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
625 0, // 0xf7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
626 0, // 0xf8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
627 0, // 0xf9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
628 0, // 0xfa
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
629 0, // 0xfb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
630 0, // 0xfc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
631 0, // 0xfd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
632 0, // 0xfe
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
633 0, // 0xff
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
634 };
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
635
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
636 u_char b64_decode[256] = {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
637 0, // 0x00
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
638 0, // 0x01
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
639 0, // 0x02
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
640 0, // 0x03
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
641 0, // 0x04
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
642 0, // 0x05
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
643 0, // 0x06
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
644 0, // 0x07
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
645 0, // 0x08
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
646 0, // 0x09 <tab>
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
647 0, // 0x0a <lf>
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
648 0, // 0x0b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
649 0, // 0x0c
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
650 0, // 0x0d <cr>
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
651 0, // 0x0e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
652 0, // 0x0f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
653 0, // 0x10
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
654 0, // 0x11 xon char
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
655 0, // 0x12
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
656 0, // 0x13 xoff char
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
657 0, // 0x14
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
658 0, // 0x15
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
659 0, // 0x16
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
660 0, // 0x17
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
661 0, // 0x18
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
662 0, // 0x19
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
663 0, // 0x1a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
664 0, // 0x1b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
665 0, // 0x1c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
666 0, // 0x1d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
667 0, // 0x1e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
668 0, // 0x1f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
669 0, // 0x20 space
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
670 0, // 0x21 !
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
671 0, // 0x22 ""
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
672 0, // 0x23 #
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
673 0, // 0x24 $
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
674 0, // 0x25 %
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
675 0, // 0x26 &
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
676 0, // 0x27 '
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
677 0, // 0x28 (
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
678 0, // 0x29 )
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
679 0, // 0x2A *
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
680 62, // 0x2B +
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
681 0, // 0x2C ,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
682 0, // 0x2D -
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
683 0, // 0x2E .
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
684 63, // 0x2F /
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
685 52, // 0x30 0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
686 53, // 0x31 1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
687 54, // 0x32 2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
688 55, // 0x33 3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
689 56, // 0x34 4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
690 57, // 0x35 5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
691 58, // 0x36 6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
692 59, // 0x37 7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
693 60, // 0x38 8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
694 61, // 0x39 9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
695 0, // 0x3A :
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
696 0, // 0x3B ;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
697 0, // 0x3C <
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
698 0, // 0x3D =
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
699 0, // 0x3E >
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
700 0, // 0x3F ?
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
701 0, // 0x40 @
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
702 0, // 0x41 A
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
703 1, // 0x42 B
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
704 2, // 0x43 C
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
705 3, // 0x44 D
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
706 4, // 0x45 E
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
707 5, // 0x46 F
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
708 6, // 0x47 G
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
709 7, // 0x48 H
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
710 8, // 0x49 I
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
711 9, // 0x4A J
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
712 10, // 0x4B K
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
713 11, // 0x4C L
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
714 12, // 0x4D M
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
715 13, // 0x4E N
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
716 14, // 0x4F O
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
717 15, // 0x50 P
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
718 16, // 0x51 Q
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
719 17, // 0x52 R
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
720 18, // 0x53 S
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
721 19, // 0x54 T
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
722 20, // 0x55 U
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
723 21, // 0x56 V
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
724 22, // 0x57 W
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
725 23, // 0x58 X
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
726 24, // 0x59 Y
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
727 25, // 0x5A Z
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
728 0, // 0x5B [
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
729 0, // 0x5C brace
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
730 0, // 0x5D ]
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
731 0, // 0x5E ^
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
732 0, // 0x5F _
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
733 0, // 0x60 `
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
734 26, // 0x61 a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
735 27, // 0x62 b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
736 28, // 0x63 c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
737 29, // 0x64 d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
738 30, // 0x65 e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
739 31, // 0x66 f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
740 32, // 0x67 g
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
741 33, // 0x68 h
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
742 34, // 0x69 i
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
743 35, // 0x6A j
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
744 36, // 0x6B k
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
745 37, // 0x6C l
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
746 38, // 0x6D m
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
747 39, // 0x6E n
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
748 40, // 0x6F o
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
749 41, // 0x70 p
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
750 42, // 0x71 q
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
751 43, // 0x72 r
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
752 44, // 0x73 s
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
753 45, // 0x74 t
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
754 46, // 0x75 u
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
755 47, // 0x76 v
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
756 48, // 0x77 w
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
757 49, // 0x78 x
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
758 50, // 0x79 y
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
759 51, // 0x7A z
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
760 0, // 0x7B {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
761 0, // 0x7C |
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
762 0, // 0x7D }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
763 0, // 0x7E ~
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
764 0, // 0x7f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
765 0, // 0x80
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
766 0, // 0x81
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
767 0, // 0x82
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
768 0, // 0x83
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
769 0, // 0x84
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
770 0, // 0x85
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
771 0, // 0x86
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
772 0, // 0x87
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
773 0, // 0x88
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
774 0, // 0x89
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
775 0, // 0x8a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
776 0, // 0x8b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
777 0, // 0x8c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
778 0, // 0x8d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
779 0, // 0x8e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
780 0, // 0x8f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
781 0, // 0x90
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
782 0, // 0x91
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
783 0, // 0x92
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
784 0, // 0x93
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
785 0, // 0x94
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
786 0, // 0x95
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
787 0, // 0x96
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
788 0, // 0x97
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
789 0, // 0x98
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
790 0, // 0x99
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
791 0, // 0x9a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
792 0, // 0x9b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
793 0, // 0x9c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
794 0, // 0x9d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
795 0, // 0x9e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
796 0, // 0x9f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
797 0, // 0xa0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
798 0, // 0xa1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
799 0, // 0xa2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
800 0, // 0xa3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
801 0, // 0xa4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
802 0, // 0xa5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
803 0, // 0xa6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
804 0, // 0xa7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
805 0, // 0xa8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
806 0, // 0xa9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
807 0, // 0xaa
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
808 0, // 0xab
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
809 0, // 0xac
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
810 0, // 0xad
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
811 0, // 0xae
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
812 0, // 0xaf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
813 0, // 0xb0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
814 0, // 0xb1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
815 0, // 0xb2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
816 0, // 0xb3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
817 0, // 0xb4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
818 0, // 0xb5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
819 0, // 0xb6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
820 0, // 0xb7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
821 0, // 0xb8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
822 0, // 0xb9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
823 0, // 0xba
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
824 0, // 0xbb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
825 0, // 0xbc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
826 0, // 0xbd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
827 0, // 0xbe
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
828 0, // 0xbf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
829 0, // 0xc0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
830 0, // 0xc1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
831 0, // 0xc2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
832 0, // 0xc3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
833 0, // 0xc4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
834 0, // 0xc5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
835 0, // 0xc6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
836 0, // 0xc7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
837 0, // 0xc8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
838 0, // 0xc9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
839 0, // 0xca
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
840 0, // 0xcb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
841 0, // 0xcc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
842 0, // 0xcd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
843 0, // 0xce
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
844 0, // 0xcf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
845 0, // 0xd0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
846 0, // 0xd1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
847 0, // 0xd2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
848 0, // 0xd3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
849 0, // 0xd4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
850 0, // 0xd5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
851 0, // 0xd6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
852 0, // 0xd7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
853 0, // 0xd8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
854 0, // 0xd9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
855 0, // 0xda
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
856 0, // 0xdb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
857 0, // 0xdc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
858 0, // 0xdd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
859 0, // 0xde
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
860 0, // 0xdf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
861 0, // 0xe0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
862 0, // 0xe1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
863 0, // 0xe2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
864 0, // 0xe3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
865 0, // 0xe4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
866 0, // 0xe5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
867 0, // 0xe6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
868 0, // 0xe7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
869 0, // 0xe8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
870 0, // 0xe9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
871 0, // 0xea
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
872 0, // 0xeb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
873 0, // 0xec
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
874 0, // 0xed
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
875 0, // 0xee
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
876 0, // 0xef
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
877 0, // 0xf0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
878 0, // 0xf1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
879 0, // 0xf2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
880 0, // 0xf3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
881 0, // 0xf4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
882 0, // 0xf5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
883 0, // 0xf6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
884 0, // 0xf7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
885 0, // 0xf8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
886 0, // 0xf9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
887 0, // 0xfa
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
888 0, // 0xfb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
889 0, // 0xfc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
890 0, // 0xfd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
891 0, // 0xfe
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
892 0, // 0xff
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
893 };
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
894
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
895 #define PENDING_LIMIT 100
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
896 struct fsa {
12
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
897 u_char pending[PENDING_LIMIT];
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
898 int count;
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
899 state st;
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
900 state init;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
901 fsa* next1;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
902 fsa* next2;
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
903 recorder *memory;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
904
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
905 fsa(state init, fsa* next1_, fsa* next2_, recorder *memory_);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
906 void push(u_char *buf, int len);
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
907 void pusher();
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
908 void error(char *err);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
909 };
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
910
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
911 fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
912 count = 0;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
913 st = init_;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
914 init = init_;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
915 next1 = next1_;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
916 next2 = next2_;
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
917 memory = memory_;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
918 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
919
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
920 void fsa::error(char *err) {
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
921 count = 0;
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
922 st = init;
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
923 if (err) my_syslog(err);
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
924 }
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
925
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
926 void fsa::pusher() {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
927 if (next1) next1->push(pending, count);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
928 if (next2) next2->push(pending, count);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
929 count = 0;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
930 }
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
931
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
932 void fsa::push(u_char *buf, int len) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
933 for (int i=0; i<len; i++) {
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
934 if (count == (PENDING_LIMIT-1)) error(NULL);
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
935 if (st >= end_state) error("finite state machine impossible state");
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
936 u_char c = buf[i];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
937 pending[count++] = c;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
938 st = parse_table[c][st];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
939 switch (st) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
940
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
941 //////////////////////////////
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
942 // host name recognizer
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
943 case h_end: {
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
944 pending[--count] = '\0'; // null terminate host name by overwriting the terminator
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
945 char *tld;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
946 for (int i=0; (tld = tlds[i]); i++) {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
947 int n = strlen(tld);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
948 if (count > n) {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
949 if (strncasecmp((const char *)(pending+count-n), tld, n) == 0) {
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
950 memory->new_url((char*)pending);
17
b6a4b72bb96e add scanning for bare hostnames
carl
parents: 16
diff changeset
951 break;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
952 }
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
953 }
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
954 }
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
955 st = h_init;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
956 } // fall thru
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
957
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
958 case h_init: {
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
959 count = 0;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
960 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
961
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
962
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
963 //////////////////////////////
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
964 // html tag discarder
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
965 case t_end: {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
966 st = t_init;
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
967 } // fall thru
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
968
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
969 case t_disc: {
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
970 if (count > 2) {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
971 pending[--count] = '\0'; // null terminate html tag
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
972 for (int i=1; i<count; i++) pending[i] = tolower(pending[i]);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
973 memory->new_tag((char*)pending+1);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
974 }
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
975 count = 0; // discard all characters
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
976 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
977
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
978 case t_init: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
979 pusher();
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
980 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
981
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
982
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
983 //////////////////////////////
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
984 // url recognizer
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
985 case u_reco: {
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
986 if (count > 13) { // need some minimal length host name after the protocol
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
987 pending[--count] = '\0'; // null terminate host name by overwriting the terminator
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
988 char *p = strrchr((const char *)pending, '/');
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
989 if (p && // have a leading /
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
990 strchr(p, '.') && // require at least one . in a dns name
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
991 (strncasecmp((const char *)pending, "http", 4) == 0)) { // must start with protocol
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
992 memory->new_url(++p); // we seem to have a host name, skip the last /
9
8c65411cd7ab integration work on url scanner
carl
parents: 8
diff changeset
993 }
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
994 }
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
995 st = u_init;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
996 } // fall thru
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
997
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
998 case u_init: {
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
999 count = 0; // discard all characters
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1000 } break;
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1001
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1002
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1003 //////////////////////////////
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1004 // url decoder %xx
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1005 case d_2: {
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1006 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]];
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1007 count = 1;
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1008 st = d_init;
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1009 } // fall thru
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1010
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1011 case d_init: {
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1012 pusher();
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1013 } break;
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1014
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1015
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1016 //////////////////////////////
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1017 // html entity decoder &#nnn;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1018 case e_semi: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1019 pending[--count] = '\0'; // null terminate the digit string by overwriting the semicolon
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1020 pending[0] = atoi((const char *)pending+2);
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1021 count = 1;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1022 st = e_init;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1023 } // fall thru
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1024
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1025 case e_init: {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1026 pusher();
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1027 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1028
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1029
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1030 //////////////////////////////
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1031 // mime decoder =xx
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1032 case m_2: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1033 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]];
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1034 count = 1;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1035 st = m_init;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1036 } // fall thru
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1037
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1038 case m_init: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1039 pusher();
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1040 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1041
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1042 case m_cr: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1043 count = 1;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1044 st = m_eq;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1045 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1046
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1047 case m_nl: {
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1048 count = 0;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1049 st = m_init;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1050 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1051
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1052
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1053 //////////////////////////////
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1054 // base64 decoder
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1055 case b_lf2: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1056 count--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1057 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1058
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1059 case b_cr: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1060 int cnt = 0;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1061 if ((count % 4) == 1) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1062 count--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1063 // might have proper b64 data
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1064 for (int i=0; i<count; i+=4) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1065 unsigned long a1 = b64_decode[pending[i]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1066 unsigned long a2 = b64_decode[pending[i+1]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1067 unsigned long a3 = b64_decode[pending[i+2]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1068 unsigned long a4 = b64_decode[pending[i+3]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1069 unsigned long a = (a1 << 18) | (a2 << 12) | (a3 << 6) | a4;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1070 pending[cnt++] = (a & 0x00ff0000) >> 16;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1071 pending[cnt++] = (a & 0x0000ff00) >> 8;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1072 pending[cnt++] = (a & 0x000000ff);
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1073 if ((char)pending[i+3] == '=') cnt--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1074 if ((char)pending[i+2] == '=') cnt--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1075 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1076 count = cnt;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1077 st = b_lf2;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1078 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1079 else st = b_init;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1080 } // fall thru
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1081
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1082 case b_lf:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1083 case b_init: {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1084 pusher();
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1085 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1086
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1087
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1088 //////////////////////////////
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1089 // states that just accumulate characters in the pending buffer
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1090 case h_host:
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1091 case t_tag:
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1092 case u_http:
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1093 case u_url:
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1094 case u_sla:
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1095 case d_pcnt:
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1096 case d_1:
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1097 case e_amp:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1098 case e_num:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1099 case m_eq:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1100 case m_1:
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1101 case b_64:
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1102 default: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1103 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1104 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1105 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1106 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1107
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1108 struct url_scanner {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1109 fsa *host_parser;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1110 fsa *tags_parser;
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1111 fsa *urls_parser;
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1112 fsa *urld_parser;
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1113 fsa *html_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1114 fsa *mime_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1115 fsa *b64_parser;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1116
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1117 url_scanner(recorder *memory);
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1118 ~url_scanner();
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1119 void scan(u_char *buffer, size_t length);
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1120 };
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1121
24
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1122 url_scanner::url_scanner(recorder *memory) {
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1123 host_parser = new fsa(h_init, NULL, NULL, memory);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1124 tags_parser = new fsa(t_init, host_parser, NULL, memory);
2e23b7184d2b start coding for bad html tag detection
carl
parents: 23
diff changeset
1125 urls_parser = new fsa(u_init, NULL, NULL, memory);
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1126 urld_parser = new fsa(d_init, urls_parser, tags_parser, NULL);
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1127 html_parser = new fsa(e_init, urld_parser, NULL, NULL);
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1128 mime_parser = new fsa(m_init, html_parser, NULL, NULL);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1129 b64_parser = new fsa(b_init, mime_parser, NULL, NULL);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1130 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1131
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1132 url_scanner::~url_scanner() {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1133 delete host_parser;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1134 delete tags_parser;
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1135 delete urls_parser;
23
06de5ab6a232 add url decoding stage, allow http:/ single / in yahoo redirector, allow ip address hostnames
carl
parents: 19
diff changeset
1136 delete urld_parser;
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1137 delete html_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1138 delete mime_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1139 delete b64_parser;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1140 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1141
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1142 void url_scanner::scan(u_char *buffer, size_t length) {
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1143 b64_parser->push(buffer, length);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1144 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1145