annotate src/scanner.cpp @ 19:b8f5fa3dd5b8

fix problems in the state transitions causing impossible states
author carl
date Fri, 30 Apr 2004 22:44:56 -0700
parents 041ea016b684
children 06de5ab6a232
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
// Version identification string for this module. "$Id$" is a version-control
// keyword placeholder. The pointee is const because a string literal must not
// be bound to a non-const char* (ill-formed since C++11).
static const char* scanner_version = "$Id$";
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
3 using namespace std;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
4
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
// States used by the table-driven recognizers in this file.
//
// The enumerators before end_state are the states that have a column in the
// parse table: they are listed in the same order as the column-header comment
// of parse_table, and end_state is used as the array bound of PARSE. Their
// order (and therefore their numeric values) must not be changed without
// changing every row of the table.
//
// The enumerators after end_state ("temporary states") have no column of
// their own; in the table they occur only as entries.
// NOTE(review): presumably the scanner acts on a temporary state and then
// replaces it with a regular one before the next lookup -- confirm against
// the scanning code, which is not part of this excerpt.
enum state {
    // host name recognizer states
    h_init,
    h_host,

    // html tag discarder states
    t_init,
    t_disc,

    // url recognizer states
    u_init,
    u_http,
    u_sla,
    u_url,

    // html entity decoder states
    e_init,
    e_amp,
    e_num,

    // mime decoder states
    m_init,
    m_eq,
    m_1,

    // base64 decoder states
    b_init,
    b_lf,
    b_lf2,
    b_64,

    // counter for number of columns in the table
    end_state,

    // temporary states
    h_end,
    t_end,
    u_reco,
    e_semi,
    m_2,
    m_cr,
    m_nl,
    b_cr
};
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
48
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
49 typedef state PARSE[end_state];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
50
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
51 static PARSE parse_table[256] = {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
52 // h_init, h_host, t_init, t_disc, u_init, u_http, u_sla , u_url, e_init, e_amp, e_num, m_init, m_eq, m_1, b_init, b_lf, b_lf2, b_64
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
53
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
54 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x00
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
55 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x01
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
56 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x02
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
57 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x03
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
58 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x04
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
59 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x05
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
60 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x06
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
61 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x07
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
62 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x08
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
63 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x09 <tab>
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
64 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_nl, m_init, b_lf, b_init, b_lf2, b_init, }, // 0x0a <lf>
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
65 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0b
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
66 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0c
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
67 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_cr, m_init, b_init, b_init, b_init, b_cr, }, // 0x0d <cr>
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
68 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0e
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
69 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x0f
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
70 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x10
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
71 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x11 xon char
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
72 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x12
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
73 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x13 xoff char
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
74 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x14
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
75 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x15
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
76 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
77 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x17
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
78 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x18
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
79 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x19
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
80 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1a
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
81 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1b
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
82 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1c
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
83 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1d
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
84 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1e
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
85 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x1f
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
86 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x20 space
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
87 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x21 !
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
88 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x22 ""
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
89 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_num, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x23 #
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
90 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x24 $
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
91 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x25 %
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
92 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_amp, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x26 &
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
93 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x27 '
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
94 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x28 (
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
95 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x29 )
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
96 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2A *
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
97 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2B +
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
98 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2C ,
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
99 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2D -
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
100 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x2E .
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
101 {h_init, h_end, t_init, t_disc, u_init, u_sla, u_sla, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x2F /
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
102 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x30 0
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
103 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x31 1
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
104 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x32 2
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
105 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x33 3
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
106 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x34 4
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
107 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x35 5
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
108 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x36 6
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
109 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x37 7
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
110 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x38 8
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
111 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x39 9
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
112 {h_init, h_end, t_init, t_disc, u_http, u_http, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3A :
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
113 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_semi, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3B ;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
114 {h_init, h_end, t_disc, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3C <
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
115 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_eq, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x3D =
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
116 {h_init, h_end, t_init, t_end, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3E >
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
117 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3F ?
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
118 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x40 @
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
119 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x41 A
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
120 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x42 B
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
121 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x43 C
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
122 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x44 D
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
123 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x45 E
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
124 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x46 F
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
125 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x47 G
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
126 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x48 H
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
127 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x49 I
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
128 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4A J
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
129 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4B K
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
130 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4C L
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
131 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4D M
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
132 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4E N
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
133 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4F O
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
134 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x50 P
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
135 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x51 Q
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
136 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x52 R
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
137 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x53 S
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
138 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x54 T
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
139 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x55 U
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
140 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x56 V
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
141 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x57 W
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
142 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x58 X
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
143 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x59 Y
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
144 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x5A Z
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
145 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5B [
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
146 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5C brace
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
147 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5D ]
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
148 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5E ^
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
149 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5F _
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
150 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x60 `
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
151 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x61 a
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
152 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x62 b
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
153 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x63 c
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
154 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x64 d
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
155 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x65 e
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
156 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x66 f
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
157 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x67 g
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
158 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x68 h
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
159 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x69 i
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
160 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6A j
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
161 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6B k
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
162 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6C l
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
163 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6D m
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
164 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6E n
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
165 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6F o
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
166 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x70 p
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
167 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x71 q
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
168 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x72 r
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
169 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x73 s
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
170 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x74 t
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
171 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x75 u
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
172 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x76 v
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
173 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x77 w
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
174 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x78 x
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
175 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x79 y
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
176 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x7A z
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
177 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7B {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
178 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7C |
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
179 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7D }
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
180 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7E ~
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
181 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7f
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
182 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x80
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
183 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x81
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
184 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x82
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
185 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x83
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
186 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x84
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
187 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x85
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
188 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x86
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
189 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x87
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
190 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x88
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
191 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x89
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
192 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8a
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
193 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8b
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
194 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8c
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
195 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8d
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
196 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8e
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
197 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x8f
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
198 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x90
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
199 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x91
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
200 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x92
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
201 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x93
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
202 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x94
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
203 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x95
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
204 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x96
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
205 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x97
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
206 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x98
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
207 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x99
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
208 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9a
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
209 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9b
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
210 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9c
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
211 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9d
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
212 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9e
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
213 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x9f
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
214 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa0
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
215 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa1
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
216 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa2
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
217 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa3
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
218 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa4
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
219 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa5
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
220 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa6
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
221 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa7
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
222 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa8
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
223 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xa9
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
224 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaa
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
225 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xab
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
226 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xac
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
227 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xad
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
228 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xae
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
229 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xaf
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
230 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb0
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
231 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb1
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
232 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb2
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
233 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb3
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
234 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb4
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
235 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb5
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
236 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb6
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
237 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb7
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
238 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb8
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
239 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xb9
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
240 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xba
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
241 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbb
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
242 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbc
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
243 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbd
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
244 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbe
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
245 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xbf
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
246 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc0
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
247 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc1
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
248 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc2
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
249 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc3
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
250 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc4
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
251 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc5
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
252 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc6
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
253 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc7
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
254 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc8
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
255 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xc9
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
256 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xca
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
257 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcb
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
258 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcc
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
259 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcd
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
260 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xce
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
261 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xcf
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
262 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd0
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
263 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd1
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
264 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd2
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
265 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd3
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
266 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd4
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
267 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd5
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
268 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd6
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
269 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd7
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
270 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd8
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
271 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xd9
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
272 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xda
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
273 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdb
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
274 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdc
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
275 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdd
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
276 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xde
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
277 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xdf
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
278 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe0
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
279 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe1
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
280 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe2
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
281 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe3
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
282 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe4
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
283 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe5
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
284 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe6
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
285 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe7
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
286 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe8
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
287 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xe9
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
288 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xea
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
289 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xeb
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
290 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xec
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
291 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xed
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
292 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xee
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
293 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xef
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
294 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf0
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
295 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf1
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
296 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf2
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
297 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf3
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
298 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf4
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
299 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf5
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
300 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf6
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
301 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf7
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
302 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf8
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
303 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xf9
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
304 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfa
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
305 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfb
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
306 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfc
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
307 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfd
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
308 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xfe
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
309 {h_init, h_end, t_init, t_disc, u_init, u_init, u_init, u_reco, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0xff
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
310 };
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
311
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
312
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
// Top-level domain suffixes, each including its leading dot. The list ends
// with a NULL sentinel so it can be walked without a separate element count.
// NOTE(review): presumably consulted by the bare-hostname recognizer; the
// code that reads this table is outside this part of the file.
//
// The entries point at string literals, which must never be written through,
// so the element type is pointer-to-const. The array itself is left non-const
// to keep its linkage exactly as before.
const char *tlds[] = {
    ".com",
    ".net",
    ".org",
    ".biz",
    NULL
};
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
320
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
// Hexadecimal digit values, indexed by unsigned character code.
//   '0'-'9' (0x30-0x39) give 0-9
//   'A'-'F' (0x41-0x46) give 10-15
//   'a'-'f' (0x61-0x66) give 10-15
// Every other code gives 0, so a zero entry by itself does not tell '0'
// apart from a character that is not a hex digit.
// Layout: one row per high nibble, one column per low nibble.
u_char hex_decode[256] = {
//  x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x00 - 0x0f  control characters
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x10 - 0x1f  control characters
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x20 - 0x2f  space and punctuation
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,  // 0x30 - 0x3f  digits 0-9, then : ; < = > ?
     0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x40 - 0x4f  @, A-F, then G-O
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x50 - 0x5f  P-Z, then [ backslash ] ^ _
     0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x60 - 0x6f  `, a-f, then g-o
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x70 - 0x7f  p-z, then { | } ~ and 0x7f
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x80 - 0x8f
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x90 - 0x9f
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xa0 - 0xaf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xb0 - 0xbf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xc0 - 0xcf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xd0 - 0xdf
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xe0 - 0xef
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xf0 - 0xff
};
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
579
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
580 u_char b64_decode[256] = {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
581 0, // 0x00
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
582 0, // 0x01
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
583 0, // 0x02
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
584 0, // 0x03
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
585 0, // 0x04
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
586 0, // 0x05
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
587 0, // 0x06
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
588 0, // 0x07
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
589 0, // 0x08
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
590 0, // 0x09 <tab>
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
591 0, // 0x0a <lf>
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
592 0, // 0x0b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
593 0, // 0x0c
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
594 0, // 0x0d <cr>
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
595 0, // 0x0e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
596 0, // 0x0f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
597 0, // 0x10
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
598 0, // 0x11 xon char
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
599 0, // 0x12
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
600 0, // 0x13 xoff char
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
601 0, // 0x14
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
602 0, // 0x15
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
603 0, // 0x16
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
604 0, // 0x17
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
605 0, // 0x18
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
606 0, // 0x19
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
607 0, // 0x1a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
608 0, // 0x1b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
609 0, // 0x1c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
610 0, // 0x1d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
611 0, // 0x1e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
612 0, // 0x1f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
613 0, // 0x20 space
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
614 0, // 0x21 !
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
615 0, // 0x22 ""
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
616 0, // 0x23 #
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
617 0, // 0x24 $
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
618 0, // 0x25 %
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
619 0, // 0x26 &
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
620 0, // 0x27 '
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
621 0, // 0x28 (
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
622 0, // 0x29 )
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
623 0, // 0x2A *
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
624 62, // 0x2B +
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
625 0, // 0x2C ,
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
626 0, // 0x2D -
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
627 0, // 0x2E .
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
628 63, // 0x2F /
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
629 52, // 0x30 0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
630 53, // 0x31 1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
631 54, // 0x32 2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
632 55, // 0x33 3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
633 56, // 0x34 4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
634 57, // 0x35 5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
635 58, // 0x36 6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
636 59, // 0x37 7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
637 60, // 0x38 8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
638 61, // 0x39 9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
639 0, // 0x3A :
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
640 0, // 0x3B ;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
641 0, // 0x3C <
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
642 0, // 0x3D =
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
643 0, // 0x3E >
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
644 0, // 0x3F ?
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
645 0, // 0x40 @
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
646 0, // 0x41 A
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
647 1, // 0x42 B
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
648 2, // 0x43 C
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
649 3, // 0x44 D
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
650 4, // 0x45 E
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
651 5, // 0x46 F
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
652 6, // 0x47 G
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
653 7, // 0x48 H
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
654 8, // 0x49 I
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
655 9, // 0x4A J
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
656 10, // 0x4B K
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
657 11, // 0x4C L
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
658 12, // 0x4D M
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
659 13, // 0x4E N
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
660 14, // 0x4F O
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
661 15, // 0x50 P
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
662 16, // 0x51 Q
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
663 17, // 0x52 R
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
664 18, // 0x53 S
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
665 19, // 0x54 T
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
666 20, // 0x55 U
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
667 21, // 0x56 V
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
668 22, // 0x57 W
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
669 23, // 0x58 X
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
670 24, // 0x59 Y
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
671 25, // 0x5A Z
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
672 0, // 0x5B [
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
673 0, // 0x5C backslash
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
674 0, // 0x5D ]
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
675 0, // 0x5E ^
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
676 0, // 0x5F _
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
677 0, // 0x60 `
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
678 26, // 0x61 a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
679 27, // 0x62 b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
680 28, // 0x63 c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
681 29, // 0x64 d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
682 30, // 0x65 e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
683 31, // 0x66 f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
684 32, // 0x67 g
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
685 33, // 0x68 h
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
686 34, // 0x69 i
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
687 35, // 0x6A j
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
688 36, // 0x6B k
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
689 37, // 0x6C l
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
690 38, // 0x6D m
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
691 39, // 0x6E n
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
692 40, // 0x6F o
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
693 41, // 0x70 p
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
694 42, // 0x71 q
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
695 43, // 0x72 r
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
696 44, // 0x73 s
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
697 45, // 0x74 t
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
698 46, // 0x75 u
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
699 47, // 0x76 v
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
700 48, // 0x77 w
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
701 49, // 0x78 x
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
702 50, // 0x79 y
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
703 51, // 0x7A z
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
704 0, // 0x7B {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
705 0, // 0x7C |
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
706 0, // 0x7D }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
707 0, // 0x7E ~
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
708 0, // 0x7f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
709 0, // 0x80
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
710 0, // 0x81
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
711 0, // 0x82
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
712 0, // 0x83
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
713 0, // 0x84
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
714 0, // 0x85
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
715 0, // 0x86
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
716 0, // 0x87
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
717 0, // 0x88
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
718 0, // 0x89
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
719 0, // 0x8a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
720 0, // 0x8b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
721 0, // 0x8c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
722 0, // 0x8d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
723 0, // 0x8e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
724 0, // 0x8f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
725 0, // 0x90
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
726 0, // 0x91
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
727 0, // 0x92
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
728 0, // 0x93
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
729 0, // 0x94
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
730 0, // 0x95
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
731 0, // 0x96
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
732 0, // 0x97
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
733 0, // 0x98
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
734 0, // 0x99
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
735 0, // 0x9a
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
736 0, // 0x9b
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
737 0, // 0x9c
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
738 0, // 0x9d
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
739 0, // 0x9e
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
740 0, // 0x9f
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
741 0, // 0xa0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
742 0, // 0xa1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
743 0, // 0xa2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
744 0, // 0xa3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
745 0, // 0xa4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
746 0, // 0xa5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
747 0, // 0xa6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
748 0, // 0xa7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
749 0, // 0xa8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
750 0, // 0xa9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
751 0, // 0xaa
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
752 0, // 0xab
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
753 0, // 0xac
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
754 0, // 0xad
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
755 0, // 0xae
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
756 0, // 0xaf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
757 0, // 0xb0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
758 0, // 0xb1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
759 0, // 0xb2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
760 0, // 0xb3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
761 0, // 0xb4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
762 0, // 0xb5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
763 0, // 0xb6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
764 0, // 0xb7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
765 0, // 0xb8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
766 0, // 0xb9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
767 0, // 0xba
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
768 0, // 0xbb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
769 0, // 0xbc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
770 0, // 0xbd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
771 0, // 0xbe
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
772 0, // 0xbf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
773 0, // 0xc0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
774 0, // 0xc1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
775 0, // 0xc2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
776 0, // 0xc3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
777 0, // 0xc4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
778 0, // 0xc5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
779 0, // 0xc6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
780 0, // 0xc7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
781 0, // 0xc8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
782 0, // 0xc9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
783 0, // 0xca
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
784 0, // 0xcb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
785 0, // 0xcc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
786 0, // 0xcd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
787 0, // 0xce
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
788 0, // 0xcf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
789 0, // 0xd0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
790 0, // 0xd1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
791 0, // 0xd2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
792 0, // 0xd3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
793 0, // 0xd4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
794 0, // 0xd5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
795 0, // 0xd6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
796 0, // 0xd7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
797 0, // 0xd8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
798 0, // 0xd9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
799 0, // 0xda
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
800 0, // 0xdb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
801 0, // 0xdc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
802 0, // 0xdd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
803 0, // 0xde
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
804 0, // 0xdf
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
805 0, // 0xe0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
806 0, // 0xe1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
807 0, // 0xe2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
808 0, // 0xe3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
809 0, // 0xe4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
810 0, // 0xe5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
811 0, // 0xe6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
812 0, // 0xe7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
813 0, // 0xe8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
814 0, // 0xe9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
815 0, // 0xea
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
816 0, // 0xeb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
817 0, // 0xec
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
818 0, // 0xed
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
819 0, // 0xee
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
820 0, // 0xef
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
821 0, // 0xf0
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
822 0, // 0xf1
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
823 0, // 0xf2
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
824 0, // 0xf3
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
825 0, // 0xf4
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
826 0, // 0xf5
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
827 0, // 0xf6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
828 0, // 0xf7
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
829 0, // 0xf8
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
830 0, // 0xf9
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
831 0, // 0xfa
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
832 0, // 0xfb
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
833 0, // 0xfc
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
834 0, // 0xfd
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
835 0, // 0xfe
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
836 0, // 0xff
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
837 };
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
838
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
839 #define PENDING_LIMIT 100
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
840 struct fsa {
12
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
841 u_char pending[PENDING_LIMIT];
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
842 int count;
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
843 state st;
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
844 state init;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
845 fsa* next1;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
846 fsa* next2;
12
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
847 string_set *hosts;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
848
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
849 fsa(state init, fsa* next1_, fsa* next2_, string_set *hosts_);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
850 void push(u_char *buf, int len);
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
851 void pusher();
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
852 void error(char *err);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
853 };
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
854
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
855 fsa::fsa(state init_, fsa *next1_, fsa *next2_, string_set *hosts_) {
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
856 count = 0;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
857 st = init_;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
858 init = init_;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
859 next1 = next1_;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
860 next2 = next2_;
12
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
861 hosts = hosts_;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
862 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
863
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
864 void fsa::error(char *err) {
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
865 count = 0;
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
866 st = init;
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
867 if (err) my_syslog(err);
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
868 }
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
869
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
870 void fsa::pusher() {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
871 if (next1) next1->push(pending, count);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
872 if (next2) next2->push(pending, count);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
873 count = 0;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
874 }
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
875
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
876 void fsa::push(u_char *buf, int len) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
877 for (int i=0; i<len; i++) {
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
878 if (count == (PENDING_LIMIT-1)) error(NULL);
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
879 if (st >= end_state) error("finite state machine impossible state");
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
880 u_char c = buf[i];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
881 pending[count++] = c;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
882 st = parse_table[c][st];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
883 switch (st) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
884
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
885 //////////////////////////////
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
886 // host name recognizer
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
887 case h_end: {
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
888 pending[--count] = '\0'; // null terminate host name by overwriting the terminator
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
889 char *tld;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
890 for (int i=0; (tld = tlds[i]); i++) {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
891 int n = strlen(tld);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
892 if (count > n) {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
893 if (strncasecmp((const char *)(pending+count-n), tld, n) == 0) {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
894 register_string(*hosts, (char*)pending);
17
b6a4b72bb96e add scanning for bare hostnames
carl
parents: 16
diff changeset
895 break;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
896 }
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
897 }
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
898 }
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
899 st = h_init;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
900 } // fall thru
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
901
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
902 case h_init: {
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
903 count = 0;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
904 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
905
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
906 //////////////////////////////
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
907 // html tag discarder
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
908 case t_end: {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
909 st = t_init;
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
910 } // fall thru
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
911
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
912 case t_disc: {
18
041ea016b684 add scanning for bare hostnames
carl
parents: 17
diff changeset
913 count = 0; // discard all characters
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
914 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
915
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
916 case t_init: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
917 pusher();
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
918 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
919
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
920 //////////////////////////////
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
921 // url recognizer
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
922 case u_sla: {
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
923 if ((count < 6) || (8 < count)) { // allow http:// or https://
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
924 count = 0;
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
925 st = u_init;
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
926 }
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
927 } break;
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
928
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
929 case u_reco: {
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
930 if (count > 13) { // need some minimal length host name after the protocol
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
931 pending[--count] = '\0'; // null terminate host name by overwriting the terminator
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
932 char *p = NULL;
9
8c65411cd7ab integration work on url scanner
carl
parents: 8
diff changeset
933 if (strncasecmp((const char *)pending, "http://", 7) == 0) {
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
934 p = (char *)pending + 7;
9
8c65411cd7ab integration work on url scanner
carl
parents: 8
diff changeset
935 }
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
936 if (strncasecmp((const char *)pending, "https://", 8) == 0) {
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
937 p = (char *)pending + 8;
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
938 }
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
939 if (p && strchr(p, '.')) register_string(*hosts, p); // require at least one . in a dns name
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
940 }
19
b8f5fa3dd5b8 fix problems in the state transitions causing impossible states
carl
parents: 18
diff changeset
941 st = u_init;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
942 } // fall thru
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
943
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
944 case u_init: {
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
945 count = 0; // discard all characters
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
946 } break;
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
947
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
948
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
949 //////////////////////////////
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
950 // html entity decoder
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
951 case e_semi: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
952 pending[--count] = '\0'; // null terminate the digit string by overwriting the semicolon
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
953 pending[0] = atoi((const char *)pending+2);
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
954 count = 1;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
955 st = e_init;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
956 } // fall thru
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
957
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
958 case e_init: {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
959 pusher();
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
960 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
961
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
962 //////////////////////////////
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
963 // mime decoder
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
964 case m_2: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
965 pending[0] = hex_decode[pending[1]] * 16 + hex_decode[pending[2]];
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
966 count = 1;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
967 st = m_init;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
968 } // fall thru
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
969
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
970 case m_init: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
971 pusher();
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
972 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
973
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
974 case m_cr: {
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
975 count = 1;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
976 st = m_eq;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
977 } break;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
978
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
979 case m_nl: {
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
980 count = 0;
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
981 st = m_init;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
982 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
983
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
984 //////////////////////////////
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
985 // base64 decoder
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
986 case b_lf2: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
987 count--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
988 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
989
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
990 case b_cr: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
991 int cnt = 0;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
992 if ((count % 4) == 1) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
993 count--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
994 // might have proper b64 data
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
995 for (int i=0; i<count; i+=4) {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
996 unsigned long a1 = b64_decode[pending[i]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
997 unsigned long a2 = b64_decode[pending[i+1]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
998 unsigned long a3 = b64_decode[pending[i+2]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
999 unsigned long a4 = b64_decode[pending[i+3]];
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1000 unsigned long a = (a1 << 18) | (a2 << 12) | (a3 << 6) | a4;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1001 pending[cnt++] = (a & 0x00ff0000) >> 16;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1002 pending[cnt++] = (a & 0x0000ff00) >> 8;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1003 pending[cnt++] = (a & 0x000000ff);
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1004 if ((char)pending[i+3] == '=') cnt--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1005 if ((char)pending[i+2] == '=') cnt--;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1006 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1007 count = cnt;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1008 st = b_lf2;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1009 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1010 else st = b_init;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1011 } // fall thru
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1012
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1013 case b_lf:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1014 case b_init: {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1015 pusher();
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1016 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1017
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1018 //////////////////////////////
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1019 // states that just accumulate characters in the pending buffer
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1020 case h_host:
7
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1021 case u_http:
93ff6d1ef647 stable standalone scanner module
carl
parents: 6
diff changeset
1022 case u_url:
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1023 case e_amp:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1024 case e_num:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1025 case b_64:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1026 case m_eq:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1027 case m_1:
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1028 default: {
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1029 } break;
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1030 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1031 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1032 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1033
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1034 struct url_scanner {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1035 fsa *host_parser;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1036 fsa *tags_parser;
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1037 fsa *urls_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1038 fsa *html_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1039 fsa *mime_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1040 fsa *b64_parser;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1041
12
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
1042 url_scanner(string_set *hosts);
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1043 ~url_scanner();
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1044 void scan(u_char *buffer, size_t length);
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1045 };
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1046
12
6ac6d6b822ce fix memory leak with duplicate url host names,
carl
parents: 11
diff changeset
1047 url_scanner::url_scanner(string_set *hosts) {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1048 host_parser = new fsa(h_init, NULL, NULL, hosts);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1049 tags_parser = new fsa(t_init, host_parser, NULL, NULL);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1050 urls_parser = new fsa(u_init, NULL, NULL, hosts);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1051 html_parser = new fsa(e_init, urls_parser, tags_parser, NULL);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1052 mime_parser = new fsa(m_init, html_parser, NULL, NULL);
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1053 b64_parser = new fsa(b_init, mime_parser, NULL, NULL);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1054 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1055
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1056 url_scanner::~url_scanner() {
16
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1057 delete host_parser;
2ae8d953f1d0 add scanning for bare hostnames
carl
parents: 12
diff changeset
1058 delete tags_parser;
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1059 delete urls_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1060 delete html_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1061 delete mime_parser;
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1062 delete b64_parser;
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1063 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1064
8
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1065 void url_scanner::scan(u_char *buffer, size_t length) {
dbe18921f741 integration work on url scanner
carl
parents: 7
diff changeset
1066 b64_parser->push(buffer, length);
6
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1067 }
cea50d98a6cf start work on content url scanner
carl
parents:
diff changeset
1068