Mercurial > dnsbl
comparison src/scanner.cpp @ 147:812c80305f26 stable-5-26
fix 5.23 bug and add fsa debug logging
author | carl |
---|---|
date | Mon, 04 Dec 2006 21:49:09 -0800 |
parents | ecb40aa3eaa5 |
children | c7fc218686f5 |
comparison
equal
deleted
inserted
replaced
146:7278c9766e26 | 147:812c80305f26 |
---|---|
79 m_nl, | 79 m_nl, |
80 b_cr, | 80 b_cr, |
81 uu_cr | 81 uu_cr |
82 }; | 82 }; |
83 | 83 |
84 static char* state_names[] = {"h_init", | |
85 "h_host", | |
86 "t_init", | |
87 "t_tag1", | |
88 "t_tag2", | |
89 "t_com1", | |
90 "t_com2", | |
91 "t_com3", | |
92 "t_com4", | |
93 "t_com5", | |
94 "t_disc", | |
95 "u_init", | |
96 "u_http", | |
97 "u_sla", | |
98 "u_url", | |
99 "d_init", | |
100 "d_pcnt", | |
101 "d_1", | |
102 "e_init", | |
103 "e_amp", | |
104 "e_num", | |
105 "m_init", | |
106 "m_eq", | |
107 "m_1", | |
108 "b_init", | |
109 "b_lf", | |
110 "b_lf2", | |
111 "b_64", | |
112 "uu_init", | |
113 "uu_lf", | |
114 "uu_lf2", | |
115 "uu_64", | |
116 "end_state", | |
117 "h_end", | |
118 "t_bin", | |
119 "t_end", | |
120 "u_reco", | |
121 "d_2", | |
122 "e_semi", | |
123 "m_2", | |
124 "m_cr", | |
125 "m_nl", | |
126 "b_cr", | |
127 "uu_cr"}; | |
128 | |
84 #define PENDING_LIMIT 100 | 129 #define PENDING_LIMIT 100 |
85 class fsa { | 130 class fsa { |
131 char *myname; | |
86 u_char pending[PENDING_LIMIT]; | 132 u_char pending[PENDING_LIMIT]; |
87 int count; | 133 int count; |
88 state st; | 134 state st; |
89 state init; | 135 state init; |
90 fsa *next1; | 136 fsa *next1; |
91 fsa *next2; | 137 fsa *next2; |
92 recorder *memory; | 138 recorder *memory; |
93 | 139 |
94 public: | 140 public: |
95 fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_); | 141 fsa(char *myname_, state init, fsa *next1_, fsa *next2_, recorder *memory_); |
96 void push(u_char *buf, int len); | 142 void push(u_char *buf, int len); |
97 void pusher(); | 143 void pusher(); |
98 void validhost(); | 144 void validhost(); |
99 void error(char *err); | 145 void error(char *err); |
100 }; | 146 }; |
1179 | 1225 |
1180 | 1226 |
1181 //////////////////////////////////////////////// | 1227 //////////////////////////////////////////////// |
1182 // | 1228 // |
1183 // | 1229 // |
1184 fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) { | 1230 fsa::fsa(char *myname_, state init_, fsa *next1_, fsa *next2_, recorder *memory_) { |
1231 myname = myname_; | |
1185 count = 0; | 1232 count = 0; |
1186 st = init_; | 1233 st = init_; |
1187 init = init_; | 1234 init = init_; |
1188 next1 = next1_; | 1235 next1 = next1_; |
1189 next2 = next2_; | 1236 next2 = next2_; |
1191 } | 1238 } |
1192 | 1239 |
1193 void fsa::error(char *err) { | 1240 void fsa::error(char *err) { |
1194 count = 0; | 1241 count = 0; |
1195 st = init; | 1242 st = init; |
1196 if (err) my_syslog(memory->get_priv(), err); | 1243 if (err) memory->syslog(err); |
1197 } | 1244 } |
1198 | 1245 |
1199 void fsa::pusher() { | 1246 void fsa::pusher() { |
1200 if (next1) next1->push(pending, count); | 1247 if (next1) next1->push(pending, count); |
1201 if (next2) next2->push(pending, count); | 1248 if (next2) next2->push(pending, count); |
1212 char *p2 = strrchr((const char *)pending, '.'); | 1259 char *p2 = strrchr((const char *)pending, '.'); |
1213 char *p3 = strstr((const char *)pending, ".."); | 1260 char *p3 = strstr((const char *)pending, ".."); |
1214 if (p1 && (p1 != (char*)pending) & !p3) { | 1261 if (p1 && (p1 != (char*)pending) & !p3) { |
1215 // have a period, so at least two components, and no empty components | 1262 // have a period, so at least two components, and no empty components |
1216 in_addr ip; | 1263 in_addr ip; |
1217 if (inet_aton((const char*)pending, &ip)) | 1264 if (inet_aton((const char*)pending, &ip)) { |
1218 // have an ip address if at least two periods | 1265 // have an ip address if at least two periods |
1219 if (p1 != p2) memory->new_url((char*)pending); | 1266 if (p1 != p2) memory->new_url((char*)pending); |
1267 } | |
1220 else { | 1268 else { |
1221 for (int i=0; i<count; i++) pending[i] = tolower(pending[i]); | 1269 for (int i=0; i<count; i++) pending[i] = tolower(pending[i]); |
1222 // is last component a tld? | 1270 // is last component a tld? |
1223 string_set::iterator i = memory->get_tlds()->find(p2+1); | 1271 string_set::iterator i = memory->get_tlds()->find(p2+1); |
1224 if (i != memory->get_tlds()->end()) memory->new_url((char*)pending); | 1272 if (i != memory->get_tlds()->end()) memory->new_url((char*)pending); |
1226 } | 1274 } |
1227 } | 1275 } |
1228 } | 1276 } |
1229 | 1277 |
1230 void fsa::push(u_char *buf, int len) { | 1278 void fsa::push(u_char *buf, int len) { |
1279 if (debug_syslog > 10) { | |
1280 char msg[200], mbuf[200]; | |
1281 int n = sizeof(mbuf) - 1; | |
1282 if (len < n) n = len; | |
1283 memcpy(mbuf, buf, n); | |
1284 mbuf[n] = '\0'; | |
1285 snprintf(msg, sizeof(msg), "%s sees %s", myname, mbuf); | |
1286 msg[sizeof(msg)-1] = '\0'; | |
1287 memory->syslog(msg); | |
1288 } | |
1231 for (int i=0; i<len; i++) { | 1289 for (int i=0; i<len; i++) { |
1232 if (count == (PENDING_LIMIT-1)) error(NULL); | 1290 if (count == (PENDING_LIMIT-1)) error(NULL); |
1233 if (st >= end_state) error("finite state machine impossible state"); | 1291 if (st >= end_state) error("finite state machine impossible state"); |
1234 u_char c = buf[i]; | 1292 u_char c = buf[i]; |
1235 pending[count++] = c; | 1293 pending[count++] = c; |
1294 if (debug_syslog > 10) { | |
1295 char *old1 = state_names[st]; | |
1296 char *new1 = state_names[parse_table[c][st]]; | |
1297 char msg[200]; | |
1298 snprintf(msg, sizeof(msg), "%s at (%d,%c) switches from %s to %s", myname, i, c, old1, new1); | |
1299 memory->syslog(msg); | |
1300 } | |
1236 st = parse_table[c][st]; | 1301 st = parse_table[c][st]; |
1237 switch (st) { | 1302 switch (st) { |
1238 | 1303 |
1239 ////////////////////////////// | 1304 ////////////////////////////// |
1240 // host name recognizer | 1305 // host name recognizer |
1461 | 1526 |
1462 //////////////////////////////////////////////// | 1527 //////////////////////////////////////////////// |
1463 // | 1528 // |
1464 // | 1529 // |
1465 url_scanner::url_scanner(recorder *memory) { | 1530 url_scanner::url_scanner(recorder *memory) { |
1466 host_parser = new fsa(h_init, NULL, NULL, memory); | 1531 host_parser = new fsa("host_parser", h_init, NULL, NULL, memory); |
1467 tags_parser = new fsa(t_init, host_parser, NULL, memory); | 1532 tags_parser = new fsa("tags_parser", t_init, host_parser, NULL, memory); |
1468 urls_parser = new fsa(u_init, NULL, NULL, memory); | 1533 urls_parser = new fsa("urls_parser", u_init, NULL, NULL, memory); |
1469 urld_parser = new fsa(d_init, urls_parser, tags_parser, memory); | 1534 urld_parser = new fsa("urld_parser", d_init, urls_parser, tags_parser, memory); |
1470 html_parser = new fsa(e_init, urld_parser, NULL, memory); | 1535 html_parser = new fsa("html_parser", e_init, urld_parser, NULL, memory); |
1471 mime_parser = new fsa(m_init, html_parser, NULL, memory); | 1536 mime_parser = new fsa("mime_parser", m_init, html_parser, NULL, memory); |
1472 b64_parser = new fsa(b_init, mime_parser, NULL, memory); | 1537 b64_parser = new fsa("b64_parser ", b_init, mime_parser, NULL, memory); |
1473 uu_parser = new fsa(uu_init, b64_parser, NULL, memory); | 1538 uu_parser = new fsa("uu_parser ", uu_init, b64_parser, NULL, memory); |
1474 } | 1539 } |
1475 | 1540 |
1476 url_scanner::~url_scanner() { | 1541 url_scanner::~url_scanner() { |
1477 delete host_parser; | 1542 delete host_parser; |
1478 delete tags_parser; | 1543 delete tags_parser; |