Mercurial > dnsbl
diff src/scanner.cpp @ 147:812c80305f26 stable-5-26
fix 5.23 bug and add fsa debug logging
author | carl |
---|---|
date | Mon, 04 Dec 2006 21:49:09 -0800 |
parents | ecb40aa3eaa5 |
children | c7fc218686f5 |
line wrap: on
line diff
--- a/src/scanner.cpp Sun Oct 15 17:21:07 2006 -0700
+++ b/src/scanner.cpp Mon Dec 04 21:49:09 2006 -0800
@@ -81,8 +81,54 @@
     uu_cr
 };
 
+static char* state_names[] = {"h_init",
+                              "h_host",
+                              "t_init",
+                              "t_tag1",
+                              "t_tag2",
+                              "t_com1",
+                              "t_com2",
+                              "t_com3",
+                              "t_com4",
+                              "t_com5",
+                              "t_disc",
+                              "u_init",
+                              "u_http",
+                              "u_sla",
+                              "u_url",
+                              "d_init",
+                              "d_pcnt",
+                              "d_1",
+                              "e_init",
+                              "e_amp",
+                              "e_num",
+                              "m_init",
+                              "m_eq",
+                              "m_1",
+                              "b_init",
+                              "b_lf",
+                              "b_lf2",
+                              "b_64",
+                              "uu_init",
+                              "uu_lf",
+                              "uu_lf2",
+                              "uu_64",
+                              "end_state",
+                              "h_end",
+                              "t_bin",
+                              "t_end",
+                              "u_reco",
+                              "d_2",
+                              "e_semi",
+                              "m_2",
+                              "m_cr",
+                              "m_nl",
+                              "b_cr",
+                              "uu_cr"};
+
 #define PENDING_LIMIT 100
 class fsa {
+    char *myname;
     u_char pending[PENDING_LIMIT];
     int count;
     state st;
@@ -92,7 +138,7 @@
     recorder *memory;
 
 public:
-    fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_);
+    fsa(char *myname_, state init, fsa *next1_, fsa *next2_, recorder *memory_);
     void push(u_char *buf, int len);
     void pusher();
     void validhost();
@@ -1181,7 +1227,8 @@
 ////////////////////////////////////////////////
 //
 //
-fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) {
+fsa::fsa(char *myname_, state init_, fsa *next1_, fsa *next2_, recorder *memory_) {
+    myname = myname_;
     count = 0;
     st = init_;
     init = init_;
@@ -1193,7 +1240,7 @@
 void fsa::error(char *err) {
     count = 0;
     st = init;
-    if (err) my_syslog(memory->get_priv(), err);
+    if (err) memory->syslog(err);
 }
 
 void fsa::pusher() {
@@ -1214,9 +1261,10 @@
     if (p1 && (p1 != (char*)pending) & !p3) {
         // have a period, so at least two components, and no empty components
         in_addr ip;
-        if (inet_aton((const char*)pending, &ip))
+        if (inet_aton((const char*)pending, &ip)) {
             // have an ip address if at least two periods
             if (p1 != p2) memory->new_url((char*)pending);
+        }
         else {
             for (int i=0; i<count; i++) pending[i] = tolower(pending[i]);
             // is last component a tld?
@@ -1228,11 +1276,28 @@
 }
 
 void fsa::push(u_char *buf, int len) {
+    if (debug_syslog > 10) {
+        char msg[200], mbuf[200];
+        int n = sizeof(mbuf) - 1;
+        if (len < n) n = len;
+        memcpy(mbuf, buf, n);
+        mbuf[n] = '\0';
+        snprintf(msg, sizeof(msg), "%s sees %s", myname, mbuf);
+        msg[sizeof(msg)-1] = '\0';
+        memory->syslog(msg);
+    }
     for (int i=0; i<len; i++) {
         if (count == (PENDING_LIMIT-1)) error(NULL);
         if (st >= end_state) error("finite state machine impossible state");
         u_char c = buf[i];
         pending[count++] = c;
+        if (debug_syslog > 10) {
+            char *old1 = state_names[st];
+            char *new1 = state_names[parse_table[c][st]];
+            char msg[200];
+            snprintf(msg, sizeof(msg), "%s at (%d,%c) switches from %s to %s", myname, i, c, old1, new1);
+            memory->syslog(msg);
+        }
         st = parse_table[c][st];
         switch (st) {
@@ -1463,14 +1528,14 @@
 //
 //
 url_scanner::url_scanner(recorder *memory) {
-    host_parser = new fsa(h_init, NULL, NULL, memory);
-    tags_parser = new fsa(t_init, host_parser, NULL, memory);
-    urls_parser = new fsa(u_init, NULL, NULL, memory);
-    urld_parser = new fsa(d_init, urls_parser, tags_parser, memory);
-    html_parser = new fsa(e_init, urld_parser, NULL, memory);
-    mime_parser = new fsa(m_init, html_parser, NULL, memory);
-    b64_parser = new fsa(b_init, mime_parser, NULL, memory);
-    uu_parser = new fsa(uu_init, b64_parser, NULL, memory);
+    host_parser = new fsa("host_parser", h_init, NULL, NULL, memory);
+    tags_parser = new fsa("tags_parser", t_init, host_parser, NULL, memory);
+    urls_parser = new fsa("urls_parser", u_init, NULL, NULL, memory);
+    urld_parser = new fsa("urld_parser", d_init, urls_parser, tags_parser, memory);
+    html_parser = new fsa("html_parser", e_init, urld_parser, NULL, memory);
+    mime_parser = new fsa("mime_parser", m_init, html_parser, NULL, memory);
+    b64_parser = new fsa("b64_parser ", b_init, mime_parser, NULL, memory);
+    uu_parser = new fsa("uu_parser ", uu_init, b64_parser, NULL, memory);
 }
 
 url_scanner::~url_scanner() {