# HG changeset patch # User carl # Date 1165297749 28800 # Node ID 812c80305f267d6cf98e1583635ee1f2a94c9ded # Parent 7278c9766e269627f8d419f135386f03af9d9123 fix 5.23 bug and add fsa debug logging diff -r 7278c9766e26 -r 812c80305f26 ChangeLog --- a/ChangeLog Sun Oct 15 17:21:07 2006 -0700 +++ b/ChangeLog Mon Dec 04 21:49:09 2006 -0800 @@ -1,5 +1,9 @@ $Id$ +5.26 2006-12-04 + Fix bug in content filtering introduced in 5.23 that prevented + seeing any host names in the body. + 5.25 2006-10-15 DNSBL lists are inherited, so dump the effective list with the -c switch. diff -r 7278c9766e26 -r 812c80305f26 NEWS --- a/NEWS Sun Oct 15 17:21:07 2006 -0700 +++ b/NEWS Mon Dec 04 21:49:09 2006 -0800 @@ -1,5 +1,6 @@ $Id$ +5.26 2006-12-04 Fix bug at 5.23 that prevented seeing host names in the mail bodies 5.25 2006-10-15 Dump the effective dnsbl list with the -c switch 5.24 2006-10-15 Allow child and parent context to specify the same fully qualified env_to address 5.23 2006-10-10 Require two periods in ip addresses diff -r 7278c9766e26 -r 812c80305f26 configure.in --- a/configure.in Sun Oct 15 17:21:07 2006 -0700 +++ b/configure.in Mon Dec 04 21:49:09 2006 -0800 @@ -1,7 +1,7 @@ AC_INIT(configure.in) AM_CONFIG_HEADER(config.h) -AM_INIT_AUTOMAKE(dnsbl,5.25) +AM_INIT_AUTOMAKE(dnsbl,5.26) AC_PATH_PROGS(BASH, bash) AC_LANG_CPLUSPLUS diff -r 7278c9766e26 -r 812c80305f26 src/scanner.cpp --- a/src/scanner.cpp Sun Oct 15 17:21:07 2006 -0700 +++ b/src/scanner.cpp Mon Dec 04 21:49:09 2006 -0800 @@ -81,8 +81,54 @@ uu_cr }; +static char* state_names[] = {"h_init", + "h_host", + "t_init", + "t_tag1", + "t_tag2", + "t_com1", + "t_com2", + "t_com3", + "t_com4", + "t_com5", + "t_disc", + "u_init", + "u_http", + "u_sla", + "u_url", + "d_init", + "d_pcnt", + "d_1", + "e_init", + "e_amp", + "e_num", + "m_init", + "m_eq", + "m_1", + "b_init", + "b_lf", + "b_lf2", + "b_64", + "uu_init", + "uu_lf", + "uu_lf2", + "uu_64", + "end_state", + "h_end", + "t_bin", + "t_end", + "u_reco", + "d_2", + "e_semi", + "m_2", + "m_cr", + "m_nl", + "b_cr", + "uu_cr"}; + #define PENDING_LIMIT 100 class fsa { + char *myname; u_char pending[PENDING_LIMIT]; int count; state st; @@ -92,7 +138,7 @@ recorder *memory; public: - fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_); + fsa(char *myname_, state init, fsa *next1_, fsa *next2_, recorder *memory_); void push(u_char *buf, int len); void pusher(); void validhost(); @@ -1181,7 +1227,8 @@ //////////////////////////////////////////////// // // -fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) { +fsa::fsa(char *myname_, state init_, fsa *next1_, fsa *next2_, recorder *memory_) { + myname = myname_; count = 0; st = init_; init = init_; @@ -1193,7 +1240,7 @@ void fsa::error(char *err) { count = 0; st = init; - if (err) my_syslog(memory->get_priv(), err); + if (err) memory->syslog(err); } void fsa::pusher() { @@ -1214,9 +1261,10 @@ if (p1 && (p1 != (char*)pending) & !p3) { // have a period, so at least two components, and no empty components in_addr ip; - if (inet_aton((const char*)pending, &ip)) + if (inet_aton((const char*)pending, &ip)) { // have an ip address if at least two periods if (p1 != p2) memory->new_url((char*)pending); + } else { for (int i=0; i 10) { + char msg[200], mbuf[200]; + int n = sizeof(mbuf) - 1; + if (len < n) n = len; + memcpy(mbuf, buf, n); + mbuf[n] = '\0'; + snprintf(msg, sizeof(msg), "%s sees %s", myname, mbuf); + msg[sizeof(msg)-1] = '\0'; + memory->syslog(msg); + } for (int i=0; i= end_state) error("finite state machine impossible state"); u_char c = buf[i]; pending[count++] = c; + if (debug_syslog > 10) { + char *old1 = state_names[st]; + char *new1 = state_names[parse_table[c][st]]; + char msg[200]; + snprintf(msg, sizeof(msg), "%s at (%d,%c) switches from %s to %s", myname, i, c, old1, new1); + memory->syslog(msg); + } st = parse_table[c][st]; switch (st) { @@ -1463,14 +1528,14 @@ // // url_scanner::url_scanner(recorder *memory) { - host_parser = new fsa(h_init, NULL, NULL, memory); - tags_parser = new fsa(t_init, host_parser, NULL, memory); - urls_parser = new fsa(u_init, NULL, NULL, memory); - urld_parser = new fsa(d_init, urls_parser, tags_parser, memory); - html_parser = new fsa(e_init, urld_parser, NULL, memory); - mime_parser = new fsa(m_init, html_parser, NULL, memory); - b64_parser = new fsa(b_init, mime_parser, NULL, memory); - uu_parser = new fsa(uu_init, b64_parser, NULL, memory); + host_parser = new fsa("host_parser", h_init, NULL, NULL, memory); + tags_parser = new fsa("tags_parser", t_init, host_parser, NULL, memory); + urls_parser = new fsa("urls_parser", u_init, NULL, NULL, memory); + urld_parser = new fsa("urld_parser", d_init, urls_parser, tags_parser, memory); + html_parser = new fsa("html_parser", e_init, urld_parser, NULL, memory); + mime_parser = new fsa("mime_parser", m_init, html_parser, NULL, memory); + b64_parser = new fsa("b64_parser ", b_init, mime_parser, NULL, memory); + uu_parser = new fsa("uu_parser ", uu_init, b64_parser, NULL, memory); } url_scanner::~url_scanner() { diff -r 7278c9766e26 -r 812c80305f26 src/scanner.h --- a/src/scanner.h Sun Oct 15 17:21:07 2006 -0700 +++ b/src/scanner.h Mon Dec 04 21:49:09 2006 -0800 @@ -31,6 +31,7 @@ void new_url(char *host); void new_tag(char *tag); void binary(); + void syslog(char *buf) { my_syslog(priv, buf); }; mlfiPriv *get_priv() { return priv; }; string_set *get_cctlds() { return cctlds; }; string_set *get_tlds() { return tlds; };