changeset 147:812c80305f26 stable-5-26

fix 5.23 bug and add fsa debug logging
author carl
date Mon, 04 Dec 2006 21:49:09 -0800
parents 7278c9766e26
children 9330b8d6a56b
files ChangeLog NEWS configure.in src/scanner.cpp src/scanner.h
diffstat 5 files changed, 86 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sun Oct 15 17:21:07 2006 -0700
+++ b/ChangeLog	Mon Dec 04 21:49:09 2006 -0800
@@ -1,5 +1,9 @@
     $Id$
 
+5.26 2006-12-04
+    Fix bug in content filtering introduced in 5.23 that prevented
+    seeing any host names in the body.
+
 5.25 2006-10-15
     DNSBL lists are inherited, so dump the effective list with the
     -c switch.
--- a/NEWS	Sun Oct 15 17:21:07 2006 -0700
+++ b/NEWS	Mon Dec 04 21:49:09 2006 -0800
@@ -1,5 +1,6 @@
     $Id$
 
+5.26 2006-12-04 Fix bug introduced in 5.23 that prevented seeing host names in mail bodies
 5.25 2006-10-15 Dump the effective dnsbl list with the -c switch
 5.24 2006-10-15 Allow child and parent context to specify the same fully qualified env_to address
 5.23 2006-10-10 Require two periods in ip addresses
--- a/configure.in	Sun Oct 15 17:21:07 2006 -0700
+++ b/configure.in	Mon Dec 04 21:49:09 2006 -0800
@@ -1,7 +1,7 @@
 AC_INIT(configure.in)
 
 AM_CONFIG_HEADER(config.h)
-AM_INIT_AUTOMAKE(dnsbl,5.25)
+AM_INIT_AUTOMAKE(dnsbl,5.26)
 AC_PATH_PROGS(BASH, bash)
 
 AC_LANG_CPLUSPLUS
--- a/src/scanner.cpp	Sun Oct 15 17:21:07 2006 -0700
+++ b/src/scanner.cpp	Mon Dec 04 21:49:09 2006 -0800
@@ -81,8 +81,54 @@
 			uu_cr
 		   };
 
+static char* state_names[] = {"h_init",
+							  "h_host",
+							  "t_init",
+							  "t_tag1",
+							  "t_tag2",
+							  "t_com1",
+							  "t_com2",
+							  "t_com3",
+							  "t_com4",
+							  "t_com5",
+							  "t_disc",
+							  "u_init",
+							  "u_http",
+							  "u_sla",
+							  "u_url",
+							  "d_init",
+							  "d_pcnt",
+							  "d_1",
+							  "e_init",
+							  "e_amp",
+							  "e_num",
+							  "m_init",
+							  "m_eq",
+							  "m_1",
+							  "b_init",
+							  "b_lf",
+							  "b_lf2",
+							  "b_64",
+							  "uu_init",
+							  "uu_lf",
+							  "uu_lf2",
+							  "uu_64",
+							  "end_state",
+							  "h_end",
+							  "t_bin",
+							  "t_end",
+							  "u_reco",
+							  "d_2",
+							  "e_semi",
+							  "m_2",
+							  "m_cr",
+							  "m_nl",
+							  "b_cr",
+							  "uu_cr"};
+
 #define PENDING_LIMIT 100
 class fsa {
+	char		*myname;
 	u_char		pending[PENDING_LIMIT];
 	int 		count;
 	state		st;
@@ -92,7 +138,7 @@
 	recorder	*memory;
 
 public:
-	fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_);
+	fsa(char *myname_, state init, fsa *next1_, fsa *next2_, recorder *memory_);
 	void push(u_char *buf, int len);
 	void pusher();
 	void validhost();
@@ -1181,7 +1227,8 @@
 ////////////////////////////////////////////////
 //
 //
-fsa::fsa(state init_, fsa *next1_, fsa *next2_, recorder *memory_) {
+fsa::fsa(char *myname_, state init_, fsa *next1_, fsa *next2_, recorder *memory_) {
+	myname = myname_;
 	count  = 0;
 	st	   = init_;
 	init   = init_;
@@ -1193,7 +1240,7 @@
 void fsa::error(char *err) {
 	count = 0;
 	st	  = init;
-	if (err) my_syslog(memory->get_priv(), err);
+	if (err) memory->syslog(err);
 }
 
 void fsa::pusher() {
@@ -1214,9 +1261,10 @@
 		if (p1 && (p1 != (char*)pending) & !p3) {
 			// have a period, so at least two components, and no empty components
 			in_addr ip;
-			if (inet_aton((const char*)pending, &ip))
+			if (inet_aton((const char*)pending, &ip)) {
 				// have an ip address if at least two periods
 				if (p1 != p2) memory->new_url((char*)pending);
+			}
 			else {
 				for (int i=0; i<count; i++) pending[i] = tolower(pending[i]);
 				// is last component a tld?
@@ -1228,11 +1276,28 @@
 }
 
 void fsa::push(u_char *buf, int len) {
+	if (debug_syslog > 10) {
+		char msg[200], mbuf[200];
+		int n = sizeof(mbuf) - 1;
+		if (len < n) n = len;
+		memcpy(mbuf, buf, n);
+		mbuf[n] = '\0';
+		snprintf(msg, sizeof(msg), "%s sees %s", myname, mbuf);
+		msg[sizeof(msg)-1] = '\0';
+		memory->syslog(msg);
+	}
 	for (int i=0; i<len; i++) {
 		if (count == (PENDING_LIMIT-1)) error(NULL);
 		if (st >= end_state)			error("finite state machine impossible state");
 		u_char c = buf[i];
 		pending[count++] = c;
+		if (debug_syslog > 10) {
+			char *old1 = state_names[st];
+			char *new1 = state_names[parse_table[c][st]];
+			char msg[200];
+			snprintf(msg, sizeof(msg), "%s at (%d,%c) switches from %s to %s", myname, i, c, old1, new1);
+			memory->syslog(msg);
+		}
 		st = parse_table[c][st];
 		switch (st) {
 
@@ -1463,14 +1528,14 @@
 //
 //
 url_scanner::url_scanner(recorder *memory) {
-	host_parser = new fsa(h_init,  NULL,		NULL,		 memory);
-	tags_parser = new fsa(t_init,  host_parser, NULL,		 memory);
-	urls_parser = new fsa(u_init,  NULL,		NULL,		 memory);
-	urld_parser = new fsa(d_init,  urls_parser, tags_parser, memory);
-	html_parser = new fsa(e_init,  urld_parser, NULL,		 memory);
-	mime_parser = new fsa(m_init,  html_parser, NULL,		 memory);
-	b64_parser	= new fsa(b_init,  mime_parser, NULL,		 memory);
-	uu_parser	= new fsa(uu_init, b64_parser,	NULL,		 memory);
+	host_parser = new fsa("host_parser", h_init,  NULL,        NULL,        memory);
+	tags_parser = new fsa("tags_parser", t_init,  host_parser, NULL,        memory);
+	urls_parser = new fsa("urls_parser", u_init,  NULL,        NULL,        memory);
+	urld_parser = new fsa("urld_parser", d_init,  urls_parser, tags_parser, memory);
+	html_parser = new fsa("html_parser", e_init,  urld_parser, NULL,        memory);
+	mime_parser = new fsa("mime_parser", m_init,  html_parser, NULL,        memory);
+	b64_parser	= new fsa("b64_parser ", b_init,  mime_parser, NULL,        memory);
+	uu_parser	= new fsa("uu_parser  ", uu_init, b64_parser,  NULL,        memory);
 }
 
 url_scanner::~url_scanner() {
--- a/src/scanner.h	Sun Oct 15 17:21:07 2006 -0700
+++ b/src/scanner.h	Mon Dec 04 21:49:09 2006 -0800
@@ -31,6 +31,7 @@
 	void new_url(char *host);
 	void new_tag(char *tag);
 	void binary();
+	void syslog(char *buf)						{ my_syslog(priv, buf); 															};
 	mlfiPriv   *get_priv()						{ return priv;																		};
 	string_set *get_cctlds()					{ return cctlds;																	};
 	string_set *get_tlds()						{ return tlds;																		};