changeset 92:505e77188317

optimize verification step, cleanup documentation
author carl
date Wed, 21 Sep 2005 08:00:08 -0700
parents ca46fafc6621
children 591edabaf196
files ChangeLog dnsbl.conf dnsbl.spec.in install.bash package.bash sendmail.st src/context.cpp src/context.h src/dnsbl.cpp src/includes.h tld.conf xml/dnsbl.in xml/sample.conf
diffstat 13 files changed, 550 insertions(+), 486 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sun Sep 18 10:46:31 2005 -0700
+++ b/ChangeLog	Wed Sep 21 08:00:08 2005 -0700
@@ -1,5 +1,10 @@
     $Id$
 
+5.5 2005-09-21
+    Clean up debug logging.  Verification of from/to pairs now remembers
+    the last from value sent to the remote server to prevent unnecessary
+    rset commands.
+
 5.4 2005-09-18
     Add verify statement to specify the smtp host to be used to verify
     envelope from / recipient pairs.
--- a/dnsbl.conf	Sun Sep 18 10:46:31 2005 -0700
+++ b/dnsbl.conf	Wed Sep 21 08:00:08 2005 -0700
@@ -2,7 +2,8 @@
     dnsbl   local   blackholes.five-ten-sg.com  "Mail from %s rejected - local; see http://www.five-ten-sg.com/blackhole.php?%s";
     dnsbl   sbl     sbl-xbl.spamhaus.org        "Mail from %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s";
     dnsbl   xbl     xbl.spamhaus.org            "Mail from %s rejected - xbl; see http://www.spamhaus.org/query/bl?ip=%s";
-    dnsbl_list  local sbl;
+    dnsbl   dul     dul.dnsbl.sorbs.net         "Mail from %s rejected - dul; see http://www.sorbs.net/lookup.shtml?%s";
+    dnsbl_list  local sbl dul;
 
     content on {
         filter    sbl-xbl.spamhaus.org        "Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s";
@@ -38,7 +39,7 @@
     };
 
     context minimal {
-        dnsbl_list sbl;
+        dnsbl_list sbl dul;
         content on {};
         env_to {
         };
--- a/dnsbl.spec.in	Sun Sep 18 10:46:31 2005 -0700
+++ b/dnsbl.spec.in	Wed Sep 21 08:00:08 2005 -0700
@@ -1,6 +1,6 @@
 Summary: DNSBL Sendmail Milter
 Name: dnsbl
-Version: 5.4
+Version: 5.5
 Release: 2
 Copyright: GPL
 Group: System Environment/Daemons
--- a/install.bash	Sun Sep 18 10:46:31 2005 -0700
+++ b/install.bash	Wed Sep 21 08:00:08 2005 -0700
@@ -62,6 +62,7 @@
 if [ ! -f $DST/html-tags.conf ]; then
     cp html-tags.conf $DST
 fi
+rm -f $DST/tld.conf     # new tld list
 if [ ! -f $DST/tld.conf ]; then
     cp tld.conf $DST
 fi
--- a/package.bash	Sun Sep 18 10:46:31 2005 -0700
+++ b/package.bash	Wed Sep 21 08:00:08 2005 -0700
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-VER=dnsbl-5.4
+VER=dnsbl-5.5
 mkdir $VER
     target1=/home/httpd/html/510sg/util/dnsbl.tar.gz
     target2=/home/httpd/html/510sg/dnsbl.conf
Binary file sendmail.st has changed
--- a/src/context.cpp	Sun Sep 18 10:46:31 2005 -0700
+++ b/src/context.cpp	Wed Sep 21 08:00:08 2005 -0700
@@ -65,8 +65,10 @@
 
 
 int SMTP::writer() {
-	// log("writer() sees buffer with %s", buffer);
-	// log("writer() sees error %d", (int)error);
+	#ifdef VERIFY_DEBUG
+		log("writer() sees buffer with %s", buffer);
+		log("writer() sees error %d", (int)error);
+	#endif
 	int rs = 0;
 	if (!error) {
 		int len = strlen(buffer);
@@ -90,7 +92,9 @@
 int SMTP::reader() {
 	// read some bytes terminated by lf or end of buffer.
 	// we may have a multi line response or part thereof in the buffer.
-	// log("reader() sees error %d", (int)error);
+	#ifdef VERIFY_DEBUG
+		log("reader() sees error %d", (int)error);
+	#endif
 	if (error) return 0;
 	int len = maxlen-1; // room for null terminator
 	while (pending < len) {
@@ -107,7 +111,9 @@
 		}
 	}
 	buffer[pending] = '\0';
-	// log("reader() sees buffer with %s", buffer);
+	#ifdef VERIFY_DEBUG
+		log("reader() sees buffer with %s", buffer);
+	#endif
 	return pending;
 }
 
@@ -134,8 +140,10 @@
 	buffer[pending] = '\0';
 	while (true) {
 		int r = read_line();
-		// log("read_response() sees line with %s", buffer);
-		// log("read_response() sees line length %d", r);
+		#ifdef VERIFY_DEBUG
+			log("read_response() sees line with %s", buffer);
+			log("read_response() sees line length %d", r);
+		#endif
 		if (r == 0) return 0;	// failed to read any bytes
 		if ((r > 4) && (buffer[3] == '-')) {
 			flush_line(r);
@@ -168,17 +176,24 @@
 
 
 int SMTP::rset() {
-	return cmd("RSET");
+	int rc = cmd("RSET");
+	efrom[0] = '\0';
+	return rc;
 }
 
 
 int SMTP::from(char *f) {
+	if (strncmp(efrom, f, maxlen)) {
+		rset();
+		strncpy(efrom, f, maxlen);
 	init();
 	append("MAIL FROM:<");
 	append(f);
 	append(">");
 	return cmd(NULL);
 }
+	return 250; // pretend it worked
+}
 
 
 int SMTP::rcpt(char *t) {
@@ -191,27 +206,32 @@
 
 
 int SMTP::quit() {
-	int rc = cmd("QUIT");
+	return cmd("QUIT");
+}
+
+
+void SMTP::closefd() {
 	shutdown(fd, SHUT_RDWR);
 	close(fd);
-	return rc;
 }
 
 
-//	void SMTP::log(char *m, int v) {
-//		char buf[maxlen];
-//		snprintf(buf, maxlen, m, v);
-//		my_syslog(buf);
-//	}
-//
-//
-//	void SMTP::log(char *m, char *v) {
-//		char buf[maxlen];
-//		snprintf(buf, maxlen, m, v);
-//		my_syslog(buf);
-//	}
-//
-//
+#ifdef VERIFY_DEBUG
+	void SMTP::log(char *m, int v) {
+		char buf[maxlen];
+		snprintf(buf, maxlen, m, v);
+		my_syslog(buf);
+	}
+
+
+	void SMTP::log(char *m, char *v) {
+		char buf[maxlen];
+		snprintf(buf, maxlen, m, v);
+		my_syslog(buf);
+	}
+#endif
+
+
 VERIFY::VERIFY(char *h) {
 	host	 = h;
 	last_err = 0;
@@ -222,47 +242,49 @@
 void VERIFY::closer() {
 	bool ok = true;
 	while (ok) {
-		int fd = 0;
+		SMTP *conn = NULL;
 		pthread_mutex_lock(&mutex);
-			if (sockets.empty()) {
+			if (connections.empty()) {
 				ok = false;
 			}
 			else {
-				time_t t = times.front();
+				conn = connections.front();
 				time_t now = time(NULL);
-				if ((now - t) > maxage) {
-					// this socket is ancient, remove it
-					fd = sockets.front();
-					times.pop_front();
-					sockets.pop_front();
+				if ((now - conn->get_stamp()) > maxage) {
+					// this connection is ancient, remove it
+					connections.pop_front();
 				}
 				else {
 					ok = false;
+					conn = NULL;
 				}
 			}
 		pthread_mutex_unlock(&mutex);
-		if (fd) {
-			SMTP s(fd);
-			s.quit();	// closes the fd
-			// s.log("closer() closes ancient %d", fd);
+		// avoid doing this work inside the mutex lock
+		if (conn) {
+			#ifdef VERIFY_DEBUG
+				conn->log("closer() closes ancient %d", conn->get_fd());
+			#endif
+			delete conn;
 		}
 	}
 }
 
 
-int  VERIFY::get_socket() {
-	int sock = NULL_SOCKET;
+SMTP* VERIFY::get_connection() {
+	SMTP *conn = NULL;
 	pthread_mutex_lock(&mutex);
-		if (!sockets.empty()) {
-			sock = sockets.front();
-			times.pop_front();
-			sockets.pop_front();
-			// SMTP::log("get_socket() %d from cache", sock);
+		if (!connections.empty()) {
+			conn = connections.front();
+			connections.pop_front();
+			#ifdef VERIFY_DEBUG
+				conn->log("get_connection() %d from cache", conn->get_fd());
+			#endif
 		}
 	pthread_mutex_unlock(&mutex);
-
-	if (sock == NULL_SOCKET) {
+	if (conn) return conn;
 		time_t now = time(NULL);
+	int sock = NULL_SOCKET;
 		if ((now - last_err) > ERROR_SOCKET_TIME) {
 			// nothing recent, maybe this time it will work
 			hostent *h = gethostbyname(host);
@@ -286,30 +308,32 @@
 			else last_err = now;
 		}
 		if (sock != NULL_SOCKET) {
-			SMTP s(sock);
-			if (s.helo() != 250) {
-				put_socket(sock, true);
-				sock = NULL_SOCKET;
+		conn = new SMTP(sock);
+		#ifdef VERIFY_DEBUG
+			conn->log("get_connection() %d new socket", conn->get_fd());
+		#endif
+		if (conn->helo() == 250) return conn;
+		delete conn;
 			}
-		}
-	}
-	return sock;
+	return NULL;
 }
 
 
-void VERIFY::put_socket(int fd, bool err) {
-	if (err) {
-		// SMTP::log("put_socket() %d with error, close it", fd);
-		shutdown(fd, SHUT_RDWR);
-		close(fd);
+void VERIFY::put_connection(SMTP *conn) {
+	if (conn->err()) {
+		#ifdef VERIFY_DEBUG
+			conn->log("put_socket() %d with error, close it", conn->get_fd());
+		#endif
+		delete conn;
 		last_err = time(NULL);
 	}
 	else {
-		// SMTP::log("put_socket() %d", fd);
+		#ifdef VERIFY_DEBUG
+			conn->log("put_socket() %d", conn->get_fd());
+		#endif
+		conn->now();
 		pthread_mutex_lock(&mutex);
-			time_t now = time(NULL);
-			times.push_back(now);
-			sockets.push_back(fd);
+			connections.push_back(conn);
 		pthread_mutex_unlock(&mutex);
 	}
 }
@@ -317,20 +341,23 @@
 
 bool VERIFY::ok(char *from, char *to) {
 	if (host == token_myhostname) return true;
-	int fd = get_socket();
-	if (fd == NULL_SOCKET) return true; // cannot verify right now, we have socket errors
-	SMTP s(fd);
-	s.rset();
+	SMTP *conn = get_connection();
+	if (!conn) return true;    // cannot verify right now, we have socket errors
 	int rc;
-	rc = s.from(from);
-	// s.log("verify::ok from sees %d", rc);
+	rc = conn->from(from);
+	#ifdef VERIFY_DEBUG
+		conn->log("verify::ok() from sees %d", rc);
+	#endif
 	if (rc != 250) {
-		put_socket(fd, s.err());
+		conn->rset();
+		put_connection(conn);
 		return (rc >= 500) ? false : true;
 	}
-	rc = s.rcpt(to);
-	// s.log("verify::ok rcpt sees %d", rc);
-	put_socket(fd, s.err());
+	rc = conn->rcpt(to);
+	#ifdef VERIFY_DEBUG
+		conn->log("verify::ok() rcpt sees %d", rc);
+	#endif
+	put_connection(conn);
 	return (rc >= 500) ? false : true;
 }
 
--- a/src/context.h	Sun Sep 18 10:46:31 2005 -0700
+++ b/src/context.h	Wed Sep 21 08:00:08 2005 -0700
@@ -13,12 +13,12 @@
 class DNSBL;
 class CONTEXT;
 class VERIFY;
+class SMTP;
 class recorder;
 
 typedef map<char *, char *, ltstr>		  string_map;
 typedef set<int>						  int_set;
-typedef list<int>						  fd_list;
-typedef list<time_t>					  time_list;
+typedef list<SMTP *>					  smtp_list;
 typedef list<char *>					  string_list;
 typedef DNSBL * 						  DNSBLP;
 typedef VERIFY *						  VERIFYP;
@@ -34,13 +34,19 @@
 	static const int maxlen = 1000;
 	int  fd;
 	bool error;
+	time_t	stamp;
+	char	efrom[maxlen];	// last envelope from sent on this socket
 	int  pending;		// unread bytes in buffer, not including the null terminator
 	char buffer[maxlen];
 public:
-	SMTP(int f) 			{fd = f; error = false;};
+	SMTP(int f) 			{fd = f; error = false; efrom[0] = '\0';};
+	~SMTP() 				{if (!error) quit(); closefd();};
 	void init() 			{pending = 0; buffer[0] = '\0';};
 	void append(char *c)	{strncat(buffer, c, max(0, maxlen-1-(int)strlen(c)));};
 	bool err()				{return error;};
+	void	now()			{stamp = time(NULL);};
+	time_t	get_stamp() 	{return stamp;};
+	int 	get_fd()		{return fd;};
 	int  writer();
 	int  reader();
 	int  read_line();
@@ -52,21 +58,23 @@
 	int  from(char *f);
 	int  rcpt(char *t);
 	int  quit();
-	// static void log(char *m, int v);
-	// static void log(char *m, char *v);
+	void	closefd();
+#ifdef VERIFY_DEBUG
+	static void log(char *m, int v);
+	static void log(char *m, char *v);
+#endif
 };
 
 class VERIFY {
 	char			*host;		// host to be used to verify recipient addresses
 	time_t			last_err;	// time of last socket error
 	pthread_mutex_t mutex;		// protect the lists of sockets and timestamps
-	fd_list 		sockets;	// open sockets, ready to be used
-	time_list		times;		// last timestamp when this socket was used
+	smtp_list		connections;// open sockets, ready to be used
 public:
 	VERIFY(char *h);
 	void closer();			// if the oldest socket is ancient, close it
-	int  get_socket();
-	void put_socket(int fd, bool err);
+	SMTP	*get_connection();
+	void	put_connection(SMTP *conn);
 	bool ok(char *from, char *to);
 };
 
--- a/src/dnsbl.cpp	Sun Sep 18 10:46:31 2005 -0700
+++ b/src/dnsbl.cpp	Wed Sep 21 08:00:08 2005 -0700
@@ -392,8 +392,6 @@
 				syslog_opened = true;
 			}
 			syslog(LOG_NOTICE, "%s", text);
-			// closelog();
-			// syslog_opened = false;
 		pthread_mutex_unlock(&syslog_mutex);
 	}
 	else {
@@ -434,7 +432,9 @@
 			}
 			else {
 				// peer closed the socket
-			  //my_syslog("!!child worker process, peer closed socket while reading question");
+				#ifdef RESOLVER_DEBUG
+					my_syslog("process_resolver_requests() peer closed socket while reading question");
+				#endif
 				shutdown(socket, SHUT_RDWR);
 				close(socket);
 				return;
@@ -444,9 +444,11 @@
 
 		// find the answer
 #ifdef NS_PACKETSZ
-	  //char text[1000];
-	  //snprintf(text, sizeof(text), "!!child worker process has a question %s", question);
-	  //my_syslog(text);
+		#ifdef RESOLVER_DEBUG
+			char text[1000];
+			snprintf(text, sizeof(text), "process_resolver_requests() has a question %s", question);
+			my_syslog(text);
+		#endif
 		glom.length = res_search(question, ns_c_in, ns_t_a, glom.answer, sizeof(glom.answer));
 		if (glom.length < 0) glom.length = 0;	// represent all errors as zero length answers
 #else
@@ -461,8 +463,10 @@
 		// write the answer
 		char *buf = (char *)&glom;
 		int   len = glom.length + sizeof(glom.length);
-	  //snprintf(text, sizeof(text), "!!child worker process writing answer length %d for total %d", glom.length, len);
-	  //my_syslog(text);
+		#ifdef RESOLVER_DEBUG
+			snprintf(text, sizeof(text), "process_resolver_requests() writing answer length %d for total %d", glom.length, len);
+			my_syslog(text);
+		#endif
 		int    ws = 0;
 		while (len > ws) {
 			int ns = write(socket, buf+ws, len-ws);
@@ -471,7 +475,9 @@
 			}
 			else {
 				// peer closed the socket!
-			  //my_syslog("!!child worker process, peer closed socket while writing answer");
+				#ifdef RESOLVER_DEBUG
+					my_syslog("process_resolver_requests() peer closed socket while writing answer");
+				#endif
 				shutdown(socket, SHUT_RDWR);
 				close(socket);
 				return;
@@ -498,9 +504,11 @@
 	char *buf = (char *)&glom;
 	priv.my_read(buf, sizeof(glom.length));
 	buf += sizeof(glom.length);
- ///char text[1000];
- ///snprintf(text, sizeof(text), "!!milter thread wrote question %s and has answer length %d", question, glom.length);
- ///my_syslog(text);
+	#ifdef RESOLVER_DEBUG
+		char text[1000];
+		snprintf(text, sizeof(text), "dns_interface() wrote question %s and has answer length %d", question, glom.length);
+		my_syslog(text);
+	#endif
 	if ((glom.length < 0) || (glom.length > sizeof(glom.answer))) {
 		priv.err = true;
 		return 0;  // cannot process overlarge answers
--- a/src/includes.h	Sun Sep 18 10:46:31 2005 -0700
+++ b/src/includes.h	Wed Sep 21 08:00:08 2005 -0700
@@ -1,3 +1,8 @@
+#define VERIFY_DEBUG   1
+#define RESOLVER_DEBUG 1
+#undef	VERIFY_DEBUG
+#undef	RESOLVER_DEBUG
+
 #include "tokenizer.h"
 #include "context.h"
 #include "dnsbl.h"
--- a/tld.conf	Sun Sep 18 10:46:31 2005 -0700
+++ b/tld.conf	Wed Sep 21 08:00:08 2005 -0700
@@ -1,5 +1,5 @@
 #
-# icann tlds from http://www.icann.org/registries/listing.html
+# icann tlds from http://www.icann.org/registries/listing.html as of 2005-09-18
 #
 aero
 biz
@@ -9,6 +9,7 @@
 gov
 info
 int
+jobs
 mil
 museum
 name
@@ -16,7 +17,7 @@
 org
 pro
 #
-# icann tlds from http://www.iana.org/cctld/cctld-whois.htm   2004-06-04
+# icann tlds from http://www.iana.org/cctld/cctld-whois.htm as of 2004-06-04
 #
 ac   # Ascension Island
 ad   # Andorra
--- a/xml/dnsbl.in	Sun Sep 18 10:46:31 2005 -0700
+++ b/xml/dnsbl.in	Wed Sep 21 08:00:08 2005 -0700
@@ -2,7 +2,7 @@
 
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
-<title>DNSBL Sendmail milter - Version 5.4</title>
+<title>DNSBL Sendmail milter - Version 5.5</title>
 </head>
 
 <center>Introduction</center>
@@ -19,6 +19,13 @@
 per-recipient basis, so that fred@example.com could use SPEWS and the
 SBL, where all other users @example.com use only the SBL.
 
+<p>This milter can also verify the envelope from/recipient pairs with
+the primary MX server.  This allows the backup mail servers to properly
+reject mail sent to invalid addresses.  Otherwise, the backup mail
+servers will accept that mail, and then generate a bounce message when
+the message is forwarded to the primary server (and rejected there with
+no such user).
+
 <p>This milter will also decode (uuencode, base64, mime, html entity,
 url encodings) and scan for HTTP and HTTPS URLs and bare hostnames in
 the body of the mail.  If any of those host names have A or NS records
@@ -112,14 +119,6 @@
 point to a child context.  If such an entry is found, we switch to that
 child filtering context.
 
-<br><br><li>If the filtering context specifies a verification host, and
-the envelope to email address is covered by this filtering context, and
-the verification host is not our own hostname, we open an smtp
-conversation with that verification host.  The current envelope from and
-recipient to values are passed to that verification host.  If we receive
-anything other than a 250 response those commands, we reject the current
-recipient with "no such user".
-
 <br><br><li>We lookup [1) the full envelope from email address, 2) the
 domain name part of the envelope from address, 3) the user@ part of the
 envelope from address] in the filtering context env_from statement.
@@ -143,9 +142,18 @@
 dns suffix).
 
 <br><br><li>If the mail has not been accepted or rejected yet, and the
+filtering context specifies a verification host, and the envelope to
+email address is covered by this filtering context, and the verification
+host is not our own hostname, we open an smtp conversation with that
+verification host.  The current envelope from and recipient to values
+are passed to that verification host.  If we receive a 5xy response to
+those commands, we reject the current recipient with "no such user".
+
+<br><br><li>If the mail has not been accepted or rejected yet, and the
 filtering context enables content filtering, and this is the first such
-recipient in this smtp transaction, we set the content filtering parameters
-from this context, and enable content filtering for this body.
+recipient in this smtp transaction, we set the content filtering
+parameters from this context, and enable content filtering for the body
+of this message.
 
 </ol>
 
--- a/xml/sample.conf	Sun Sep 18 10:46:31 2005 -0700
+++ b/xml/sample.conf	Wed Sep 21 08:00:08 2005 -0700
@@ -40,10 +40,10 @@
 
 context sample {
     dnsbl   local   blackholes.five-ten-sg.com  "Mail from %s rejected - local; see http://www.five-ten-sg.com/blackhole.php?%s";
-    dnsbl   spews   blackholes.spews.org        "Mail from %s rejected - spews; see http://www.spews.org/ask.cgi?x=%s";
     dnsbl   sbl     sbl-xbl.spamhaus.org        "Mail from %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s";
     dnsbl   xbl     xbl.spamhaus.org            "Mail from %s rejected - xbl; see http://www.spamhaus.org/query/bl?ip=%s";
-    dnsbl_list  local sbl;
+    dnsbl   dul     dul.dnsbl.sorbs.net         "Mail from %s rejected - dul; see http://www.sorbs.net/lookup.shtml?%s";
+    dnsbl_list  local sbl dul;
 
     content on {
         filter    sbl-xbl.spamhaus.org        "Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s";
@@ -86,7 +86,7 @@
     };
 
     context minimal {
-        dnsbl_list sbl;
+        dnsbl_list sbl dul;
         content on {};
         env_to {
             sales@mydomain.com;
@@ -114,7 +114,7 @@
     };
 
     context customer1 {
-        dnsbl_list sbl;
+        dnsbl_list sbl dul;
         env_to {
             customer1.com;
             customer1a.com;
@@ -139,7 +139,7 @@
     };
 
     context customer2 {
-        dnsbl_list sbl spews;
+        dnsbl_list sbl;
         env_to {
             customer2.com;
             customer2a.com;