changeset 168:6bac960af6b4

add generic reverse dns filtering regex
author carl
date Thu, 30 Aug 2007 11:08:46 -0700
parents 9b129ed78d7d
children 44021d570958
files ChangeLog NEWS configure.in dnsbl.conf src/context.cpp src/context.h src/dnsbl.cpp src/dnsbl.h src/spamass.cpp.in src/spamass.h xml/dnsbl.in
diffstat 11 files changed, 130 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Mon Aug 27 20:49:19 2007 -0700
+++ b/ChangeLog	Thu Aug 30 11:08:46 2007 -0700
@@ -1,15 +1,18 @@
     $Id$
 
-6.06 2007-07-27
+6.07 2007-08-30
+    Add generic reverse dns filtering with regular expression.
+
+6.06 2007-08-27
     Fix bug that effectively disabled spamassassin filtering. Improve
     spamassassin filtering documentation. Generate warning if the
     config file specifies spam assassin filtering, but spamc is not
     found by autoconf.
 
-6.05 2007-07-26
+6.05 2007-08-26
     Fix unitialized variable in my spamassassin code.
 
-6.04 2007-07-26
+6.04 2007-08-26
     Add spamassassin integration via spamc, code from spamass-milter.
 
 6.03 2007-07-14
--- a/NEWS	Mon Aug 27 20:49:19 2007 -0700
+++ b/NEWS	Thu Aug 30 11:08:46 2007 -0700
@@ -1,8 +1,9 @@
     $Id$
 
-6.06 2007-07-27 Fix bug that effectively disabled spamassassin filtering.
-6.05 2007-07-26 Fix unitialized variable in my spamassassin code.
-6.04 2007-07-26 Add spamassassin integration via spamc, code from spamass-milter.
+6.07 2007-08-30 Add generic reverse dns filtering with regular expression.
+6.06 2007-08-27 Fix bug that effectively disabled spamassassin filtering.
+6.05 2007-08-26 Fix unitialized variable in my spamassassin code.
+6.04 2007-08-26 Add spamassassin integration via spamc, code from spamass-milter.
 6.03 2007-07-14 Don't add auto whitelist entries for our own domains.
 6.02 2007-07-10 Allow manual updates to the auto whitelisting files.
 6.01 2007-07-07 GPL3. Block mail to recipients that cannot reply. Start auto whitelisting.
--- a/configure.in	Mon Aug 27 20:49:19 2007 -0700
+++ b/configure.in	Thu Aug 30 11:08:46 2007 -0700
@@ -1,6 +1,6 @@
 
 AC_PREREQ(2.59)
-AC_INIT(dnsbl,6.06,carl@five-ten-sg.com)
+AC_INIT(dnsbl,6.07,carl@five-ten-sg.com)
 AC_CONFIG_SRCDIR([config.h.in])
 AC_CONFIG_HEADER([config.h])
 
--- a/dnsbl.conf	Mon Aug 27 20:49:19 2007 -0700
+++ b/dnsbl.conf	Thu Aug 30 11:08:46 2007 -0700
@@ -56,6 +56,9 @@
         spamassassin 5;
     };
 
+    generic "(^|[.-])(host)?([0-9]{1,3}[.-](Red-|dynamic[.-])?){4}"
+            "your mail server %s seems to have a generic name";
+
     env_to {
         # !! replace this with your domain names
         # child contexts are not allowed to specify recipient addresses outside these domains
--- a/src/context.cpp	Mon Aug 27 20:49:19 2007 -0700
+++ b/src/context.cpp	Thu Aug 30 11:08:46 2007 -0700
@@ -33,6 +33,7 @@
 char *token_envfrom;
 char *token_envto;
 char *token_filter;
+char *token_generic;
 char *token_host_limit;
 char *token_html_limit;
 char *token_html_tags;
@@ -670,6 +671,8 @@
 	name				= name_;
 	verify_host 		= NULL;
 	verifier			= NULL;
+	generic_regx		= NULL;
+	generic_message 	= NULL;
 	autowhite_file		= NULL;
 	whitelister 		= NULL;
 	env_from_default	= (parent) ? token_inherit : token_unknown;
@@ -694,6 +697,7 @@
 		// delete the underlying DNSBL objects.
 		delete d;
 	}
+	if (generic_regx && strlen(generic_regx)) regfree(&generic_pattern);
 }
 
 
@@ -712,6 +716,30 @@
 }
 
 
+bool CONTEXT::set_generic(char *regx, char *msg)
+{
+	// Install regx/msg as this context's generic filter (pointers are stored, not copied); returns true iff regx is a bad pattern.
+	if (generic_regx && strlen(generic_regx)) regfree(&generic_pattern);	// release the previously compiled pattern
+	generic_regx	= regx;
+	generic_message = msg;
+	if (!generic_regx || !strlen(generic_regx)) return false;	// NULL or empty pattern: nothing to compile
+	if (0 == regcomp(&generic_pattern, regx, REG_NOSUB | REG_ICASE | REG_EXTENDED)) return false;
+	generic_regx = generic_message = NULL;	// compile failed: forget it so nobody calls regexec/regfree on generic_pattern
+	return true;	// true iff bad pattern
+}
+
+
+char *CONTEXT::generic_match(char *client)
+{
+	// Returns generic_message if client matches the generic pattern in effect (this context's, else the nearest ancestor's), else NULL.
+	if (!client) return NULL;	// the caller may have no client name (sendmail macro "_" missing); regexec must not see NULL
+	if (!generic_regx) return (parent) ? parent->generic_match(client) : NULL;	// unset here: inherit, or no filter at the root
+	if (!strlen(generic_regx)) return NULL;	// an empty pattern disables the check
+	if (0 == regexec(&generic_pattern, client, 0, NULL, 0)) return generic_message;
+	return NULL;
+}
+
+
 bool CONTEXT::cover_env_to(char *to) {
 	char buffer[maxlen];
 	char *x = strchr(to, '@');
@@ -983,6 +1011,11 @@
 		printf("%s     verify %s; \n", indent, verify_host);
 	}
 
+	if (generic_regx) {
+		printf("%s     generic \"%s\"  \n", indent, generic_regx);
+		printf("%s             \"%s\"; \n", indent, generic_message);
+	}
+
 	if (autowhite_file && whitelister) {
 		printf("%s     autowhite %d %s; \n", indent, whitelister->get_days(), autowhite_file);
 	}
@@ -1331,6 +1364,21 @@
 
 ////////////////////////////////////////////////
 //
+bool parse_generic(TOKEN &tok, CONFIG &dc, CONTEXT &me);
+bool parse_generic(TOKEN &tok, CONFIG &dc, CONTEXT &me) {
+	// Takes the next two tokens as the pattern and the rejection message, then requires tsa() to accept token_semi.
+	// Both are handed to the context; if set_generic() flags the pattern a token error is reported and parsing fails.
+	char *pattern = tok.next();
+	char *message = tok.next();
+	if (!tsa(tok, token_semi)) return false;
+	if (!me.set_generic(pattern, message)) return true;
+	tok.token_error("invalid regular expression %s", pattern, pattern);
+	return false;
+}
+
+
+////////////////////////////////////////////////
+//
 bool parse_autowhite(TOKEN &tok, CONFIG &dc, CONTEXT &me);
 bool parse_autowhite(TOKEN &tok, CONFIG &dc, CONTEXT &me) {
 	int days = tok.nextint();
@@ -1470,6 +1518,9 @@
 		else if (have == token_verify) {
 			if (!parse_verify(tok, dc, *con)) return false;
 		}
+		else if (have == token_generic) {
+			if (!parse_generic(tok, dc, *con)) return false;
+		}
 		else if (have == token_autowhite) {
 			if (!parse_autowhite(tok, dc, *con)) return false;
 		}
@@ -1542,6 +1593,7 @@
 	token_envfrom	   = register_string("env_from");
 	token_envto 	   = register_string("env_to");
 	token_filter	   = register_string("filter");
+	token_generic	   = register_string("generic");
 	token_host_limit   = register_string("host_limit");
 	token_html_limit   = register_string("html_limit");
 	token_html_tags    = register_string("html_tags");
--- a/src/context.h	Mon Aug 27 20:49:19 2007 -0700
+++ b/src/context.h	Thu Aug 30 11:08:46 2007 -0700
@@ -11,6 +11,7 @@
 
 #include "tokenizer.h"
 #include <map>
+#include <regex.h>
 
 
 enum status {oksofar,		// not rejected yet
@@ -123,6 +124,9 @@
 	string_set		env_to; 			// this context applies to these envelope recipients
 	char *			verify_host;		// use this smtp host to verify email addresses
 	VERIFYP 		verifier;			// pointer to the verifier structure
+	char *			generic_regx;		// pointer to generic regular expression
+	char *			generic_message;	// pointer to generic message with one %s
+	regex_t 		generic_pattern;	// compiled regular expression
 	char *			autowhite_file; 	// file to use for automatic whitelisting
 	WHITELISTERP	whitelister;		// pointer to the auto whitelister structure
 	string_map		env_from;			// map senders to white/black/unknown
@@ -202,6 +206,9 @@
 	void		add_dnsbl(DNSBLP dns)						{dnsbl_list.push_back(dns);};
 	DNSBLP		find_dnsbl(char *name);
 
+	bool		set_generic(char *regx, char *msg);
+	char*		generic_match(char *client);
+
 	bool			get_content_filtering() 				{return content_filtering; };
 	int 			get_host_limit()						{return host_limit; 	   };
 	bool			get_host_random()						{return host_random;	   };
@@ -260,6 +267,7 @@
 extern char *token_envfrom;
 extern char *token_envto;
 extern char *token_filter;
+extern char *token_generic;
 extern char *token_host_limit;
 extern char *token_html_limit;
 extern char *token_html_tags;
--- a/src/dnsbl.cpp	Mon Aug 27 20:49:19 2007 -0700
+++ b/src/dnsbl.cpp	Thu Aug 30 11:08:46 2007 -0700
@@ -249,6 +249,7 @@
 	mailaddr			= NULL;
 	queueid 			= NULL;
 	authenticated		= NULL;
+	client_name 		= NULL;
 	have_whites 		= false;
 	only_whites 		= true;
 	want_spamassassin	= false;
@@ -277,6 +278,7 @@
 	if (mailaddr)	   free(mailaddr);
 	if (queueid)	   free(queueid);
 	if (authenticated) free(authenticated);
+	if (client_name)   free(client_name);
 	discard(env_to);
 	if (memory)   delete memory;
 	if (scanner)  delete scanner;
@@ -285,6 +287,7 @@
 		mailaddr			= NULL;
 		queueid 			= NULL;
 		authenticated		= NULL;
+		client_name 		= NULL;
 		have_whites 		= false;
 		only_whites 		= true;
 		want_spamassassin	= false;
@@ -965,7 +968,9 @@
 	priv.mailaddr	   = to_lower_string(from[0]);
 	priv.queueid	   = strdup(smfi_getsymval(ctx, "i"));
 	priv.authenticated = smfi_getsymval(ctx, "{auth_authen}");
+	priv.client_name   = smfi_getsymval(ctx, "_");
 	if (priv.authenticated) priv.authenticated = strdup(priv.authenticated);
+	if (priv.client_name)	priv.client_name   = strdup(priv.client_name);
 	if (spamc != spamc_empty) {
 		priv.assassin  = new SpamAssassin(&priv, priv.ip, priv.helo, priv.mailaddr, priv.queueid);
 	}
@@ -1034,6 +1039,14 @@
 		smfi_setreply(ctx, "550", "5.7.1", buf);
 		return SMFIS_REJECT;
 	}
+	char *msg = con.generic_match(priv.client_name);
+	if (msg) {
+		// reject the recipient based on generic reverse dns
+		char buf[maxlen];
+		snprintf(buf, sizeof(buf), msg, priv.client_name);
+		smfi_setreply(ctx, "550", "5.7.1", buf);
+		return SMFIS_REJECT;
+	}
 	if (st == black) {
 		// reject the recipient based on blacklisting either from or to
 		smfi_setreply(ctx, "550", "5.7.1", "no such user");
--- a/src/dnsbl.h	Mon Aug 27 20:49:19 2007 -0700
+++ b/src/dnsbl.h	Thu Aug 30 11:08:46 2007 -0700
@@ -33,6 +33,7 @@
 	char			*mailaddr;				// envelope from value
 	char			*queueid;				// sendmail queue id
 	char			*authenticated; 		// client authenticated? if so, suppress all dnsbl checks, but check rate limits
+	char			*client_name;			// fully qualified host name of the smtp client
 	bool			have_whites;			// have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails
 	bool			only_whites;			// every recipient is whitelisted?
 	bool			want_spamassassin;		// at least one non-whitelisted recipients has a non zero spamassassin limit
--- a/src/spamass.cpp.in	Mon Aug 27 20:49:19 2007 -0700
+++ b/src/spamass.cpp.in	Thu Aug 30 11:08:46 2007 -0700
@@ -93,45 +93,25 @@
         strftime(date, sizeof(date), "%a, %d %b %Y %H:%M:%S %z", localtime(&tval));
         macro_b = date;
 
-        /* queue ID */
+        // queue ID
         macro_i = queueid;
 
-        /* FQDN of this site */
-        macro_j = smfi_getsymval(ctx, "j");
-        if (!macro_j) {
-            macro_j = "localhost";
-            warnmacro("j", "ENVRCPT");
-        }
+        // FQDN of this site
+        macro_j = getorwarnmacro(ctx, "j", "localhost", "ENVRCPT");
 
-        /* Protocol used to receive the message */
-        macro_r = smfi_getsymval(ctx, "r");
-        if (!macro_r) {
-            macro_r = "SMTP";
-            warnmacro("r", "ENVRCPT");
-        }
+        // Protocol used to receive the message
+        macro_r = getorwarnmacro(ctx, "r", "SMTP",      "ENVRCPT");
 
         macro_s = helo;
 
-        /* Sendmail binary version */
-        macro_v = smfi_getsymval(ctx, "v");
-        if (!macro_v) {
-            macro_v = "8.13.0";
-            warnmacro("v", "ENVRCPT");
-        }
+        // Sendmail binary version
+        macro_v = getorwarnmacro(ctx, "v", "8.13.0",    "ENVRCPT");
 
-        /* Sendmail .cf version */
-        macro_Z = smfi_getsymval(ctx, "Z");
-        if (!macro_Z) {
-            macro_Z = "8.13.0";
-            warnmacro("Z", "ENVRCPT");
-        }
+        // Sendmail .cf version
+        macro_Z = getorwarnmacro(ctx, "Z", "8.13.0",    "ENVRCPT");
 
-        /* Validated sending site's address */
-        macro__ = smfi_getsymval(ctx, "_");
-        if (!macro__) {
-            macro__ = "unknown";
-            warnmacro("_", "ENVRCPT");
-        }
+        // Validated sending site's address
+        macro__ = getorwarnmacro(ctx, "_", "unknown",   "ENVRCPT");
 
         output(string("Received: from ") + macro_s + " (" + macro__+ ")\r\n\t" +
                "by " + macro_j + " (" + macro_v + "/" + macro_Z + ") with " + macro_r + " id " + macro_i + "\r\n\t" +
@@ -413,6 +393,17 @@
 }
 
 
+char *SpamAssassin::getorwarnmacro(SMFICTX *ctx, char *macro, char *def, char *scope)
+{
+    // Fetch a macro value via smfi_getsymval(); when that yields nothing,
+    // call warnmacro(macro, scope) and fall back to the supplied default.
+    char *value = smfi_getsymval(ctx, macro);
+    if (value) return value;
+    warnmacro(macro, scope);
+    return def;
+}
+
+
 void SpamAssassin::warnmacro(char *macro, char *scope)
 {
     if (warnedmacro) return;
--- a/src/spamass.h	Mon Aug 27 20:49:19 2007 -0700
+++ b/src/spamass.h	Thu Aug 30 11:08:46 2007 -0700
@@ -48,6 +48,7 @@
 	int  read_pipe();
 	void empty_and_close_pipe();
 	void closeall(int fd);
+	char *getorwarnmacro(SMFICTX *ctx, char *macro, char *def, char *scope);
 	void warnmacro(char *macro, char *scope);
 
 public:
--- a/xml/dnsbl.in	Mon Aug 27 20:49:19 2007 -0700
+++ b/xml/dnsbl.in	Thu Aug 30 11:08:46 2007 -0700
@@ -12,7 +12,7 @@
 
     <refentry id="@PACKAGE@.1">
         <refentryinfo>
-            <date>2007-07-27</date>
+            <date>2007-08-30</date>
         </refentryinfo>
 
         <refmeta>
@@ -326,10 +326,17 @@
                     parent context.
                 </para></listitem>
                 <listitem><para>
-                    The dns lists specified in the filtering context are checked and the
-                    mail is rejected if any list has an A record for the standard dns based
-                    lookup scheme (reversed octets of the client followed by the dns
-                    suffix).
+                    If the mail has not been accepted or rejected yet, the dns lists
+                    specified in the filtering context are checked and the mail is rejected
+                    if any list has an A record for the standard dns based lookup scheme
+                    (reversed octets of the client followed by the dns suffix).
+                </para></listitem>
+                <listitem><para>
+                    If the mail has not been accepted or rejected yet, and the filtering
+                    context specifies a non-empty generic regular expression, then we check
+                    the fully qualified client name (obtained via the sendmail macro "_").
+                    The mail is rejected if the client name matches the specified regular
+                    expression.
                 </para></listitem>
                 <listitem><para>
                     If the mail has not been accepted or rejected yet, we look for a
@@ -531,7 +538,7 @@
 
     <refentry id="@PACKAGE@.conf.5">
         <refentryinfo>
-            <date>2007-07-27</date>
+            <date>2007-08-30</date>
         </refentryinfo>
 
         <refmeta>
@@ -564,8 +571,8 @@
             <literallayout class="monospaced"><![CDATA[
 CONFIG     = {CONTEXT ";"}+
 CONTEXT    = "context" NAME "{" {STATEMENT}+ "}"
-STATEMENT  = (DNSBL | DNSBLLIST | CONTENT | ENV-TO   | VERIFY |
-                      AUTOWHITE | CONTEXT | ENV-FROM | RATE-LIMIT) ";"
+STATEMENT  = (DNSBL | DNSBLLIST | CONTENT | ENV-TO   | VERIFY | GENERIC
+                    | AUTOWHITE | CONTEXT | ENV-FROM | RATE-LIMIT) ";"
 
 DNSBL      = "dnsbl" NAME DNSPREFIX ERROR-MSG1
 
@@ -599,6 +606,9 @@
 DCC-TO     = "dcc_to" ("ok" | "many") "{" DCCINCLUDEFILE "}" ";"
 
 VERIFY     = "verify" HOSTNAME ";"
+GENERIC    = "generic" REGULAREXPRESSION ERROR-MSG4 ";"
+ERROR-MSG4 = string containing exactly one %s replacement token
+             which is replaced with the client name
 AUTOWHITE  = "autowhite" DAYS FILENAME ";"
 
 ENV_FROM   = "env_from" [DEFAULT] "{" {(FROM-ADDR | DCC-FROM)}+ "}"