view src/dnsbl.cpp @ 130:e316f6fd9c39

uribl lookups fully qualified; allow two component host names
author carl
date Tue, 01 Aug 2006 10:44:55 -0700
parents c5cd1261394d
children df355d117199
line wrap: on
line source

/*

Copyright (c) 2004, 2005 Carl Byington - 510 Software Group, released
under the GPL version 2 or any later version at your choice available at
http://www.fsf.org/licenses/gpl.txt

Based on a sample milter Copyright (c) 2000-2003 Sendmail, Inc. and its
suppliers.	Inspired by the DCC by Rhyolite Software

-r port  The port used to talk to our internal dns resolver processes
-p port  The port through which the MTA will connect to this milter.
-t sec	 The timeout value.
-c		 Check the config, and print a copy to stdout. Don't start the
		 milter or do anything with the socket.
-s		 Stress test by loading and deleting the current config in a loop.
-d		 increase debug level
-e f|t	 Print the results of looking up from address f and to address
		 t in the current config

*/


// from sendmail sample
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <sysexits.h>
#include <unistd.h>

// needed for socket io
#include <sys/ioctl.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/un.h>

// needed for thread
#include <pthread.h>

// needed for std c++ collections
#include <set>
#include <map>
#include <list>

// for the dns resolver
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>

// misc stuff needed here
#include <ctype.h>
#include <syslog.h>
#include <pwd.h>
#include <sys/wait.h>	/* header for waitpid() and various macros */
#include <signal.h> 	/* header for signal functions */

#include "includes.h"

// revision-control identification string for this source file
// NOTE(review): the "$Id$" placeholder is presumably expanded by a keyword mechanism of the VCS - confirm
static char* dnsbl_version="$Id$";


// the milter callbacks are declared with C linkage, together with the
// libmilter api header, so that libmilter can call them
extern "C" {
	#include "libmilter/mfapi.h"
	// callbacks that are registered in the smfilter descriptor further down
	sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr);
	sfsistat mlfi_envfrom(SMFICTX *ctx, char **argv);
	sfsistat mlfi_envrcpt(SMFICTX *ctx, char **argv);
	sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len);
	sfsistat mlfi_eom(SMFICTX *ctx);
	sfsistat mlfi_abort(SMFICTX *ctx);
	sfsistat mlfi_close(SMFICTX *ctx);
	// NOTE(review): presumably the SIGCHLD handler; it is defined outside this part of the file
	void sig_chld(int signo);
}

// process wide state shared by all milter threads

int  debug_syslog  = 0; 	// debug verbosity, larger values log more (code tests > 0, > 1 and > 2)
bool syslog_opened = false; // set by my_syslog() once openlog() has been called
bool use_syslog    = true;	// false to printf
bool loader_run    = true;	// used to stop the config loader thread
CONFIG	 *config = NULL;	// protected by the config_mutex
int   generation = 0;		// protected by the config_mutex
const int maxlen = 1000;	// used for snprintf buffers

pthread_mutex_t  config_mutex;	// guards config, generation and the config reference counts
pthread_mutex_t  syslog_mutex;	// serializes openlog()/syslog() calls in my_syslog()
pthread_mutex_t  resolve_mutex; // held while a dns answer is parsed in dns_interface()
pthread_mutex_t  fd_pool_mutex; // guards fd_pool, resolver_sock_count and resolver_pool_size

std::set<int>	 fd_pool;		// idle sockets that are connected to resolver processes
const int	 NULL_SOCKET	   = -1;	// marks "no socket"
const time_t ERROR_SOCKET_TIME = 60;			// number of seconds between attempts to open a socket to the dns resolver process
char  *resolver_port		   = NULL;			// unix domain socket to talk to the dns resolver process
int    resolver_socket		   = NULL_SOCKET;	// socket used to listen for resolver requests
time_t last_error_time; 	// time of the last failed socket()/connect() in my_connect()
int    resolver_sock_count = 0; 		 // protected with fd_pool_mutex
int    resolver_pool_size  = 0; 		 // protected with fd_pool_mutex


// Collection of name servers discovered while resolving host names.
// add() stores strdup()ed copies of its arguments; the destructor frees them.
struct ns_map {
	// all the strings are owned by the keys/values in the ns_host string map
	string_map	ns_host;	// nameserver name -> host name that uses this name server
	ns_mapper	ns_ip;		// nameserver name -> ip address of the name server
	~ns_map();
	// remember name server 'name' as used by host 'refer'; a name that is
	// already present is left untouched
	void add(char *name, char *refer);
};


ns_map::~ns_map() {
	// every key and value of ns_host was allocated with strdup() in add().
	// ns_ip uses the very same key pointers, so they are released only once,
	// here, while walking ns_host.
	string_map::iterator entry = ns_host.begin();
	while (entry != ns_host.end()) {
		free(entry->first);
		free(entry->second);
		++entry;
	}
	ns_ip.clear();
	ns_host.clear();
}


void ns_map::add(char *name, char *refer) {
	// the first host that refers to a name server wins, later ones are ignored
	if (ns_host.find(name) != ns_host.end()) return;

	// private copies, released by the destructor
	char *key	= strdup(name);
	char *value = strdup(refer);
	ns_ip[key]	 = 0;		// zero means the address is not known yet
	ns_host[key] = value;
}

// packed structure to allow a single socket write to dump the
// length and the following answer. The packing attribute is gcc specific.
// Wire format of one reply sent from a resolver process to a milter thread:
// the length of the answer followed by the answer itself.
struct glommer {
	int    length;					// number of valid bytes in answer
	#ifdef NS_PACKETSZ
		u_char answer[NS_PACKETSZ*4];	// with a resolver, we return resolver answers
	#else
		int    answer;					// without a resolver, we return a single ip4 address, 0 == no answer
	#endif
} __attribute__ ((packed));


////////////////////////////////////////////////
// helper to discard the strings held by a context_map
//
void discard(context_map &cm);
// Free every key string of the map, then empty the map. The mapped
// CONTEXT pointers are not touched.
void discard(context_map &cm) {
	context_map::iterator entry = cm.begin();
	while (entry != cm.end()) {
		free(entry->first);
		++entry;
	}
	cm.clear();
}


////////////////////////////////////////////////
// helper to register a string in a context_map
//
void register_string(context_map &cm, char *name, CONTEXT *con);
// Map a private copy of name to con, unless name is already a key of the
// map, in which case the existing entry is kept unchanged.
void register_string(context_map &cm, char *name, CONTEXT *con) {
	if (cm.find(name) == cm.end()) {
		char *key = strdup(name);	// freed later by discard()
		cm[key] = con;
	}
}


////////////////////////////////////////////////
// disconnect the fd from the dns resolver process
//
void my_disconnect(int sock, bool decrement = true);
// Shut down and close sock; NULL_SOCKET is silently ignored. When decrement
// is true the count of open resolver sockets is reduced under the
// fd_pool_mutex.
void my_disconnect(int sock, bool decrement) {
	if (sock == NULL_SOCKET) return;
	if (decrement) {
		pthread_mutex_lock(&fd_pool_mutex);
		--resolver_sock_count;
		pthread_mutex_unlock(&fd_pool_mutex);
	}
	shutdown(sock, SHUT_RDWR);
	close(sock);
}


////////////////////////////////////////////////
// return fd connected to the dns resolver process
//
int my_connect();
// Open a unix domain stream socket to the resolver process listening on
// resolver_port. Returns the connected fd, or NULL_SOCKET on failure.
// After a failure no new attempt is made for ERROR_SOCKET_TIME seconds.
int my_connect() {
	// back off while the last failure is still recent
	if ((time(NULL) - last_error_time) < ERROR_SOCKET_TIME) return NULL_SOCKET;

	sockaddr_un server;
	memset(&server, '\0', sizeof(server));
	server.sun_family = AF_UNIX;
	strncpy(server.sun_path, resolver_port, sizeof(server.sun_path)-1);

	int sock = socket(AF_UNIX, SOCK_STREAM, 0);
	if (sock == NULL_SOCKET) {
		last_error_time = time(NULL);
		return NULL_SOCKET;
	}
	if (connect(sock, (sockaddr *)&server, sizeof(server)) != 0) {
		// this socket was never counted, so do not decrement the count
		my_disconnect(sock, false);
		last_error_time = time(NULL);
		return NULL_SOCKET;
	}

	// one more open resolver socket
	pthread_mutex_lock(&fd_pool_mutex);
	resolver_sock_count++;
	pthread_mutex_unlock(&fd_pool_mutex);
	return sock;
}


mlfiPriv::mlfiPriv() {
	// pin the configuration that is current right now; the reference is
	// dropped again in the destructor
	pthread_mutex_lock(&config_mutex);
		pc = config;
		++(pc->reference_count);
	pthread_mutex_unlock(&config_mutex);

	// get a socket to a resolver process, this sets fd and err
	get_fd();

	// connection state
	ip					= 0;
	authenticated		= false;

	// envelope state
	mailaddr			= NULL;
	queueid 			= NULL;
	have_whites 		= false;
	only_whites 		= true;

	// content filtering state, filled in by need_content_filter()
	memory				= NULL;
	scanner 			= NULL;
	content_suffix		= NULL;
	content_message 	= NULL;
	uribl_suffix		= NULL;
	uribl_message		= NULL;
	content_host_ignore = NULL;
}

mlfiPriv::~mlfiPriv() {
	// hand the resolver socket back to the pool, or close it
	return_fd();

	// drop our reference on the configuration taken in the constructor
	pthread_mutex_lock(&config_mutex);
		--(pc->reference_count);
	pthread_mutex_unlock(&config_mutex);

	// release the per message memory, no need to reinitialize the members
	reset(true);
}

void mlfiPriv::reset(bool final) {
	// release everything that belongs to the current message;
	// free() and delete both accept null pointers
	free(mailaddr);
	free(queueid);
	discard(env_to);
	delete memory;
	delete scanner;

	// the object is about to go away, the members need no fresh values
	if (final) return;

	// get ready for another message on the same connection
	mailaddr			= NULL;
	queueid 			= NULL;
	authenticated		= false;
	have_whites 		= false;
	only_whites 		= true;
	memory				= NULL;
	scanner 			= NULL;
	content_suffix		= NULL;
	content_message 	= NULL;
	uribl_suffix		= NULL;
	uribl_message		= NULL;
	content_host_ignore = NULL;
}

void mlfiPriv::get_fd() {
	err = true;
	fd	= NULL_SOCKET;
	int result = pthread_mutex_lock(&fd_pool_mutex);
	if (!result) {
		std::set<int>::iterator i;
		i = fd_pool.begin();
		if (i != fd_pool.end()) {
			// have at least one fd in the pool
			err = false;
			fd	= *i;
			fd_pool.erase(fd);
			resolver_pool_size--;
			pthread_mutex_unlock(&fd_pool_mutex);
		}
		else {
			// pool is empty, get a new fd
			pthread_mutex_unlock(&fd_pool_mutex);
			fd	= my_connect();
			err = (fd == NULL_SOCKET);
		}
	}
	else {
		// cannot lock the pool, just get a new fd
		fd	= my_connect();
		err = (fd == NULL_SOCKET);
	}
}

void mlfiPriv::return_fd() {
	// Give fd back to the pool when it is healthy and the pool is not
	// already large enough, otherwise close it.
	bool pooled = false;
	if (!err && (pthread_mutex_lock(&fd_pool_mutex) == 0)) {
		// keep the fd unless more than 20% of the open resolver sockets
		// are in the pool and the pool has at least 5 sockets
		if ((resolver_sock_count > resolver_pool_size*5) || (resolver_pool_size < 5)) {
			fd_pool.insert(fd);
			resolver_pool_size++;
			pooled = true;
		}
		pthread_mutex_unlock(&fd_pool_mutex);
	}
	if (!pooled) {
		// socket error, pool is full enough, or pool could not be locked
		my_disconnect(fd);
	}
}

int mlfiPriv::my_write(char *buf, int len) {
	// Write all len bytes of buf to the resolver socket. Returns the number
	// of bytes written, or 0 after setting err when the write fails or the
	// socket is already marked bad.
	if (err) return 0;
	int total = 0;
	while (len) {
		int sent = write(fd, buf, len);
		if (sent <= 0) {
			// peer closed the socket!
			err = true;
			return 0;
		}
		total += sent;
		buf   += sent;
		len   -= sent;
	}
	return total;
}

int mlfiPriv::my_read(char *buf, int len) {
	// Read exactly len bytes from the resolver socket into buf. Returns the
	// number of bytes read, or 0 after setting err when the read fails, the
	// peer closes the socket, or the socket is already marked bad.
	if (err) return 0;
	int rs = 0;
	// keep reading until nothing remains; stopping at len > 1 would leave
	// the last byte of a short read on the socket and corrupt the framing
	// of every following answer
	while (len > 0) {
		int ws = read(fd, buf, len);
		if (ws > 0) {
			rs	+= ws;
			len -= ws;
			buf += ws;
		}
		else {
			// peer closed the socket!
			rs = 0;
			err = true;
			break;
		}
	}
	return rs;
}

void mlfiPriv::need_content_filter(char *rcpt, CONTEXT &con) {
	// remember the recipient together with its filtering context
	register_string(env_to, rcpt, &con);

	// only the first recipient that needs content filtering
	// defines the content filtering parameters for this message
	if (memory) return;

	memory				= new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_cctlds());
	scanner 			= new url_scanner(memory);
	content_suffix		= con.get_content_suffix();
	content_message 	= con.get_content_message();
	uribl_suffix		= con.get_uribl_suffix();
	uribl_message		= con.get_uribl_message();
	content_host_ignore = &con.get_content_host_ignore();
}

// the per connection private data stored with smfi_setpriv(); needs a
// variable named ctx in scope and evaluates to NULL if nothing is stored
#define MLFIPRIV	((struct mlfiPriv *) smfi_getpriv(ctx))


////////////////////////////////////////////////
// syslog a message
//
// Log text via syslog (or stdout when use_syslog is false). When priv is
// not NULL the text is prefixed with the queue id of the current message.
void my_syslog(mlfiPriv *priv, char *text) {
	char buf[maxlen];
	if (priv) {
		// the queue id may not be known yet; passing a null pointer for %s
		// is undefined behavior, so substitute the text glibc would print
		const char *qid = (priv->queueid) ? priv->queueid : "(null)";
		snprintf(buf, sizeof(buf), "%s: %s", qid, text);
		text = buf;
	}
	if (use_syslog) {
		pthread_mutex_lock(&syslog_mutex);
			if (!syslog_opened) {
				// open the log lazily on first use
				openlog("dnsbl", LOG_PID, LOG_MAIL);
				syslog_opened = true;
			}
			syslog(LOG_NOTICE, "%s", text);
		pthread_mutex_unlock(&syslog_mutex);
	}
	else {
		printf("%s \n", text);
	}
}

void my_syslog(char *text) {
	my_syslog(NULL, text);
}


////////////////////////////////////////////////
//	read a resolver request from the socket, process it, and
//	write the result back to the socket.

void process_resolver_requests(int socket);
// Serve dns questions arriving on socket until the peer closes it or an
// i/o error occurs. Each question is a null terminated name; each reply
// is a glommer (length followed by that many answer bytes). The socket is
// shut down and closed before returning.
void process_resolver_requests(int socket) {
#ifdef NS_MAXDNAME
	char question[NS_MAXDNAME];
#else
	char question[1000];
#endif
	glommer glom;

	int maxq = sizeof(question);
	while (true) {
		// read a question
		int rs = 0;
		while (rs < maxq) {
			int ns = read(socket, question+rs, maxq-rs);
			if (ns > 0) {
				rs += ns;
				if (question[rs-1] == '\0') {
					// last byte read was the null terminator, we are done
					break;
				}
			}
			else {
				// peer closed the socket
				#ifdef RESOLVER_DEBUG
					my_syslog("process_resolver_requests() peer closed socket while reading question");
				#endif
				shutdown(socket, SHUT_RDWR);
				close(socket);
				return;
			}
		}
		question[rs-1] = '\0';  // ensure null termination

		// find the answer
#ifdef NS_PACKETSZ
		#ifdef RESOLVER_DEBUG
			char text[1000];
			snprintf(text, sizeof(text), "process_resolver_requests() has a question %s", question);
			my_syslog(text);
		#endif
		glom.length = res_search(question, ns_c_in, ns_t_a, glom.answer, sizeof(glom.answer));
		if (glom.length < 0) glom.length = 0;	// represent all errors as zero length answers
		// the resolver may report the full size of a reply that did not fit
		// in the buffer; never claim more bytes than the buffer holds, or
		// the write below would run past the end of glom
		if (glom.length > (int)sizeof(glom.answer)) glom.length = sizeof(glom.answer);
#else
		glom.length = sizeof(glom.answer);
		glom.answer = 0;
		struct hostent *host = gethostbyname(question);
		if (host && (host->h_addrtype == AF_INET)) {
			memcpy(&glom.answer, host->h_addr, sizeof(glom.answer));
		}
#endif

		// write the answer
		char *buf = (char *)&glom;
		int   len = glom.length + sizeof(glom.length);
		#ifdef RESOLVER_DEBUG
			snprintf(text, sizeof(text), "process_resolver_requests() writing answer length %d for total %d", glom.length, len);
			my_syslog(text);
		#endif
		int    ws = 0;
		while (len > ws) {
			int ns = write(socket, buf+ws, len-ws);
			if (ns > 0) {
				ws += ns;
			}
			else {
				// peer closed the socket!
				#ifdef RESOLVER_DEBUG
					my_syslog("process_resolver_requests() peer closed socket while writing answer");
				#endif
				shutdown(socket, SHUT_RDWR);
				close(socket);
				return;
			}
		}
	}
}


////////////////////////////////////////////////
//	ask a dns question and get an A record answer - we don't try
//	very hard, just using the default resolver retry settings.
//	If we cannot get an answer, we just accept the mail.
//
//
int dns_interface(mlfiPriv &priv, char *question, bool maybe_ip, ns_map *nameservers);
// Send question to the resolver process behind priv's socket and return an
// ip4 address taken from the A records of the answer, or 0 if there is none
// or the socket failed.
//   maybe_ip    - if no address was found, try to parse question itself as
//                 a dotted ip4 address
//   nameservers - if not NULL, the name servers from the authority section
//                 are added to it, and addresses from the additional
//                 section are recorded for those name servers
int dns_interface(mlfiPriv &priv, char *question, bool maybe_ip, ns_map *nameservers) {
	// this part can be done without locking the resolver mutex. Each
	// milter thread is talking over its own socket to a separate resolver
	// process, which does the actual dns resolution.
	if (priv.err) return 0; // cannot ask more questions on this socket.
	priv.my_write(question, strlen(question)+1);   // write the question including the null terminator
	glommer glom;
	glom.length = 0;
	char *buf = (char *)&glom;
	priv.my_read(buf, sizeof(glom.length));
	if (priv.err) return 0; // write or read failed, so the length was never received
	buf += sizeof(glom.length);
	#ifdef RESOLVER_DEBUG
		char text[1000];
		snprintf(text, sizeof(text), "dns_interface() wrote question %s and has answer length %d", question, glom.length);
		my_syslog(text);
	#endif
	if ((glom.length < 0) || (glom.length > (int)sizeof(glom.answer))) {
		priv.err = true;
		return 0;  // cannot process overlarge answers
	}
	priv.my_read(buf, glom.length);
	if (priv.err) return 0; // the answer is incomplete

#ifdef NS_PACKETSZ
	// now we need to lock the resolver mutex to keep the milter threads from
	// stepping on each other while parsing the dns answer.
	int ret_address = 0;
	pthread_mutex_lock(&resolve_mutex);
		if (glom.length > 0) {
			// parse the answer
			ns_msg handle;
			ns_rr  rr;
			if (ns_initparse(glom.answer, glom.length, &handle) == 0) {
				// look for ns names
				if (nameservers) {
					ns_map &ns = *nameservers;
					int rrnum = 0;
					while (ns_parserr(&handle, ns_s_ns, rrnum++, &rr) == 0) {
						if (ns_rr_type(rr) == ns_t_ns) {
							// expand the (possibly compressed) name server name
							// from the rdata into dotted text form
							char nam[NS_MAXDNAME+1];
							char		 *n = nam;
							const u_char *p = ns_rr_rdata(rr);
							while (((n-nam) < NS_MAXDNAME) && ((p-glom.answer) < glom.length) && *p) {
								size_t s = *(p++);
								if (s > 191) {
									// compression pointer
									if ((p-glom.answer) >= glom.length) break;	// second pointer byte outside bounds of answer
									s = (s-192)*256 + *(p++);
									if (s >= (size_t)glom.length) break; // pointer outside bounds of answer
									p = glom.answer + s;
									s = *(p++);
									// a pointer to the root label ends the name. without
									// this, crafted answers could make the loop run forever
									// while the resolver mutex is held.
									if (s == 0) break;
								}
								if (s > 0) {
									// both checks are done with additions so that the
									// unsigned arithmetic cannot wrap around
									if (((size_t)(n-nam) + s)		  >= (size_t)NS_MAXDNAME) break;	// destination would overflow name buffer
									if (((size_t)(p-glom.answer) + s) >= (size_t)glom.length) break;	// source outside bounds of answer
									memcpy(n, p, s);
									n += s;
									p += s;
									*(n++) = '.';
								}
							}
							if (n-nam) n--; 			// remove trailing .
							*n = '\0';                  // null terminate it
							ns.add(nam, question);		// ns host to lookup later
						}
					}
					rrnum = 0;
					while (ns_parserr(&handle, ns_s_ar, rrnum++, &rr) == 0) {
						if (ns_rr_type(rr) == ns_t_a) {
							char* nam = (char*)ns_rr_name(rr);
							ns_mapper::iterator i = ns.ns_ip.find(nam);
							if (i != ns.ns_ip.end()) {
								// we want this ip address
								int address;
								memcpy(&address, ns_rr_rdata(rr), sizeof(address));
								ns.ns_ip[nam] = address;
							}
						}
					}
				}
				// the last A record of the answer section wins
				int rrnum = 0;
				while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) {
					if (ns_rr_type(rr) == ns_t_a) {
						int address;
						memcpy(&address, ns_rr_rdata(rr), sizeof(address));
						ret_address = address;
					}
				}
			}
		}
		if (maybe_ip && !ret_address) {
			// might be a bare ip address
			in_addr ip;
			if (inet_aton(question, &ip)) {
				ret_address = ip.s_addr;
			}
		}
	pthread_mutex_unlock(&resolve_mutex);
	return ret_address;
#else
	return glom.answer;
#endif
}


////////////////////////////////////////////////
//	check a single dnsbl
//
bool check_single(mlfiPriv &priv, int ip, char *suffix);
// Returns true if ip (network byte order) has an A record on the dnsbl
// with the given suffix. Loopback and rfc1918 addresses are never looked up.
bool check_single(mlfiPriv &priv, int ip, char *suffix) {
	const u_char *octet = (const u_char *)&ip;
	const bool local = (octet[0] == 127)								// localhost
					|| (octet[0] == 10) 								// rfc1918 space
					|| ((octet[0] == 192) && (octet[1] == 168))
					|| ((octet[0] == 172) && (16 <= octet[1]) && (octet[1] <= 31));
	if (local) return false;	// don't do dns lookups on these

	// the question is the reversed address followed by the list suffix
#ifdef NS_MAXDNAME
	char question[NS_MAXDNAME];
#else
	char question[1000];
#endif
	snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", octet[3], octet[2], octet[1], octet[0], suffix);

	// any A record implies a blacklisted ip address
	return (dns_interface(priv, question, false, NULL) != 0);
}


////////////////////////////////////////////////
//	check a single dnsbl, taking the suffix from a DNSBL object
//
bool check_single(mlfiPriv &priv, int ip, DNSBL &bl);
// Convenience overload, looks up ip on the list described by bl.
bool check_single(mlfiPriv &priv, int ip, DNSBL &bl) {
	char *suffix = bl.suffix;
	return check_single(priv, ip, suffix);
}


////////////////////////////////////////////////
//	check the dnsbls specified for this recipient
//
bool check_dnsbl(mlfiPriv &priv, dnsblp_list &dnsbll, DNSBLP &rejectlist);
// Check priv.ip against each list in dnsbll, in order, and stop at the
// first list that contains it. Results are cached in priv.checked so each
// list is queried at most once. Returns true if the address is listed;
// rejectlist is set to the list examined last.
bool check_dnsbl(mlfiPriv &priv, dnsblp_list &dnsbll, DNSBLP &rejectlist) {
	for (dnsblp_list::iterator it=dnsbll.begin(); it!=dnsbll.end(); ++it) {
		DNSBLP dp = *it;	// non null by construction
		bool listed;
		map<DNSBLP, bool>::iterator cached = priv.checked.find(dp);
		if (cached != priv.checked.end()) {
			// reuse the earlier answer for this list
			listed	   = cached->second;
			rejectlist = cached->first;
		}
		else {
			// have not checked this list yet
			listed	   = check_single(priv, priv.ip, *dp);
			rejectlist = dp;
			priv.checked[dp] = listed;
		}
		if (listed) return true;
	}
	return false;
}


////////////////////////////////////////////////
//	lookup the domain name part of a hostname on the uribl
//
//	if we find part of the hostname on the uribl, return
//	true and point found to the part of the hostname that we found.
//	otherwise, return false and preserve the value of found.
//
bool uriblookup(mlfiPriv &priv ,char *hostname, char *top, char *&found) ;
bool uriblookup(mlfiPriv &priv, char *hostname, char *top, char *&found) {
	// top is pointer to '.' char at end of base domain, or null for ip address form
	// so for hostname of www.fred.mydomain.co.uk
	// top points to-----------------------^
	// and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff
	char *name = hostname;
	if (top) {
		// keep one more component to the left of top. the string is cut at
		// top only for the duration of the search.
		*top = '\0';
		char *dot = strrchr(hostname, '.');
		*top = '.';
		if (dot) name = dot+1;
	}

	char question[maxlen];
	snprintf(question, sizeof(question), "%s.%s", name, priv.uribl_suffix);
	if (!dns_interface(priv, question, false, NULL)) return false;

	if (debug_syslog > 2) {
		char note[maxlen];
		snprintf(note, sizeof(note), "found %s on %s", name, priv.uribl_suffix);
		my_syslog(note);
	}
	found = name;	// points into the caller's hostname string
	return true;
}


////////////////////////////////////////////////
//	uribl checker
//	-------------
//	hostname MUST not have a trailing dot
//	If tld, two level lookup.
//	Else, look up three level domain.
//
//	if we find part of the hostname on the uribl, return
//	true and point found to the part of the hostname that we found.
//	otherwise, return false and preserve the value of found.
//
bool check_uribl(mlfiPriv &priv, char *hostname, char *&found) ;
// Look up hostname (a name without trailing dot, or a dotted ip4 address)
// on the uribl. Returns true and points found at the part that is listed,
// otherwise returns false and leaves found alone. Addresses in loopback or
// rfc1918 space and names without any dot are never looked up.
bool check_uribl(mlfiPriv &priv, char *hostname, char *&found) {
	in_addr ip;
	if (inet_aton(hostname, &ip)) {
		const u_char *src = (const u_char *)&ip.s_addr;
		if (src[0] == 127) return false;	// don't do dns lookups on localhost
		if (src[0] == 10)  return false;	// don't do dns lookups on rfc1918 space
		if ((src[0] == 192) && (src[1] == 168)) return false;
		if ((src[0] == 172) && (16 <= src[1]) && (src[1] <= 31)) return false;
		// found may end up pointing at this buffer, so it has to outlive
		// the call. it is thread local (gcc __thread) since the milter
		// threads would otherwise overwrite each others reversed address.
		static __thread char adr[sizeof "255.255.255.255"];
		snprintf(adr, sizeof(adr), "%u.%u.%u.%u", src[3], src[2], src[1], src[0]);
		return (uriblookup(priv, adr, NULL, found));
	}

	char *top, *top2, *top3;
	top = strrchr(hostname, '.');
	if (top) {
		// find the dot before the last one, with the string cut at top
		*top = '\0';
		top2 = strrchr(hostname, '.');
		*top = '.';

		if (top2) {
			string_set::iterator i = priv.memory->get_cctlds()->find(top2+1);
			string_set::iterator x = priv.memory->get_cctlds()->end();
			// if we have a 2-level-cctld, just look at top three levels of the name
			if (i != x) return uriblookup(priv, hostname, top2, found);

			// find the dot before top2, with the string cut at top2
			*top2 = '\0';
			top3 = strrchr(hostname, '.');
			*top2 = '.';

			// if we have more than 3 levels in the name, look at the top three levels of the name
			if (top3 && uriblookup(priv, hostname, top2, found)) return true;
			// if that was not found, fall thru to looking at the top two levels
		}
		// look at the top two levels of the name
		return uriblookup(priv, hostname, top, found);
	}
	return false;
}


////////////////////////////////////////////////
//	check the hosts from the body against the content filter and uribl dnsbls
//
//
bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&msg, char *&host, int &ip, char *&found);
// Check the host names collected from the message body, and then the name
// servers of those hosts, against the content dnsbl and the uribl.
//   random, limit - with random set and more than limit hosts available,
//                   only about limit of them are sampled; limit also caps
//                   the number of name servers that are checked
// Returns true on the first hit with
//   msg   - the message format of the list that matched
//   host  - the offending host name (owned by the recorder's host set)
//   ip    - the address that host resolved to
//   found - the listed part of the name for a uribl hit, NULL for a dnsbl hit
// Returns false if nothing is listed; the outputs are then meaningless.
bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&msg, char *&host, int &ip, char *&found) {
	found = NULL;	// normally ip address style
	if (!priv.content_suffix && !priv.uribl_suffix) return false;	// nothing to check
	string_set &hosts  = priv.memory->get_hosts();
	string_set &ignore = *priv.content_host_ignore;

	int count = 0;
	int   cnt = hosts.size();	// number of hosts we could look at
	int_set ips;				// addresses that have already been checked
	ns_map	nameservers;		// name servers seen while resolving the hosts
	for (string_set::iterator i=hosts.begin(); i!=hosts.end(); i++) {
		host = *i;	// a reference into hosts, which will live until this smtp transaction is closed

		// don't bother looking up hosts on the ignore list
		string_set::iterator j = ignore.find(host);
		if (j != ignore.end()) continue;

		// try to only look at limit/cnt fraction of the available cnt host names in random mode
		if ((cnt > limit) && (limit > 0) && random) {
			int r = rand() % cnt;
			if (r >= limit) {
				if (debug_syslog > 2) {
					char buf[maxlen];
					snprintf(buf, sizeof(buf), "host %s skipped", host);
					my_syslog(&priv, buf);
				}
				continue;
			}
		}
		count++;
		ip = dns_interface(priv, host, true, &nameservers);
		if (debug_syslog > 2) {
			char buf[maxlen];
			if (ip) {
				char adr[sizeof "255.255.255.255"];
				adr[0] = '\0';
				inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
				snprintf(buf, sizeof(buf), "host %s found at %s", host, adr);
			}
			else {
				snprintf(buf, sizeof(buf), "host %s not found", host);
			}
			my_syslog(&priv, buf);
		}
		if (ip) {
			int_set::iterator i = ips.find(ip);
			if (i == ips.end()) {
				// we haven't looked this up yet
				ips.insert(ip);
				// check dnsbl style list
				if (priv.content_suffix && check_single(priv, ip, priv.content_suffix)) {
					msg = priv.content_message;
					return true;
				}
				// Check uribl & surbl style list
				if (priv.uribl_suffix && check_uribl(priv, host, found)) {
					msg = priv.uribl_message;
					return true;
				}
			}
		}
	}

	// the name servers are only checked on the dnsbl style list. without
	// such a list there is nothing left to do, and the lookups below would
	// pass a null suffix to check_single().
	if (!priv.content_suffix) return false;

	limit *= 4;   // allow average of 3 ns per host name
	for (ns_mapper::iterator i=nameservers.ns_ip.begin(); i!=nameservers.ns_ip.end(); i++) {
		count++;
		if ((count > limit) && (limit > 0)) return false;	// too many name servers to check them all
		host = (*i).first;	// a transient reference that needs to be replaced before we return it
		ip	 = (*i).second;
		if (!ip) ip = dns_interface(priv, host, false, NULL);	// address was not in the additional section
		if (debug_syslog > 2) {
			char buf[maxlen];
			if (ip) {
				char adr[sizeof "255.255.255.255"];
				adr[0] = '\0';
				inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
				snprintf(buf, sizeof(buf), "ns %s found at %s", host, adr);
			}
			else {
				snprintf(buf, sizeof(buf), "ns %s not found", host);
			}
			my_syslog(&priv, buf);
		}
		if (ip) {
			int_set::iterator i = ips.find(ip);
			if (i == ips.end()) {
				ips.insert(ip);
				if (check_single(priv, ip, priv.content_suffix)) {
					msg = priv.content_message;
					string_map::iterator j = nameservers.ns_host.find(host);
					if (j != nameservers.ns_host.end()) {
						char *refer = (*j).second;
						char buf[maxlen];
						snprintf(buf, sizeof(buf), "%s with nameserver %s", refer, host);
						host = register_string(hosts, buf);    // put a copy into hosts, and return that reference
					}
					else {
						host = register_string(hosts, host);   // put a copy into hosts, and return that reference
					}
					return true;
				}
			}
		}
	}
	return false;
}


////////////////////////////////////////////////
//
// this email address is passed in from sendmail, and will normally be
// enclosed in <>.	I think older versions of sendmail supplied the <>
// wrapper if the mail client did not, but the current version does not do
// that.  So the <> wrapper is now optional.  It may have mixed case, just
// as the mail client sent it.	We dup the string and convert the duplicate
// to lower case.
//
char *to_lower_string(char *email);
// Return a newly allocated lower case copy of email with an enclosing <>
// wrapper removed. The caller owns the result and must free() it.
//   "<Fred@Example.COM>" -> "fred@example.com"
//   "Fred@Example.COM"   -> "fred@example.com"
//   "<>"                 -> "<>" (the null sender is returned unchanged)
// A leading < without the closing > is stripped on its own, so the last
// character of the address is no longer lost. Returns NULL if memory
// cannot be allocated.
char *to_lower_string(char *email) {
	size_t n = strlen(email);
	if (*email == '<') {
		// only count the closing > as part of the wrapper if it is there
		bool   closed = (n >= 2) && (email[n-1] == '>');
		size_t inner  = n - (closed ? 2 : 1);
		if (inner < 1) return strdup(email);	// return "<>"
		email++;
		n = inner;
	}
	char *key = strdup(email);
	if (!key) return NULL;
	key[n] = '\0';
	// tolower() is only defined for unsigned char values and EOF, so plain
	// (possibly negative) chars must be converted first
	for (size_t i=0; i<n; i++) key[i] = (char)tolower((unsigned char)key[i]);
	return key;
}


////////////////////////////////////////////////
// start of sendmail milter interfaces
//
// Connection callback: create the private data of this connection and
// remember the ip4 address of the client. priv->ip stays zero for clients
// that are not connected via ip4.
sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr)
{
	// allocate some private memory
	mlfiPriv *priv = new mlfiPriv;
	// libmilter passes a null hostaddr when the address type is not
	// supported or the smtp connection is made via stdin
	if (hostaddr && (hostaddr->sa_family == AF_INET)) {
		priv->ip = ((struct sockaddr_in *)hostaddr)->sin_addr.s_addr;
	}

	// save the private data
	smfi_setpriv(ctx, (void*)priv);

	// continue processing
	return SMFIS_CONTINUE;
}

// Envelope sender callback: store the lower cased sender address and note
// whether the client authenticated (the {auth_authen} macro is defined).
sfsistat mlfi_envfrom(SMFICTX *ctx, char **from)
{
	mlfiPriv *priv = MLFIPRIV;
	priv->mailaddr = to_lower_string(from[0]);
	char *authen   = smfi_getsymval(ctx, "{auth_authen}");
	priv->authenticated = (authen != NULL);
	return SMFIS_CONTINUE;
}

// Envelope recipient callback: decide whether this recipient is rejected
// (dnsbl hit, blacklisting, failed verification) or accepted. Accepted
// recipients are classified as white, or as needing content filtering at
// end of message.
sfsistat mlfi_envrcpt(SMFICTX *ctx, char **rcpt)
{
	DNSBLP rejectlist = NULL;	// list that caused the reject
	mlfiPriv &priv = *MLFIPRIV;
	CONFIG &dc = *priv.pc;
	if (!priv.queueid) {
		// the mta may not supply the queue id macro at this stage; strdup
		// of a null pointer would crash. in that case queueid stays NULL
		// and we try again on the next recipient.
		char *qid = smfi_getsymval(ctx, "i");
		if (qid) priv.queueid = strdup(qid);
	}
	char *rcptaddr	= rcpt[0];
	char *loto		= to_lower_string(rcptaddr);
	// the context is selected by recipient first, then by sender
	CONTEXT    &con = *(dc.find_context(loto)->find_context(priv.mailaddr));
	VERIFYP 	ver = con.find_verify(loto);
	if (debug_syslog > 1) {
		char buf[maxlen];
		char msg[maxlen];
		snprintf(msg, sizeof(msg), "from <%s> to <%s> using context %s", priv.mailaddr, loto, con.get_full_name(buf,maxlen));
		my_syslog(&priv, msg);
	}
	free(loto);
	char *fromvalue = con.find_from(priv.mailaddr);
	status st;
	if (priv.authenticated) {
		// authenticated clients are always whitelisted
		st = white;
	}
	else if (fromvalue == token_black) {
		st = black;
	}
	else if (fromvalue == token_white) {
		st = white;
	}
	else {
		// check the dns based lists
		st = (check_dnsbl(priv, con.get_dnsbl_list(), rejectlist)) ? reject : oksofar;
	}
	if (st == reject) {
		// reject the recipient based on some dnsbl
		char adr[sizeof "255.255.255.255"];
		adr[0] = '\0';
		inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr));
		char buf[maxlen];
		// the message of the list is a format with up to two %s for the address
		snprintf(buf, sizeof(buf), rejectlist->message, adr, adr);
		smfi_setreply(ctx, "550", "5.7.1", buf);
		return SMFIS_REJECT;
	}
	if (st == black) {
		// reject the recipient based on blacklisting either from or to
		smfi_setreply(ctx, "550", "5.7.1", "no such user");
		return SMFIS_REJECT;
	}
	if (ver && (st != white)) {
		// try to verify this from/to pair of addresses since it is not explicitly whitelisted
		char *loto = to_lower_string(rcptaddr);
		bool rc = ver->ok(priv.mailaddr, loto);
		free(loto);
		if (!rc) {
			smfi_setreply(ctx, "550", "5.7.1", "no such user");
			return SMFIS_REJECT;
		}
	}
	// accept the recipient
	if (!con.get_content_filtering()) st = white;
	if (st == oksofar) {
		// but remember the non-whites
		priv.need_content_filter(rcptaddr, con);
		priv.only_whites = false;
	}
	if (st == white) {
		priv.have_whites = true;
	}
	return SMFIS_CONTINUE;
}

// Body callback: feed this chunk of the body to the url scanner, unless
// the message needs no content filtering at all.
sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len)
{
	mlfiPriv *priv = MLFIPRIV;
	const bool skip = priv->authenticated || priv->only_whites;
	if (!skip) {
		priv->scanner->scan(data, len);
	}
	return SMFIS_CONTINUE;
}

// End of message callback: apply the content filter on behalf of the
// recipients that asked for it. Depending on the outcome the message is
// accepted, rejected as a whole, or accepted with the recipients that do
// not want it removed. The per message state is reset before returning.
sfsistat mlfi_eom(SMFICTX *ctx)
{
	mlfiPriv &priv = *MLFIPRIV;
	sfsistat  rc   = SMFIS_CONTINUE;
	// authenticated senders and messages with only whitelisting recipients pass unfiltered
	if (!priv.authenticated && !priv.only_whites) {
		// assert env_to not empty
		char  buf[maxlen];
		char *msg	 = NULL;
		char *host	 = NULL;
		int   ip;
		bool  random = false;
		int   limit  = 0;
		string_set alive;	// recipients that are still willing to take the message

		context_map::iterator to = priv.env_to.begin();
		while (to != priv.env_to.end()) {
			char	*rcpt = to->first;
			CONTEXT &con  = *(to->second);
			if (con.acceptable_content(*priv.memory, msg)) {
				alive.insert(rcpt);
				random |= con.get_host_random();
				limit	= max(limit, con.get_host_limit());
			}
			else {
				// bad html tags or excessive hosts
				smfi_delrcpt(ctx, rcpt);
			}
			++to;
		}

		// if alive is empty, we must have set msg above in acceptable_content()
		bool rejecting = alive.empty();
		if (!rejecting) {
			char *fmt, *found;
			if (check_hosts(priv, random, limit, fmt, host, ip, found)) {
				if (found) {
					// uribl style
					snprintf(buf, sizeof(buf), fmt, host, found);
				}
				else {
					// dnsbl style
					char adr[sizeof "255.255.255.255"];
					adr[0] = '\0';
					inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
					snprintf(buf, sizeof(buf), fmt, host, adr);
				}
				msg = buf;
				rejecting = true;
			}
		}

		if (rejecting) {
			if (!priv.have_whites) {
				// can reject the entire message
				smfi_setreply(ctx, "550", "5.7.1", msg);
				rc = SMFIS_REJECT;
			}
			else {
				// need to accept it but remove the recipients that don't want it
				string_set::iterator who = alive.begin();
				while (who != alive.end()) {
					smfi_delrcpt(ctx, *who);
					++who;
				}
			}
		}
	}
	// reset for a new message on the same connection
	mlfi_abort(ctx);
	return rc;
}

// Abort callback, also called from mlfi_eom(): discard the per message
// state so the connection can carry another message.
sfsistat mlfi_abort(SMFICTX *ctx)
{
	mlfiPriv *priv = MLFIPRIV;
	priv->reset();
	return SMFIS_CONTINUE;
}

// Close callback: destroy the private data of the connection, if any.
sfsistat mlfi_close(SMFICTX *ctx)
{
	mlfiPriv *priv = MLFIPRIV;
	if (priv) {
		delete priv;
		// make sure nobody sees the stale pointer
		smfi_setpriv(ctx, NULL);
	}
	return SMFIS_CONTINUE;
}

// Filter description for libmilter: the filter name, the capabilities it
// needs, and the callback for each stage of the smtp conversation. A NULL
// entry means the filter is not interested in that stage.
struct smfiDesc smfilter =
{
	"DNSBL",            // filter name
	SMFI_VERSION,		// version code -- do not change
	SMFIF_DELRCPT,		// flags - we call smfi_delrcpt() to remove recipients
	mlfi_connect,		// connection info filter
	NULL,				// SMTP HELO command filter
	mlfi_envfrom,		// envelope sender filter
	mlfi_envrcpt,		// envelope recipient filter
	NULL,				// header filter
	NULL,				// end of header
	mlfi_body,			// body block filter
	mlfi_eom,			// end of message
	mlfi_abort, 		// message aborted
	mlfi_close, 		// connection cleanup
};


////////////////////////////////////////////////
//	reload the config
//
CONFIG* new_conf();
// Load dnsbl.conf into a fresh CONFIG that carries the next generation
// number. Returns the new configuration, or NULL if loading failed.
CONFIG* new_conf() {
	CONFIG *fresh = new CONFIG;

	// the generation counter is shared, so take the mutex
	pthread_mutex_lock(&config_mutex);
		fresh->generation = generation++;
	pthread_mutex_unlock(&config_mutex);

	if (debug_syslog) {
		char note[maxlen];
		snprintf(note, sizeof(note), "loading configuration generation %d", fresh->generation);
		my_syslog(note);
	}

	if (!load_conf(*fresh, "dnsbl.conf")) {
		// unusable, throw it away
		delete fresh;
		return NULL;
	}
	fresh->load_time = time(NULL);
	return fresh;
}


////////////////////////////////////////////////
//	thread to watch the old config files for changes
//	and reload when needed. we also cleanup old
//	configs whose reference count has gone to zero.
//
void* config_loader(void *arg);
void* config_loader(void *arg) {
	typedef set<CONFIG *> configp_set;
	configp_set old_configs;
	while (loader_run) {
		sleep(180);  // look for modifications every 3 minutes
		if (!loader_run) break;
		CONFIG &dc = *config;
		time_t then = dc.load_time;
		struct stat st;
		bool reload = false;
		for (string_set::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) {
			char *fn = *i;
			if (stat(fn, &st))			 reload = true; // file disappeared
			else if (st.st_mtime > then) reload = true; // file modified
			if (reload) break;
		}
		if (reload) {
			CONFIG *newc = new_conf();
			if (newc) {
				// replace the global config pointer
				pthread_mutex_lock(&config_mutex);
					CONFIG *old = config;
					config = newc;
				pthread_mutex_unlock(&config_mutex);
				if (old) old_configs.insert(old);
			}
			else {
				// failed to load new config
				my_syslog("failed to load new configuration");
				system("echo 'failed to load new dnsbl configuration from /etc/dnsbl' | mail -s 'error in /etc/dnsbl configuration' root");
				// update the load time on the current config to prevent complaining every 3 minutes
				dc.load_time = time(NULL);
			}
		}
		// now look for old configs with zero ref counts
		for (configp_set::iterator i=old_configs.begin(); i!=old_configs.end(); ) {
			CONFIG *old = *i;
			if (!old->reference_count) {
				if (debug_syslog) {
					char buf[maxlen];
					snprintf(buf, sizeof(buf), "freeing memory for old configuration generation %d", old->generation);
					my_syslog(buf);
				}
				delete old; // destructor does all the work
				old_configs.erase(i++);
			}
			else i++;
		}
	}
	return NULL;
}


// Print the command line help to stderr.
//
// prog is the program name substituted into the "Usage:" line (main()
// passes argv[0]). Nothing is returned and the process is not terminated
// here; callers decide what to do after the help has been printed.
void usage(char *prog);
void usage(char *prog)
{
	fprintf(stderr, "Usage: %s  [-d [level]] [-c] [-s] [-e from|to] -r port -p sm-sock-addr [-t timeout]\n", prog);
	fprintf(stderr, "where port is for the connection to our own dns resolver processes\n");
	fprintf(stderr, "    and should be local-domain-socket-file-name\n");
	fprintf(stderr, "where sm-sock-addr is for the connection to sendmail\n");
	fprintf(stderr, "    and should be one of\n");
	fprintf(stderr, "        inet:port@ip-address\n");
	fprintf(stderr, "        local:local-domain-socket-file-name\n");
	fprintf(stderr, "-c will load and dump the config to stdout\n");
	fprintf(stderr, "-s will stress test the config loading code by repeating the load/free cycle\n");
	fprintf(stderr, "        in an infinite loop.\n");
	fprintf(stderr, "-d will set the syslog message level, currently 0 to 3\n");
	fprintf(stderr, "-e will print the results of looking up the from and to addresses in the\n");
	fprintf(stderr, "        current config. The | character is used to separate the from and to\n");
	fprintf(stderr, "        addresses in the argument to the -e switch\n");
}



// Prepare a unix domain socket path for use.
//
// sock is the filesystem path of the socket. Any existing entry at that
// path is removed. The result of unlink() is not checked, so a path that
// does not exist is silently accepted. No socket is created or bound here.
void setup_socket(char *sock);
void setup_socket(char *sock) {
	unlink(sock);
}


/*
 * SIGCHLD handler, installed by the resolver listener process in main().
 *
 * Reaps every child that has already terminated, without blocking, so
 * that finished worker processes do not linger as zombies. The exit
 * status of the children is ignored.
 *
 * waitpid() sets errno (for example to ECHILD once no children are left),
 * and this handler can interrupt code that is about to inspect errno, so
 * the value seen on entry is restored before returning.
 *
 * signo is the delivered signal number; it is not used.
 */
void sig_chld(int signo)
{
	(void)signo;
	const int saved_errno = errno;
	int status;
	/* Wait for any child without blocking */
	while (waitpid(-1, &status, WNOHANG) > 0) {
		// ignore child exit status, we only do this to cleanup zombies
	}
	errno = saved_errno;
}


int main(int argc, char**argv)
{
	token_init();
	bool check	 = false;
	bool stress  = false;
	bool setconn = false;
	bool setreso = false;
	char *email = NULL;
	int c;
	const char *args = "r:p:t:e:d:chs";
	extern char *optarg;

	// Process command line options
	while ((c = getopt(argc, argv, args)) != -1) {
		switch (c) {
			case 'r':
				if (optarg == NULL || *optarg == '\0') {
					fprintf(stderr, "Illegal resolver socket: %s\n", optarg);
					exit(EX_USAGE);
				}
				resolver_port = strdup(optarg);
				setup_socket(resolver_port);
				setreso = true;
				break;

			case 'p':
				if (optarg == NULL || *optarg == '\0') {
					fprintf(stderr, "Illegal sendmail socket: %s\n", optarg);
					exit(EX_USAGE);
				}
				if (smfi_setconn(optarg) == MI_FAILURE) {
					fprintf(stderr, "smfi_setconn failed\n");
					exit(EX_SOFTWARE);
				}
					 if (strncasecmp(optarg, "unix:", 5) == 0)  setup_socket(optarg + 5);
				else if (strncasecmp(optarg, "local:", 6) == 0) setup_socket(optarg + 6);
				setconn = true;
				break;

			case 't':
				if (optarg == NULL || *optarg == '\0') {
					fprintf(stderr, "Illegal timeout: %s\n", optarg);
					exit(EX_USAGE);
				}
				if (smfi_settimeout(atoi(optarg)) == MI_FAILURE) {
					fprintf(stderr, "smfi_settimeout failed\n");
					exit(EX_SOFTWARE);
				}
				break;

			case 'e':
				if (email) free(email);
				email = strdup(optarg);
				break;

			case 'c':
				check = true;
				break;

			case 's':
				stress = true;
				break;

			case 'd':
				if (optarg == NULL || *optarg == '\0') debug_syslog = 1;
				else								   debug_syslog = atoi(optarg);
				break;

			case 'h':
			default:
				usage(argv[0]);
				exit(EX_USAGE);
		}
	}

	if (check) {
		use_syslog	 = false;
		debug_syslog = 10;
		CONFIG *conf = new_conf();
		if (conf) {
			conf->dump();
			delete conf;
			return 0;
		}
		else {
			return 1;	// config failed to load
		}
	}

	if (stress) {
		fprintf(stdout, "stress testing\n");
		while (1) {
			for (int i=0; i<10; i++) {
				CONFIG *conf = new_conf();
				if (conf) delete conf;
			}
			fprintf(stdout, ".");
			fflush(stdout);
			sleep(1);
		}
	}

	if (email) {
		char *x = strchr(email, '|');
		if (x) {
			*x = '\0';
			char *from = strdup(email);
			char *to   = strdup(x+1);
			use_syslog = false;
			CONFIG *conf = new_conf();
			if (conf) {
				CONTEXTP con = conf->find_context(to);
				char buf[maxlen];
				fprintf(stdout, "envelope to   <%s> finds context %s\n", to, con->get_full_name(buf,maxlen));
				CONTEXTP fc = con->find_context(from);
				fprintf(stdout, "envelope from <%s> finds context %s\n", from, fc->get_full_name(buf,maxlen));
				char *st = fc->find_from(from);
				fprintf(stdout, "envelope from <%s> finds status %s\n", from, st);
				delete conf;
			}
		}
		return 0;
	}

	if (!setconn) {
		fprintf(stderr, "%s: Missing required -p argument\n", argv[0]);
		usage(argv[0]);
		exit(EX_USAGE);
	}

	if (!setreso) {
		fprintf(stderr, "%s: Missing required -r argument\n", argv[0]);
		usage(argv[0]);
		exit(EX_USAGE);
	}

	if (smfi_register(smfilter) == MI_FAILURE) {
		fprintf(stderr, "smfi_register failed\n");
		exit(EX_UNAVAILABLE);
	}

	// switch to background mode
	if (daemon(1,0) < 0) {
		fprintf(stderr, "daemon() call failed\n");
		exit(EX_UNAVAILABLE);
	}

	// write the pid
	const char *pidpath = "/var/run/dnsbl.pid";
	unlink(pidpath);
	FILE *f = fopen(pidpath, "w");
	if (f) {
#ifdef linux
		// from a comment in the DCC source code:
		// Linux threads are broken.  Signals given the
		// original process are delivered to only the
		// thread that happens to have that PID.  The
		// sendmail libmilter thread that needs to hear
		// SIGINT and other signals does not, and that breaks
		// scripts that need to stop milters.
		// However, signaling the process group works.
		fprintf(f, "-%d\n", (u_int)getpgrp());
#else
		fprintf(f, "%d\n", (u_int)getpid());
#endif
		fclose(f);
	}

	// initialize the thread sync objects
	pthread_mutex_init(&config_mutex, 0);
	pthread_mutex_init(&syslog_mutex, 0);
	pthread_mutex_init(&resolve_mutex, 0);
	pthread_mutex_init(&fd_pool_mutex, 0);

	// drop root privs
	struct passwd *pw = getpwnam("dnsbl");
	if (pw) {
		if (setgid(pw->pw_gid) == -1) {
			my_syslog("failed to switch to group dnsbl");
		}
		if (setuid(pw->pw_uid) == -1) {
			my_syslog("failed to switch to user dnsbl");
		}
	}

	// load the initial config
	config = new_conf();
	if (!config) {
		my_syslog("failed to load initial configuration, quitting");
		exit(1);
	}

	// fork off the resolver listener process
	pid_t child = fork();
	if (child < 0) {
		my_syslog("failed to create resolver listener process");
		exit(0);
	}
	if (child == 0) {
		// we are the child - dns resolver listener process
		resolver_socket = socket(AF_UNIX, SOCK_STREAM, 0);
		if (resolver_socket < 0) {
			my_syslog("child failed to create resolver socket");
			exit(0);   // failed
		}
		sockaddr_un server;
		memset(&server, '\0', sizeof(server));
		server.sun_family = AF_UNIX;
		strncpy(server.sun_path, resolver_port, sizeof(server.sun_path)-1);
		//try to bind the address to the socket.
		if (bind(resolver_socket, (sockaddr *)&server, sizeof(server)) < 0) {
			// bind failed
			shutdown(resolver_socket, SHUT_RDWR);
			close(resolver_socket);
			my_syslog("child failed to bind resolver socket");
			exit(0);   // failed
		}
		//listen on the socket.
		if (listen(resolver_socket, 10) < 0) {
			// listen failed
			shutdown(resolver_socket, SHUT_RDWR);
			close(resolver_socket);
			my_syslog("child failed to listen to resolver socket");
			exit(0);   // failed
		}
		// setup sigchld handler to prevent zombies
		struct sigaction act;
		act.sa_handler = sig_chld;		// Assign sig_chld as our SIGCHLD handler
		sigemptyset(&act.sa_mask);		// We don't want to block any other signals in this example
		act.sa_flags = SA_NOCLDSTOP;	// only want children that have terminated
		if (sigaction(SIGCHLD, &act, NULL) < 0) {
			my_syslog("child failed to setup SIGCHLD handler");
			exit(0);   // failed
		}
		while (true) {
			sockaddr_un client;
			socklen_t	clientlen = sizeof(client);
			int s = accept(resolver_socket, (sockaddr *)&client, &clientlen);
			if (s > 0) {
				// accept worked, it did not get cancelled before we could accept it
				// fork off a process to handle this connection
				int newchild = fork();
				if (newchild == 0) {
					// this is the worker process
					// child does not need the listening socket
					close(resolver_socket);
					process_resolver_requests(s);
					exit(0);
				}
				else {
					// this is the parent
					// parent does not need the accepted socket
					close(s);
				}
			}
		}
		exit(0);	// make sure we don't fall thru.
	}
	else {
		sleep(2);	// allow child to get started
	}

	// only create threads after the fork() in daemon
	pthread_t tid;
	if (pthread_create(&tid, 0, config_loader, 0))
		my_syslog("failed to create config loader thread");
	if (pthread_detach(tid))
		my_syslog("failed to detach config loader thread");
	if (pthread_create(&tid, 0, verify_closer, 0))
		my_syslog("failed to create verify closer thread");
	if (pthread_detach(tid))
		my_syslog("failed to detach verify closer thread");

	time_t starting = time(NULL);
	int rc = smfi_main();
	if ((rc != MI_SUCCESS) && (time(NULL) > starting+5*60)) {
		my_syslog("trying to restart after smfi_main()");
		loader_run = false; 	// eventually the config loader thread will terminate
		execvp(argv[0], argv);
	}
	exit((rc == MI_SUCCESS) ? 0 : EX_UNAVAILABLE);
}