view src/dnsbl.cpp @ 30:8f43f8fa1ad7 stable-2-5

ignore text including bad html tags inside comments
author carl
date Thu, 27 May 2004 21:30:26 -0700
parents 4dfdf33f1db0
children fc7f8f3ea90f
line wrap: on
line source

/*

Copyright (c) 2004 Carl Byington - 510 Software Group, released under
the GPL version 2 or any later version at your choice available at
http://www.fsf.org/licenses/gpl.txt

Based on a sample milter Copyright (c) 2000-2003 Sendmail, Inc. and its
suppliers.  Inspired by the DCC by Rhyolite Software

-p port  The port through which the MTA will connect to this milter.
-t sec   The timeout value.
-c       Check the config, and print a copy to stdout. Don't start the
         milter or do anything with the socket.
-d       Add debug syslog entries


TODO:
1) Add config for max_recipients for each mail domain. Recipients in
excess of that limit will be rejected, and the entire data will be
rejected if it is sent.

2) Add config for poison addresses. If any recipient is poison, all
recipients are rejected even if they would be whitelisted, and the
data is rejected if sent.

*/


// from sendmail sample
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <sysexits.h>
#include <unistd.h>

// needed for socket io
#include <sys/ioctl.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netdb.h>
#include <sys/socket.h>

// needed for thread
#include <pthread.h>

// needed for std c++ collections
#include <set>
#include <map>
#include <list>

// for the dns resolver
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>

// misc stuff needed here
#include <ctype.h>
#include <fstream>
#include <syslog.h>

// version identification string; "$Id$" is presumably a keyword that the
// revision control system expands on checkout - TODO confirm
static char* dnsbl_version="$Id$";

// keywords shared by the config maps and the lookup code:
//   DEFAULT      returned by lookup1() when an address has no map entry
//   WHITE/BLACK  list names compared against in mlfi_envrcpt()
//   OK/MANY      leading line tags recognized by load_conf_dcc(), which
//                selects the WHITE list for OK and the BLACK list for MANY
#define DEFAULT "default"
#define WHITE   "white"
#define BLACK   "black"
#define OK      "ok"
#define MANY    "many"

// verdict produced by the recipient checks (mlfi_envrcpt, check_dnsbl)
// and by the end-of-message content checks (check_hosts)
enum status {oksofar,       // not rejected yet
             white,         // whitelisted by envelope from
             black,         // blacklisted by envelope from or to
             reject,        // rejected by a dns list
             reject_tag,    // too many bad html tags
             reject_host};  // too many hosts/urls in body

using namespace std;

// the libmilter header is included with C linkage, and the milter
// callbacks defined in this file are declared with C linkage as well.
// These are the functions entered into the smfilter descriptor table.
extern "C" {
    #include "libmilter/mfapi.h"
    sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr);
    sfsistat mlfi_envfrom(SMFICTX *ctx, char **argv);
    sfsistat mlfi_envrcpt(SMFICTX *ctx, char **argv);
    sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len);
    sfsistat mlfi_eom(SMFICTX *ctx);
    sfsistat mlfi_abort(SMFICTX *ctx);
    sfsistat mlfi_close(SMFICTX *ctx);
}

// "less than" ordering on nul terminated C strings, used as the
// comparator for the char* keyed maps and sets in this file so that
// keys are compared by content rather than by pointer value.
// The strings are only read, so they are taken as const char*; plain
// char* keys still convert implicitly.
struct ltstr {
    bool operator()(const char* s1, const char* s2) const {
        return strcmp(s1, s2) < 0;
    }
};

// One dns blacklist definition. The two strings are stored as the
// pointers handed to the constructor; they are neither copied nor
// freed by this object.
struct DNSBL {
    char    *suffix;    // blacklist suffix like blackholes.five-ten-sg.com
    char    *message;   // error message with one or two %s operators for the ip address replacement
    DNSBL(char *s, char *m);
};

// remember the suffix and the reply message template
DNSBL::DNSBL(char *s, char *m)
    : suffix(s),
      message(m)
{
}

// container shorthands; every map/set keyed by char* orders its keys
// by string content via ltstr
typedef DNSBL *                           DNSBLP;       // pointer to one dnsbl
typedef list<DNSBLP>                      DNSBLL;       // ordered list of dnsbls
typedef DNSBLL *                          DNSBLLP;      // pointer to such a list
typedef map<char *, char *, ltstr>        string_map;   // string -> string
typedef map<char *, string_map *, ltstr>  from_map;     // name -> string_map
typedef map<char *, DNSBLP, ltstr>        dnsblp_map;   // name -> dnsbl
typedef map<char *, DNSBLLP, ltstr>       dnsbllp_map;  // name -> dnsbl list
typedef set<char *, ltstr>                string_set;   // set of strings
typedef list<char *>                      string_list;  // list of strings

// One loaded configuration. Each mlfiPriv takes a reference to the
// current CONFIG (reference_count) for the life of its connection.
// The destructor deletes the DNSBL objects, the dnsbl lists and the
// envelope from maps; it does not free any of the char* strings.
struct CONFIG {
    // the only mutable stuff once it has been loaded from the config file
    int         reference_count;    // protected by the global config_mutex
    // all the rest is constant after loading from the config file
    int         generation;
    time_t      load_time;
    string_list config_files;       // every file read for this config, used to detect repeated includes
    dnsblp_map  dnsbls;             // dnsbl name -> DNSBL, objects owned by this CONFIG
    dnsbllp_map dnsblls;            // list name -> list of DNSBL pointers, lists owned by this CONFIG
    from_map    env_from;           // map name -> envelope from map, maps owned by this CONFIG
    string_map  env_to_dnsbll;      // map recipient to a named dnsbll
    string_map  env_to_chkfrom;     // map recipient to a named from map
    char *      content_suffix;     // for sbl url body filtering
    char *      content_message;    // ""
    char *      host_limit_message; // error message for excessive host names
    int         host_limit;         // limit on host names
    char *      tag_limit_message;  // error message for excessive bad html tags
    int         tag_limit;          // limit on bad html tags
    string_set  html_tags;          // set of valid html tags
    string_set  tlds;               // set of valid tld components
    CONFIG();
    ~CONFIG();
};
// start with an empty configuration: no references, no content
// filtering suffix or messages, and both limits at zero. The
// containers are left default constructed (empty).
CONFIG::CONFIG()
    : reference_count(0),
      generation(0),
      load_time(0),
      content_suffix(NULL),
      content_message(NULL),
      host_limit_message(NULL),
      host_limit(0),
      tag_limit_message(NULL),
      tag_limit(0)
{
}
// release everything this configuration allocated with new
CONFIG::~CONFIG() {
    // the DNSBL objects themselves are owned through the name map
    dnsblp_map::iterator bl = dnsbls.begin();
    while (bl != dnsbls.end()) {
        delete bl->second;
        ++bl;
    }
    // each named list only holds pointers to DNSBL objects that were
    // already deleted above, so only the list container is deleted here
    dnsbllp_map::iterator bll = dnsblls.begin();
    while (bll != dnsblls.end()) {
        delete bll->second;
        ++bll;
    }
    // the per-name envelope from maps
    from_map::iterator fm = env_from.begin();
    while (fm != env_from.end()) {
        delete fm->second;
        ++fm;
    }
}

static bool debug_syslog = false;   // when set, check_hosts() logs every host lookup through my_syslog()
static bool loader_run   = true;    // used to stop the config loader thread
static string_set all_strings;      // owns all the strings, only modified by the config loader thread
static CONFIG * config = NULL;      // protected by the config_mutex
static int  generation = 0;         // protected by the config_mutex

static pthread_mutex_t  config_mutex;   // guards config, generation and CONFIG::reference_count
static pthread_mutex_t  syslog_mutex;   // serializes the openlog/syslog/closelog sequence in my_syslog()
static pthread_mutex_t  resolve_mutex;  // serializes resolver calls made via protected_dns_interface()


////////////////////////////////////////////////
// free() every string held by a string_set, then empty the set
//
static void discard(string_set &s);
static void discard(string_set &s) {
    string_set::iterator cur = s.begin();
    while (cur != s.end()) {
        free(*cur);
        ++cur;
    }
    // the freed pointers are never compared again, clear just drops them
    s.clear();
}

////////////////////////////////////////////////
// intern a string in a string set: return the copy the set already
// holds, or add a fresh strdup() copy and return that
//
static char* register_string(string_set &s, char *name);
static char* register_string(string_set &s, char *name) {
    string_set::iterator known = s.find(name);
    if (known == s.end()) {
        // first time we see this value, the set keeps the copy
        char *copy = strdup(name);
        s.insert(copy);
        return copy;
    }
    return *known;
}

////////////////////////////////////////////////
// syslog a message
//
// The text is logged with ident "dnsbl", facility LOG_MAIL and priority
// LOG_NOTICE. The log is opened and closed around every message, and
// the whole sequence runs under syslog_mutex. The text is passed as the
// argument of a "%s" format, so it is never interpreted as a format.
//
static void my_syslog(char *text);
static void my_syslog(char *text) {
    pthread_mutex_lock(&syslog_mutex);
        openlog("dnsbl", LOG_PID, LOG_MAIL);
        syslog(LOG_NOTICE, "%s", text);
        closelog();
    pthread_mutex_unlock(&syslog_mutex);
}


// include the content scanner
#include "scanner.cpp"


////////////////////////////////////////////////
// mail filter private data, held for us by sendmail
//
// One object per smtp connection: created in mlfi_connect(), reset for
// each new message by mlfi_abort() (which mlfi_eom() also calls), and
// deleted in mlfi_close(). It holds a counted reference to the CONFIG
// that was current when the connection started.
//
struct mlfiPriv
{
    // connection specific data
    CONFIG  *pc;                    // global context with our maps
    int     ip;                     // ip4 address of the smtp client
    map<DNSBLP, status> checked;    // status from those lists
    // message specific data
    char    *mailaddr;      // envelope from value
    bool    authenticated;  // client authenticated? if so, suppress all dnsbl checks
    bool    have_whites;    // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails
    bool    only_whites;    // every recipient is whitelisted?
    string_set  non_whites; // remember the non-whitelisted recipients so we can remove them if need be
    recorder    *memory;    // memory for the content scanner
    url_scanner *scanner;   // object to handle body scanning
    mlfiPriv();
    ~mlfiPriv();
    void reset(bool final = false); // for a new message
};
// attach to the current configuration and set up empty per-message state
mlfiPriv::mlfiPriv()
    : ip(0),
      mailaddr(NULL),
      authenticated(false),
      have_whites(false),
      only_whites(true)
{
    // take a counted reference to whatever config is current right now
    pthread_mutex_lock(&config_mutex);
        pc = config;
        pc->reference_count++;
    pthread_mutex_unlock(&config_mutex);
    // the recorder works from the tag and tld sets of that config,
    // and the scanner feeds the recorder
    memory  = new recorder(&pc->html_tags, &pc->tlds);
    scanner = new url_scanner(memory);
}
// release the per-message state, then give back the config reference
mlfiPriv::~mlfiPriv() {
    // The recorder was constructed with pointers into *pc, so destroy
    // it (and the scanner) while we still hold our reference to the
    // config. NOTE(review): presumably a config whose reference count
    // drops to zero may be deleted by the loader thread - confirm.
    reset(true);
    pthread_mutex_lock(&config_mutex);
        pc->reference_count--;
    pthread_mutex_unlock(&config_mutex);
}
// Drop all message specific state. With final == false (the default)
// fresh state is set up for the next message on the same connection;
// with final == true (used by the destructor) nothing is reallocated.
void mlfiPriv::reset(bool final) {
    free(mailaddr);     // free(NULL) is a no-op
    mailaddr = NULL;
    discard(non_whites);
    // the scanner was built on top of the recorder, so tear them down
    // in the reverse order of construction
    delete scanner;
    delete memory;
    // never leave dangling pointers behind, in case one of the
    // allocations below throws and the destructor runs later
    scanner = NULL;
    memory  = NULL;
    if (!final) {
        authenticated = false;
        have_whites   = false;
        only_whites   = true;
        memory        = new recorder(&pc->html_tags, &pc->tlds);
        scanner       = new url_scanner(memory);
    }
}

// fetch the mlfiPriv stored for this connection via smfi_setpriv();
// expands to a call that uses a variable named ctx from the caller's scope
#define MLFIPRIV    ((struct mlfiPriv *) smfi_getpriv(ctx))


////////////////////////////////////////////////
// register a global string
//
// Interns name in the file level all_strings set and returns the copy
// held there. all_strings is documented as only modified by the config
// loader thread.
//
static char* register_string(char *name);
static char* register_string(char *name) {
    return register_string(all_strings, name);
}


////////////////////////////////////////////////
// continue the strtok() scan in progress and return the next token as
// a registered global string, or NULL when the line has no more tokens
//
static char* next_token(char *delim);
static char* next_token(char *delim) {
    char *token = strtok(NULL, delim);
    if (token) {
        return register_string(token);
    }
    return NULL;
}


////////////////////////////////////////////////
// lookup an email address in the env_from or env_to maps
//
static char* lookup1(char *email, string_map map);
static char* lookup1(char *email, string_map map) {
    string_map::iterator i = map.find(email);
    if (i != map.end()) return (*i).second;
    char *x = strchr(email, '@');
    if (!x) return DEFAULT;
    x++;
    i = map.find(x);
    if (i != map.end()) return (*i).second;
    return DEFAULT;
}


////////////////////////////////////////////////
// lookup an email address in the env_from or env_to maps
// this email address is passed in from sendmail, and will
// always be enclosed in <>. It may have mixed case, just
// as the mail client sent it.
//
static char* lookup(char* email, string_map map);
static char* lookup(char* email, string_map map) {
    int n = strlen(email)-2;
    if (n < 1) return DEFAULT;  // malformed
    char *key = strdup(email+1);
    key[n] = '\0';
    for (int i=0; i<n; i++) key[i] = tolower(key[i]);
    char *rc = lookup1(key, map);
    free(key);
    return rc;
}


////////////////////////////////////////////////
//  return the dnsbl registered under this name, or NULL if there is none
//
static DNSBLP find_dnsbl(CONFIG &dc, char *name);
static DNSBLP find_dnsbl(CONFIG &dc, char *name) {
    dnsblp_map::iterator hit = dc.dnsbls.find(name);
    if (hit != dc.dnsbls.end()) {
        return hit->second;
    }
    return NULL;
}


////////////////////////////////////////////////
//  return the dnsbl list registered under this name, or NULL if there is none
//
static DNSBLLP find_dnsbll(CONFIG &dc, char *name);
static DNSBLLP find_dnsbll(CONFIG &dc, char *name) {
    dnsbllp_map::iterator hit = dc.dnsblls.find(name);
    if (hit != dc.dnsblls.end()) {
        return hit->second;
    }
    return NULL;
}


////////////////////////////////////////////////
//  return the envelope from map registered under this name, or NULL
//  if there is none
//
static string_map* find_from_map(CONFIG &dc, char *name);
static string_map* find_from_map(CONFIG &dc, char *name) {
    from_map::iterator hit = dc.env_from.find(name);
    if (hit != dc.env_from.end()) {
        return hit->second;
    }
    return NULL;
}


////////////////////////////////////////////////
//  find the envfrom map with a specific name, creating and registering
//  an empty one in the config if it does not exist yet
//
static string_map& really_find_from_map(CONFIG &dc, char *name);
static string_map& really_find_from_map(CONFIG &dc, char *name) {
    string_map *existing = find_from_map(dc, name);
    if (existing) {
        return *existing;
    }
    string_map *fresh = new string_map;
    dc.env_from[name] = fresh;
    return *fresh;
}


////////////////////////////////////////////////
//
//  ask a dns question and get an A record answer - we don't try
//  very hard, just using the default resolver retry settings.
//  If we cannot get an answer, we just accept the mail.  The
//  caller must ensure thread safety.
//
//  question   the name to resolve
//  maybe_ip   if true, and no A record was found, the question is
//             also tried as a dotted ip address (only in the
//             NS_PACKETSZ build; the gethostbyname build ignores it)
//
//  Returns the first ip4 address found, as the raw bytes of the
//  record copied into an int, or 0 if there is no answer.
//
static int dns_interface(char *question, bool maybe_ip);
static int dns_interface(char *question, bool maybe_ip) {
#ifdef NS_PACKETSZ
    u_char answer[NS_PACKETSZ];
    int length = res_search(question, ns_c_in, ns_t_a, answer, sizeof(answer));
    // a resolver may report the full size of a reply that did not fit
    // in the buffer; never parse beyond what was actually stored
    if (length > (int)sizeof(answer)) length = (int)sizeof(answer);
    if (length >= 0) {  // no error yet
        // parse the answer
        ns_msg handle;
        ns_rr  rr;
        if (ns_initparse(answer, length, &handle) == 0) {
            int rrnum = 0;
            while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) {
                if (ns_rr_type(rr) == ns_t_a) {
                    int address;
                    memcpy(&address, ns_rr_rdata(rr), sizeof(address));
                    return address;
                }
            }
        }
    }
    if (maybe_ip) {
        // might be a bare ip address
        in_addr ip;
        if (inet_aton(question, &ip)) {
            return ip.s_addr;
        }
    }
    return 0;
#else
    struct hostent *host = gethostbyname(question);
    if (!host) return 0;
    if (host->h_addrtype != AF_INET) return 0;
    int address;
    memcpy(&address, host->h_addr, sizeof(address));
    return address;
#endif
}

////////////////////////////////////////////////
//  thread safe wrapper for dns_interface(): the lookup runs while
//  holding resolve_mutex
//
static int protected_dns_interface(char *question, bool maybe_ip);
static int protected_dns_interface(char *question, bool maybe_ip) {
    pthread_mutex_lock(&resolve_mutex);
    const int address = dns_interface(question, maybe_ip);
    pthread_mutex_unlock(&resolve_mutex);
    return address;
}

////////////////////////////////////////////////
//  check one ip address against a single dnsbl suffix
//
//  The four bytes of ip are written in reverse order in front of the
//  suffix and the resulting name is looked up. Returns reject if the
//  lookup yields an address, otherwise oksofar.
//
static status check_single(int ip, char *suffix);
static status check_single(int ip, char *suffix) {
    const u_char *octet = (const u_char *)&ip;
    // don't do dns lookups on localhost
    if (octet[0] == 127) return oksofar;
#ifdef NS_MAXDNAME
    char question[NS_MAXDNAME];
#else
    char question[1000];
#endif
    // make the dns question
    snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", octet[3], octet[2], octet[1], octet[0], suffix);
    // an A record for that name implies a blacklisted ip address
    if (protected_dns_interface(question, false)) {
        return reject;
    }
    return oksofar;
}


////////////////////////////////////////////////
//  check a single dnsbl
//
//  Convenience overload: checks the ip address against the suffix of
//  the given DNSBL and returns that result (reject or oksofar).
//
static status check_single(int ip, DNSBL &bl);
static status check_single(int ip, DNSBL &bl) {
    return check_single(ip, bl.suffix);
}


////////////////////////////////////////////////
//  check the client ip address against the list of dnsbls specified
//  for this recipient
//
//  Results are remembered per connection in priv.checked, so each dnsbl
//  is queried at most once per connection. On return rejectlist is the
//  last dnsbl examined, which is the rejecting one when the result is
//  reject. Authenticated clients and a NULL list are never rejected.
//
static status check_dnsbl(mlfiPriv &priv, DNSBLLP dnsbllp, DNSBLP &rejectlist);
static status check_dnsbl(mlfiPriv &priv, DNSBLLP dnsbllp, DNSBLP &rejectlist) {
    if (priv.authenticated || !dnsbllp) return oksofar;
    for (DNSBLL::iterator it = dnsbllp->begin(); it != dnsbllp->end(); ++it) {
        DNSBLP bl = *it;    // non null by construction
        rejectlist = bl;
        status verdict;
        map<DNSBLP, status>::iterator known = priv.checked.find(bl);
        if (known != priv.checked.end()) {
            // already asked this list on this connection
            verdict = known->second;
        }
        else {
            // have not checked this list yet
            verdict = check_single(priv.ip, *bl);
            priv.checked[bl] = verdict;
        }
        if (verdict == reject) return verdict;
    }
    return oksofar;
}


////////////////////////////////////////////////
//  check the host names recorded in priv.memory against the content
//  dnsbl, and apply the host name and bad html tag limits
//
//  Does nothing (oksofar) unless the config has a content suffix.
//  Returns reject_host once more hosts than the host limit have been
//  seen, reject when a host resolves to a listed address (host and ip
//  then identify the offender), and reject_tag when the bad html tag
//  count exceeds the tag limit. host is set to NULL once every host
//  has passed.
//
static status check_hosts(mlfiPriv &priv, char *&host, int &ip);
static status check_hosts(mlfiPriv &priv, char *&host, int &ip) {
    CONFIG &dc = *priv.pc;
    if (!dc.content_suffix) return oksofar;
    int seen = 0;
    string_set::iterator h = priv.memory->hosts.begin();
    for (; h != priv.memory->hosts.end(); ++h) {
        ++seen;
        const int host_max = dc.host_limit;
        if ((host_max > 0) && (seen > host_max)) return reject_host;
        host = *h;
        ip   = protected_dns_interface(host, true);
        if (debug_syslog) {
            char buf[200];
            if (!ip) {
                snprintf(buf, sizeof(buf), "host %s not found", host);
            }
            else {
                char adr[sizeof "255.255.255.255"];
                adr[0] = '\0';
                inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
                snprintf(buf, sizeof(buf), "host %s found at %s", host, adr);
            }
            my_syslog(buf);
        }
        // only hosts that resolved can be checked against the dnsbl
        if (ip && (check_single(ip, dc.content_suffix) == reject)) return reject;
    }
    host = NULL;
    const int binary  = priv.memory->binary_tags;
    const int bad     = priv.memory->bad_html_tags;
    const int tag_max = dc.tag_limit;
    // probably .zip or .tar.gz with random content
    if (binary > bad) return oksofar;
    if ((tag_max > 0) && (bad > tag_max)) return reject_tag;
    return oksofar;
}


////////////////////////////////////////////////
// start of sendmail milter interfaces
//
// connection callback: allocate the per connection private data and
// remember the ip4 address of the smtp client
sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr)
{
    // allocate some private memory
    mlfiPriv *priv = new mlfiPriv;
    // libmilter passes a NULL hostaddr when no socket address is
    // available for the connection; priv->ip then stays 0, just as it
    // does for any address family other than AF_INET
    if (hostaddr && (hostaddr->sa_family == AF_INET)) {
        priv->ip = ((struct sockaddr_in *)hostaddr)->sin_addr.s_addr;
    }

    // save the private data
    smfi_setpriv(ctx, (void*)priv);

    // continue processing
    return SMFIS_CONTINUE;
}

// envelope sender callback: keep a copy of the sender address and note
// whether the client authenticated (the {auth_authen} macro is set)
sfsistat mlfi_envfrom(SMFICTX *ctx, char **from)
{
    mlfiPriv &priv = *MLFIPRIV;
    const bool has_auth = (smfi_getsymval(ctx, "{auth_authen}") != NULL);
    priv.mailaddr      = strdup(from[0]);
    priv.authenticated = has_auth;
    return SMFIS_CONTINUE;
}

// envelope recipient callback: decide whether this recipient is
// whitelisted, blacklisted, rejected by one of its dnsbls, or simply
// accepted for now
sfsistat mlfi_envrcpt(SMFICTX *ctx, char **rcpt)
{
    DNSBLP    rejectlist = NULL;    // list that caused the reject
    status    st         = oksofar;
    mlfiPriv &priv       = *MLFIPRIV;
    CONFIG   &dc         = *priv.pc;
    char     *rcptaddr   = rcpt[0];

    // what does the config say about this recipient?
    char *dnsname  = lookup(rcptaddr, dc.env_to_dnsbll);
    char *fromname = lookup(rcptaddr, dc.env_to_chkfrom);

    const bool rcpt_black = (strcmp(dnsname,  BLACK) == 0) ||
                            (strcmp(fromname, BLACK) == 0);
    if (rcpt_black) {
        // two options to blacklist this recipient
        st = black;
    }
    else if (strcmp(fromname, WHITE) == 0) {
        st = white;
    }
    else {
        // check an env_from map
        string_map *sm = find_from_map(dc, fromname);
        if (sm != NULL) {
            // returns default if name not in map
            fromname = lookup(priv.mailaddr, *sm);
            if (strcmp(fromname, BLACK) == 0) {
                st = black; // blacklist this envelope from value
            }
            if (strcmp(fromname, WHITE) == 0) {
                st = white; // whitelist this envelope from value
            }
        }
    }

    // check dns lists, unless already decided or the recipient opted out
    if ((st == oksofar) && (strcmp(dnsname, WHITE) != 0)) {
        st = check_dnsbl(priv, find_dnsbll(dc, dnsname), rejectlist);
    }

    switch (st) {
        case reject: {
            // reject the recipient based on some dnsbl
            char adr[sizeof "255.255.255.255"];
            adr[0] = '\0';
            inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr));
            char buf[2000];
            snprintf(buf, sizeof(buf), rejectlist->message, adr, adr);
            smfi_setreply(ctx, "550", "5.7.1", buf);
            return SMFIS_REJECT;
        }
        case black:
            // reject the recipient based on blacklisting either from or to
            smfi_setreply(ctx, "550", "5.7.1", "no such user");
            return SMFIS_REJECT;
        case oksofar:
            // accept the recipient, but remember the non-whites
            register_string(priv.non_whites, rcptaddr);
            priv.only_whites = false;
            break;
        case white:
            priv.have_whites = true;
            break;
        default:
            break;
    }
    // accept the recipient
    return SMFIS_CONTINUE;
}

// body callback: feed this chunk of the message body to the content
// scanner, unless the client authenticated or every recipient is
// whitelisted
sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len)
{
    mlfiPriv &priv = *MLFIPRIV;
    const bool skip_scan = priv.authenticated || priv.only_whites;
    if (!skip_scan) {
        priv.scanner->scan(data, len);
    }
    return SMFIS_CONTINUE;
}

// end of message callback: run the content checks and either accept
// the message, reject it outright, or - when at least one recipient is
// whitelisted - accept it and remove the recipients that are not.
// The per message state is reset before returning.
sfsistat mlfi_eom(SMFICTX *ctx)
{
    sfsistat  rc;
    mlfiPriv &priv = *MLFIPRIV;
    char     *host = NULL;
    int       ip   = 0;         // only set by check_hosts for some verdicts
    status    st   = oksofar;   // check_hosts is skipped for authenticated/whitelisted mail
    // process end of message
    if (priv.authenticated ||
        priv.only_whites   ||
        ((st=check_hosts(priv, host, ip)) == oksofar)) rc = SMFIS_CONTINUE;
    else {
        if (!priv.have_whites) {
            // can reject the entire message
            char buf[2000];
            if (st == reject_tag) {
                // rejected due to excessive bad html tags
                // the message takes no arguments, so it must not be used as the format itself
                const char *msg = priv.pc->tag_limit_message;
                snprintf(buf, sizeof(buf), "%s", (msg) ? msg : "");
            }
            else if (st == reject_host) {
                // rejected due to excessive unique host/urls
                const char *msg = priv.pc->host_limit_message;
                snprintf(buf, sizeof(buf), "%s", (msg) ? msg : "");
            }
            else {
                // rejected by the content dnsbl, the message template
                // receives the host name and its ip address
                char adr[sizeof "255.255.255.255"];
                adr[0] = '\0';
                inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
                snprintf(buf, sizeof(buf), priv.pc->content_message, host, adr);
            }
            smfi_setreply(ctx, "550", "5.7.1", buf);
            rc = SMFIS_REJECT;
        }
        else {
            // need to accept it but remove the recipients that don't want it
            for (string_set::iterator i=priv.non_whites.begin(); i!=priv.non_whites.end(); i++) {
                char *rcpt = *i;
                smfi_delrcpt(ctx, rcpt);
            }
            rc = SMFIS_CONTINUE;
        }
    }
    // reset for a new message on the same connection
    mlfi_abort(ctx);
    return rc;
}

// message aborted callback: discard the per message state and prepare
// for a new message on the same connection. mlfi_eom() also calls this
// directly once it has finished with a message.
// NOTE(review): MLFIPRIV is dereferenced without a NULL check, unlike
// mlfi_close() - confirm that this can never run without private data.
sfsistat mlfi_abort(SMFICTX *ctx)
{
    mlfiPriv &priv = *MLFIPRIV;
    priv.reset();
    return SMFIS_CONTINUE;
}

// connection cleanup callback: delete the private data, if there is
// any, and clear the pointer held by the milter context
sfsistat mlfi_close(SMFICTX *ctx)
{
    mlfiPriv *priv = MLFIPRIV;
    if (priv) {
        delete priv;
        smfi_setpriv(ctx, NULL);
    }
    return SMFIS_CONTINUE;
}

// milter descriptor table listing the callbacks implemented above.
// The SMFIF_DELRCPT flag goes with the smfi_delrcpt() calls made in
// mlfi_eom(). No HELO, header or end of header callbacks are provided.
struct smfiDesc smfilter =
{
    "DNSBL",            // filter name
    SMFI_VERSION,       // version code -- do not change
    SMFIF_DELRCPT,      // flags
    mlfi_connect,       // connection info filter
    NULL,               // SMTP HELO command filter
    mlfi_envfrom,       // envelope sender filter
    mlfi_envrcpt,       // envelope recipient filter
    NULL,               // header filter
    NULL,               // end of header
    mlfi_body,          // body block filter
    mlfi_eom,           // end of message
    mlfi_abort,         // message aborted
    mlfi_close,         // connection cleanup
};


static void dumpit(char *name, string_map map);
static void dumpit(char *name, string_map map) {
    fprintf(stdout, "\n");
    for (string_map::iterator i=map.begin(); i!=map.end(); i++) {
        fprintf(stdout, "%s %s->%s\n", name, (*i).first, (*i).second);
    }
}


static void dumpit(from_map map);
static void dumpit(from_map map) {
    for (from_map::iterator i=map.begin(); i!=map.end(); i++) {
        char buf[2000];
        snprintf(buf, sizeof(buf), "envelope from map for %s", (*i).first);
        string_map *sm = (*i).second;
        dumpit(buf, *sm);
    }
}


////////////////////////////////////////////////
//  print a complete configuration to stdout: the envelope maps, the
//  dnsbls, the dnsbl lists, the enabled content filters and the list
//  of config files that were read
//
static void dumpit(CONFIG &dc);
static void dumpit(CONFIG &dc) {
    // envelope from / to maps
    dumpit(dc.env_from);
    dumpit("envelope to (dnsbl list)", dc.env_to_dnsbll);
    dumpit("envelope to (from map)", dc.env_to_chkfrom);

    // the individual dnsbls
    fprintf(stdout, "\ndnsbls\n");
    dnsblp_map::iterator bl = dc.dnsbls.begin();
    while (bl != dc.dnsbls.end()) {
        DNSBLP d = bl->second;
        fprintf(stdout, "%s %s %s\n", bl->first, d->suffix, d->message);
        ++bl;
    }

    // the named lists of dnsbls, one line per list
    fprintf(stdout, "\ndnsbl_lists\n");
    dnsbllp_map::iterator bll = dc.dnsblls.begin();
    while (bll != dc.dnsblls.end()) {
        DNSBLLP members = bll->second;
        fprintf(stdout, "%s", bll->first);
        for (DNSBLL::iterator m = members->begin(); m != members->end(); ++m) {
            fprintf(stdout, " %s", (*m)->suffix);
        }
        fprintf(stdout, "\n");
        ++bll;
    }

    // content filtering options, only shown when enabled
    if (dc.content_suffix) {
        fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message);
    }
    if (dc.host_limit) {
        fprintf(stdout, "\ncontent filtering for host names enabled with limit %d %s\n", dc.host_limit, dc.host_limit_message);
    }
    if (dc.tag_limit) {
        fprintf(stdout, "\ncontent filtering for excessive html tags enabled with limit %d %s\n", dc.tag_limit, dc.tag_limit_message);
    }

    // every file that contributed to this configuration
    fprintf(stdout, "\nfiles\n");
    string_list::iterator file = dc.config_files.begin();
    while (file != dc.config_files.end()) {
        fprintf(stdout, "config includes %s\n", *file);
        ++file;
    }
}


////////////////////////////////////////////////
//  check for redundant or recursive include files
//
//  Returns false for a NULL file name and for a file that is already
//  in the list of config files read so far (which is also logged);
//  true otherwise.
//
static bool ok_to_include(CONFIG &dc, char *fn);
static bool ok_to_include(CONFIG &dc, char *fn) {
    if (!fn) return false;
    string_list::iterator seen = dc.config_files.begin();
    for (; seen != dc.config_files.end(); ++seen) {
        if (strcmp(*seen, fn) != 0) continue;
        my_syslog("redundant or recursive include file detected");
        return false;
    }
    return true;
}


////////////////////////////////////////////////
//  load a single config file
//
//  dc    the configuration being built
//  name  name of the envelope from map that receives the env_from and
//        "substitute mail_host" entries
//  fn    the file to read; it is recorded in dc.config_files even if
//        it cannot be opened
//
//  Lines are lowercased; empty lines and lines starting with # are
//  skipped. A line that does not start with white space begins with a
//  tag: "ok" selects the white list and "many" the black list for this
//  and the following lines, any other tag leaves the selection alone.
//  The rest of the line is one of
//      env_from <address>
//      env_to <address>
//      substitute mail_host <address>
//      include <file>
//  Included files are loaded recursively into the same from map.
//
static void load_conf_dcc(CONFIG &dc, char *name, char *fn);
static void load_conf_dcc(CONFIG &dc, char *name, char *fn) {
    dc.config_files.push_back(fn);
    char *list = BLACK;
    const int LINE_SIZE = 2000;
    ifstream is(fn);
    if (is.fail()) return;
    char line[LINE_SIZE];
    char *delim = " \t";
    // Test the result of getline rather than eof(): an overlong line
    // sets failbit without ever reaching eof, which used to spin here
    // forever.
    while (is.getline(line, LINE_SIZE)) {
        int n = strlen(line);
        if (!n) continue;
        // tolower() needs an unsigned char value, plain char may be negative
        for (int i=0; i<n; i++) line[i] = tolower((unsigned char)line[i]);
        if (line[0] == '#') continue;
        // must be decided before strtok() starts to modify the line
        bool tagged = (strspn(line, delim) == 0);
        char *cmd = strtok(line, delim);
        if (tagged && cmd) {
            // have a leading ok/many tag to fetch
                 if (strcmp(cmd, MANY) == 0) list = BLACK;
            else if (strcmp(cmd, OK) == 0)   list = WHITE;
            // Continue the same scan for the command. Stepping over the
            // tag by hand could run past the end of a line that holds
            // nothing but the tag, and parse stale buffer contents.
            cmd = strtok(NULL, delim);
        }
        if (!cmd) continue;
        if (strcmp(cmd, "env_from") == 0) {
            char *from = next_token(delim);
            if (from) {
                string_map &fm = really_find_from_map(dc, name);
                fm[from] = list;
            }
        }
        else if (strcmp(cmd, "env_to") == 0) {
            char *to = next_token(delim);
            if (to) {
                dc.env_to_dnsbll[to]  = list;
                dc.env_to_chkfrom[to] = list;
            }
        }
        else if (strcmp(cmd, "substitute") == 0) {
            char *tag = next_token(delim);
            if (tag && (strcmp(tag, "mail_host") == 0)) {
                char *from = next_token(delim);
                if (from) {
                    string_map &fm = really_find_from_map(dc, name);
                    fm[from] = list;
                }
            }
        }
        else if (strcmp(cmd, "include") == 0) {
            // the nested load runs its own strtok() scans, which is
            // fine since nothing more is read from this line
            char *included = next_token(delim);
            if (ok_to_include(dc, included)) {
                load_conf_dcc(dc, name, included);
            }
        }

    }
    if (!is.eof()) {
        // stopped before the end of the file
        my_syslog("config line too long or unreadable, rest of the file ignored");
    }
    is.close();
}


static void load_conf(CONFIG &dc, char *fn);
static void load_conf(CONFIG &dc, char *fn) {
    dc.config_files.push_back(fn);
    map<char*, int, ltstr> commands;
    enum {dummy, tld, content, hostlimit, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc};
    commands["tld"        ] = tld;
    commands["content"    ] = content;
    commands["host_limit" ] = hostlimit;
    commands["html_limit" ] = htmllimit;
    commands["html_tag"   ] = htmltag;
    commands["dnsbl"      ] = dnsbl;
    commands["dnsbl_list" ] = dnsbll;
    commands["env_from"   ] = envfrom;
    commands["env_to"     ] = envto;
    commands["include"    ] = include;
    commands["include_dcc"] = includedcc;
    const int LINE_SIZE = 2000;
    ifstream is(fn);
    if (is.fail()) return;
    char line[LINE_SIZE];
    char orig[LINE_SIZE];
    char *delim = " \t";
    int curline = 0;
    while (!is.eof()) {
        is.getline(line, LINE_SIZE);
        snprintf(orig, sizeof(orig), "%s", line);
        curline++;
        int n = strlen(line);
        for (int i=0; i<n; i++) line[i] = tolower(line[i]);
        char *cmd = strtok(line, delim);
        if (cmd && (cmd[0] != '#') && (cmd[0] != '\0')) {
            // have a decent command
            bool processed = false;
            switch (commands[cmd]) {
                case tld: {
                    char *tld = next_token(delim);
                    if (!tld) break;                            // no tld value
                    dc.tlds.insert(tld);
                    processed = true;
                    } break;

                case content: {
                    char *suff = strtok(NULL, delim);
                    if (!suff) break;                           // no dns suffix
                    char *msg = suff + strlen(suff);
                    if ((msg - line) >= strlen(orig)) break;    // line ended with the dns suffix
                    msg  = strchr(msg+1, '\'');
                    if (!msg) break;                            // no reply message template
                    msg++; // move over the leading '
                    if ((msg - line) >= strlen(orig)) break;    // line ended with the leading quote
                    char *last = strchr(msg, '\'');
                    if (!last) break;                           // no trailing quote
                    *last = '\0';                               // make it a null terminator
                    dc.content_suffix  = register_string(suff);
                    dc.content_message = register_string(msg);
                    processed = true;
                    } break;

                case hostlimit: {
                    char *limit = strtok(NULL, delim);
                    if (!limit) break;                          // no integer limit
                    char *msg = limit + strlen(limit);
                    if ((msg - line) >= strlen(orig)) break;    // line ended with the limit
                    msg  = strchr(msg+1, '\'');
                    if (!msg) break;                            // no reply message template
                    msg++; // move over the leading '
                    if ((msg - line) >= strlen(orig)) break;    // line ended with the leading quote
                    char *last = strchr(msg, '\'');
                    if (!last) break;                           // no trailing quote
                    *last = '\0';                               // make it a null terminator
                    dc.host_limit         = atoi(limit);
                    dc.host_limit_message = register_string(msg);
                    processed = true;
                    } break;

                case htmllimit: {
                    char *limit = strtok(NULL, delim);
                    if (!limit) break;                          // no integer limit
                    char *msg = limit + strlen(limit);
                    if ((msg - line) >= strlen(orig)) break;    // line ended with the limit
                    msg  = strchr(msg+1, '\'');
                    if (!msg) break;                            // no reply message template
                    msg++; // move over the leading '
                    if ((msg - line) >= strlen(orig)) break;    // line ended with the leading quote
                    char *last = strchr(msg, '\'');
                    if (!last) break;                           // no trailing quote
                    *last = '\0';                               // make it a null terminator
                    dc.tag_limit         = atoi(limit);
                    dc.tag_limit_message = register_string(msg);
                    processed = true;
                    } break;

                case htmltag: {
                    char *tag = next_token(delim);
                    if (!tag) break;                            // no html tag value
                    dc.html_tags.insert(tag);                   // base version
                    char buf[200];
                    snprintf(buf, sizeof(buf), "/%s", tag);
                    dc.html_tags.insert(register_string(buf));  // leading /
                    snprintf(buf, sizeof(buf), "%s/", tag);
                    dc.html_tags.insert(register_string(buf));  // trailing /
                    processed = true;
                    } break;

                case dnsbl: {
                    // have a new dnsbl to use
                    char *name = next_token(delim);
                    if (!name) break;                           // no name name
                    if (find_dnsbl(dc, name)) break;            // duplicate entry
                    char *suff = strtok(NULL, delim);
                    if (!suff) break;                           // no dns suffic
                    char *msg = suff + strlen(suff);
                    if ((msg - line) >= strlen(orig)) break;    // line ended with the dns suffix
                    msg  = strchr(msg+1, '\'');
                    if (!msg) break;                            // no reply message template
                    msg++; // move over the leading '
                    if ((msg - line) >= strlen(orig)) break;    // line ended with the leading quote
                    char *last = strchr(msg, '\'');
                    if (!last) break;                           // no trailing quote
                    *last = '\0';                               // make it a null terminator
                    dc.dnsbls[name] = new DNSBL(register_string(suff), register_string(msg));
                    processed = true;
                    } break;

                case dnsbll: {
                    // define a new combination of dnsbls
                    char *name = next_token(delim);
                    if (!name) break;
                    if (find_dnsbll(dc, name)) break;               // duplicate entry
                    char *list = next_token(delim);
                    if (!list || (*list == '\0') || (*list == '#')) break;
                    DNSBLLP d = new DNSBLL;
                    DNSBLP p = find_dnsbl(dc, list);
                    if (p) d->push_back(p);
                    while (true) {
                        list = next_token(delim);
                        if (!list || (*list == '\0') || (*list == '#')) break;
                        DNSBLP p = find_dnsbl(dc, list);
                        if (p) d->push_back(p);
                    }
                    dc.dnsblls[name] = d;
                    processed = true;
                    } break;

                case envfrom: {
                    // add an entry into the named string_map
                    char *name = next_token(delim);
                    if (!name) break;
                    char *from = next_token(delim);
                    if (!from) break;
                    char *list = next_token(delim);
                    if (!list) break;
                    if ((strcmp(list, WHITE) == 0) ||
                        (strcmp(list, BLACK) == 0)) {
                        string_map &fm = really_find_from_map(dc, name);
                        fm[from] = list;
                        processed = true;
                    }
                    else {
                        // list may be the name of a previously defined from_map
                        string_map *m = find_from_map(dc, list);
                        if (m && (strcmp(list,name) != 0)) {
                            string_map &pm = *m;
                            string_map &fm = really_find_from_map(dc, name);
                            fm.insert(pm.begin(), pm.end());
                            processed = true;
                        }
                    }
                    } break;

                case envto: {
                    // define the dnsbl_list and env_from maps to use for this recipient
                    char *to   = next_token(delim);
                    if (!to) break;
                    char *list = next_token(delim);
                    if (!list) break;
                    char *from = next_token(delim);
                    if (!from) break;
                    dc.env_to_dnsbll[to]  = list;
                    dc.env_to_chkfrom[to] = from;
                    processed = true;
                    } break;

                case include: {
                    char *fn = next_token(delim);
                    if (ok_to_include(dc, fn)) {
                        load_conf(dc, fn);
                        processed = true;
                    }
                    } break;

                case includedcc: {
                    char *name = next_token(delim);
                    if (!name) break;
                    char *fn = next_token(delim);
                    if (ok_to_include(dc, fn)) {
                        load_conf_dcc(dc, name, fn);
                        processed = true;
                    }
                    } break;

                default: {
                    } break;
            }
            if (!processed) {
                pthread_mutex_lock(&syslog_mutex);
                    openlog("dnsbl", LOG_PID, LOG_MAIL);
                    syslog(LOG_ERR, "ignoring file %s line %d : %s\n", fn, curline, orig);
                    closelog();
                pthread_mutex_unlock(&syslog_mutex);
            }
        }
    }
    is.close();
}


////////////////////////////////////////////////
//  reload the config
//
// Build a new configuration object from dnsbl.conf. The object gets the
// next generation number, which is taken while holding config_mutex, and
// its load time is recorded once the file has been read. The caller
// receives the new object.
static CONFIG* new_conf();
static CONFIG* new_conf() {
    CONFIG &fresh = *new CONFIG;

    // take the next generation number under the lock
    pthread_mutex_lock(&config_mutex);
        fresh.generation = generation++;
    pthread_mutex_unlock(&config_mutex);

    // announce the load before reading the file
    char note[200];
    snprintf(note, sizeof(note), "loading configuration generation %d", fresh.generation);
    my_syslog(note);

    load_conf(fresh, "dnsbl.conf");
    fresh.load_time = time(NULL);
    return &fresh;
}


////////////////////////////////////////////////
//  thread to watch the old config files for changes
//  and reload when needed. we also cleanup old
//  configs whose reference count has gone to zero.
//
static void* config_loader(void *arg);
static void* config_loader(void *arg) {
    typedef set<CONFIG *> configp_set;
    configp_set old_configs;
    while (loader_run) {
        sleep(180);  // look for modifications every 3 minutes
        if (!loader_run) break;
        CONFIG &dc = *config;
        time_t then = dc.load_time;
        struct stat st;
        bool reload = false;
        for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) {
            char *fn = *i;
            if (stat(fn, &st))           reload = true; // file disappeared
            else if (st.st_mtime > then) reload = true; // file modified
            if (reload) break;
        }
        if (reload) {
            CONFIG *newc = new_conf();
            // replace the global config pointer
            pthread_mutex_lock(&config_mutex);
                CONFIG *old = config;
                config = newc;
            pthread_mutex_unlock(&config_mutex);
            if (old) old_configs.insert(old);
        }
        // now look for old configs with zero ref counts
        for (configp_set::iterator i=old_configs.begin(); i!=old_configs.end(); ) {
            CONFIG *old = *i;
            if (!old->reference_count) {
                char buf[200];
                snprintf(buf, sizeof(buf), "freeing memory for old configuration generation %d", old->generation);
                my_syslog(buf);
                delete old; // destructor does all the work
                old_configs.erase(i++);
            }
            else i++;
        }
    }
    return NULL;
}


// Print the command line summary to stderr. prog is the program name
// shown on the first line.
static void usage(char *prog);
static void usage(char *prog)
{
    // one call with adjacent literals; the text is the same as before
    fprintf(stderr,
            "Usage: %s  [-d] [-c] -p socket-addr [-t timeout]\n"
            "where socket-addr is for the connection to sendmail and should be one of\n"
            "   inet:port@local-ip-address\n"
            "   local:local-domain-socket-file-name\n"
            "-c will load and dump the config to stdout\n"
            "-d will add some syslog debug messages\n",
            prog);
}


// Program entry point.
//
// Options:
//   -p conn  the socket through which the MTA connects to this milter (required)
//   -t sec   the milter timeout value
//   -c       load the config, dump it, and exit without starting the milter
//   -d       add debug syslog entries
//   -h       print the usage summary
//
// Without -c the process registers the milter, goes into the background,
// loads the config, starts the config loader thread, writes the pid file,
// and runs smfi_main(). If smfi_main() fails after running for more than
// five minutes the program tries to restart itself with execvp().
int main(int argc, char**argv)
{
    bool check   = false;
    bool setconn = false;
    int c;
    const char *args = "p:t:hcd";
    extern char *optarg;

    // Process command line options
    while ((c = getopt(argc, argv, args)) != -1) {
        switch (c) {
            case 'p':
                if (optarg == NULL || *optarg == '\0') {
                    // never hand a null pointer to %s
                    fprintf(stderr, "Illegal conn: %s\n", optarg ? optarg : "");
                    exit(EX_USAGE);
                }
                if (smfi_setconn(optarg) == MI_FAILURE) {
                    fprintf(stderr, "smfi_setconn failed\n");
                    exit(EX_SOFTWARE);
                }

                // remove a stale socket file left by an earlier run
                     if (strncasecmp(optarg, "unix:", 5) == 0)  unlink(optarg + 5);
                else if (strncasecmp(optarg, "local:", 6) == 0) unlink(optarg + 6);
                setconn = true;
                break;

            case 't':
                if (optarg == NULL || *optarg == '\0') {
                    // never hand a null pointer to %s
                    fprintf(stderr, "Illegal timeout: %s\n", optarg ? optarg : "");
                    exit(EX_USAGE);
                }
                if (smfi_settimeout(atoi(optarg)) == MI_FAILURE) {
                    fprintf(stderr, "smfi_settimeout failed\n");
                    exit(EX_SOFTWARE);
                }
                break;

            case 'c':
                check = true;
                break;

            case 'd':
                debug_syslog = true;
                break;

            case 'h':
            default:
                usage(argv[0]);
                exit(EX_USAGE);
        }
    }

    // initialize the thread sync objects before their first use. the -c
    // path below calls new_conf(), which locks config_mutex.
    pthread_mutex_init(&config_mutex, 0);
    pthread_mutex_init(&syslog_mutex, 0);
    pthread_mutex_init(&resolve_mutex, 0);

    if (check) {
        CONFIG &dc = *new_conf();
        dumpit(dc);
        return 0;
    }

    if (!setconn) {
        fprintf(stderr, "%s: Missing required -p argument\n", argv[0]);
        usage(argv[0]);
        exit(EX_USAGE);
    }

    if (smfi_register(smfilter) == MI_FAILURE) {
        fprintf(stderr, "smfi_register failed\n");
        exit(EX_UNAVAILABLE);
    }

    // switch to background mode
    if (daemon(1,0) < 0) {
        fprintf(stderr, "daemon() call failed\n");
        exit(EX_UNAVAILABLE);
    }

    // load the initial config
    config = new_conf();

    // only create threads after the fork() in daemon
    pthread_t tid;
    if (pthread_create(&tid, 0, config_loader, 0)) {
        // tid is not valid here, so there is nothing to detach
        my_syslog("failed to create config loader thread");
    }
    else if (pthread_detach(tid)) {
        my_syslog("failed to detach config loader thread");
    }

    // write the pid
    const char *pidpath = "/var/run/dnsbl.pid";
    unlink(pidpath);
    FILE *f = fopen(pidpath, "w");
    if (f) {
#ifdef linux
        // from a comment in the DCC source code:
        // Linux threads are broken.  Signals given the
        // original process are delivered to only the
        // thread that happens to have that PID.  The
        // sendmail libmilter thread that needs to hear
        // SIGINT and other signals does not, and that breaks
        // scripts that need to stop milters.
        // However, signaling the process group works.
        fprintf(f, "-%d\n", (u_int)getpgrp());
#else
        fprintf(f, "%d\n", (u_int)getpid());
#endif
        fclose(f);
    }

    time_t starting = time(NULL);
    int rc = smfi_main();
    if ((rc != MI_SUCCESS) && (time(NULL) > starting+5*60)) {
        // only restart when we ran for a while, to avoid a tight restart loop
        my_syslog("trying to restart after smfi_main()");
        loader_run = false;     // eventually the config loader thread will terminate
        execvp(argv[0], argv);
    }
    exit((rc == MI_SUCCESS) ? 0 : EX_UNAVAILABLE);
}