view src/dnsbl.cpp @ 296:05b604c99e06 stable-6-0-43

allow broken SRS0+ rather than the correct SRS0= tag
author Carl Byington <carl@five-ten-sg.com>
date Fri, 18 Jul 2014 15:08:53 -0700
parents fbbc341001cc
children 13905d36ca82
line wrap: on
line source

/*

Copyright (c) 2013 Carl Byington - 510 Software Group, released under
the GPL version 3 or any later version at your choice available at
http://www.gnu.org/licenses/gpl-3.0.txt

Based on a sample milter Copyright (c) 2000-2003 Sendmail, Inc. and its
suppliers.  Inspired by the DCC by Rhyolite Software

-b port  The port used to talk to the dcc interface daemon
-r port  The port used to talk to our internal dns resolver processes
-p port  The port through which the MTA will connect to this milter.
-t sec   The timeout value.
-c       Check the config, and print a copy to stdout. Don't start the
         milter or do anything with the socket.
-s       Stress test by loading and deleting the current config in a loop.
-d level set the debug level
-e f|t   Print the results of looking up from address f and to address
         t in the current config

*/


// from sendmail sample
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <sysexits.h>
#include <unistd.h>

// needed for socket io
#include <sys/ioctl.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/un.h>

// needed for thread
#include <pthread.h>

// needed for std c++ collections
#include <set>
#include <map>
#include <list>

// for the dns resolver
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>

// misc stuff needed here
#include <ctype.h>
#include <syslog.h>
#include <pwd.h>
#include <sys/wait.h>   /* header for waitpid() and various macros */
#include <signal.h>     /* header for signal functions */

#include "includes.h"

#ifndef HAVE_DAEMON
	#include "daemon.h"
	#include "daemon.c"
#endif

#ifndef HAVE_MEMRCHR
    // Fallback for platforms without memrchr(): return a pointer to the last
    // byte equal to (unsigned char)c within the first len bytes of a, or a
    // null pointer when there is no such byte (including when len is zero).
    void *memrchr(const void *a, int c, size_t len);
    void *memrchr(const void *a, int c, size_t len) {
        const unsigned char *base   = (const unsigned char *)a;
        const unsigned char  wanted = (unsigned char)c;  // compare as bytes, so negative char values match
        // count down by index so we never form a pointer before the buffer
        while (len--) {
            if (base[len] == wanted)
                return (void *)(base + len);
        }
        return (void *)0;
    }
#endif

// Entry points declared with C linkage. The mlfi_* prototypes use the
// milter types (sfsistat, SMFICTX); they are presumably the callbacks
// handed to libmilter -- their definitions and registration are not in
// this part of the file. sig_chld has the shape of a signal handler.
extern "C" {
    sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr);
    sfsistat mlfi_helo(SMFICTX * ctx, char *helohost);
    sfsistat mlfi_envfrom(SMFICTX *ctx, char **argv);
    sfsistat mlfi_envrcpt(SMFICTX *ctx, char **argv);
    sfsistat mlfi_header(SMFICTX* ctx, char* headerf, char* headerv);
    sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len);
    sfsistat mlfi_eom(SMFICTX *ctx);
    sfsistat mlfi_abort(SMFICTX *ctx);
    sfsistat mlfi_close(SMFICTX *ctx);
    void sig_chld(int signo);
}

// ---- process-wide state -------------------------------------------------
int  debug_syslog  = 0;     // verbosity; code in this file logs extra detail when > 2 and > 3
bool syslog_opened = false; // set once my_syslog() has called openlog()
bool use_syslog    = true;  // false to printf
bool loader_run    = true;  // used to stop the config loader thread
CONFIG   *config = NULL;    // protected by the config_mutex
int   generation = 0;       // protected by the config_mutex
const int maxlen = 1000;    // used for snprintf buffers
regex_t srs_pattern;        // used to detect srs coding in mail addresses
regex_t prvs_pattern;       // used to detect prvs coding in mail addresses

pthread_mutex_t  config_mutex;      // guards config, generation and the config reference counts
pthread_mutex_t  syslog_mutex;      // serializes openlog()/syslog() calls in my_syslog()
pthread_mutex_t  resolve_mutex;     // serializes parsing of dns answers in dns_interface()
pthread_mutex_t  fd_pool_mutex;     // guards fd_pool and the resolver socket counters
pthread_mutex_t  rate_mutex;        // guards the recipient rate and auth address tables

std::set<int>    fd_pool;           // idle resolver connections available for reuse, protected with fd_pool_mutex
int         NULL_SOCKET              = -1;
const       time_t ERROR_SOCKET_TIME = 60;          // number of seconds between attempts to open a socket to the dns resolver process
const char *resolver_port            = NULL;        // unix domain socket to talk to the dns resolver process
int         resolver_socket          = NULL_SOCKET; // socket used to listen for resolver requests
const char *dccifd_port              = NULL;        // unix domain socket to talk to the dcc interface daemon
time_t      last_error_time     = 0;        // time of the last failed my_connect(); retries are suppressed for ERROR_SOCKET_TIME seconds
int         resolver_sock_count = 0;        // protected with fd_pool_mutex
int         resolver_pool_size  = 0;        // protected with fd_pool_mutex
rates       rcpt_hourly_counts;             // protected with rate_mutex
rates       rcpt_daily_counts;              // protected with rate_mutex
auth_addresses auth_hourly_addresses;       // protected with rate_mutex
auth_addresses auth_daily_addresses;        // protected with rate_mutex


// Collection of nameserver names gathered while parsing dns answers.
// Entries are created by add(); the destructor frees every key and value
// string held in ns_host.
struct ns_map {
    // all the strings are owned by the keys/values in the ns_host string map
    string_map  ns_host;    // nameserver name -> host name that uses this name server
    ns_mapper   ns_ip;      // nameserver name -> ipv4 address of the name server
    ~ns_map();
    // record nameserver `name` as serving `refer`; a name already present is left unchanged
    void add(const char *name, const char *refer);
};


// Free every key and value string held by ns_host, then empty both maps.
// ns_ip shares its key pointers with ns_host, so it only needs clearing.
ns_map::~ns_map() {
    string_map::iterator entry = ns_host.begin();
    while (entry != ns_host.end()) {
        free((void*)entry->first);
        free((void*)entry->second);
        ++entry;
    }
    ns_ip.clear();
    ns_host.clear();
}


// Remember nameserver `name` as serving host `refer`. Both strings are
// copied; the copies are owned by ns_host. The ip address for the name
// starts out as 0. A name that is already registered is left untouched.
void ns_map::add(const char *name, const char *refer) {
    if (ns_host.find(name) == ns_host.end()) {
        char *owned_name  = strdup(name);
        char *owned_refer = strdup(refer);
        ns_ip[owned_name]   = 0;
        ns_host[owned_name] = owned_refer;
    }
}

// packed structure to allow a single socket write to dump the
// length and the following answer. The packing attribute is gcc specific.
struct glommer {
    // number of valid bytes in answer; 0 is read by dns_interface() as "no answer"
    size_t  length;
    #ifdef NS_PACKETSZ
        u_char answer[NS_PACKETSZ*4];   // with a resolver, we return resolver answers
    #else
        uint32_t answer;                // without a resolver, we return a single ipv4 address in network byte order, 0 == no answer
    #endif
} __attribute__ ((packed));             // no padding, so length and answer are contiguous on the wire


////////////////////////////////////////////////
// helper to manipulate recipient counts
//
void incr_rcpt_count(const char *user, int &hourly, int &daily);
void incr_rcpt_count(const char *user, int &hourly, int &daily) {
    pthread_mutex_lock(&rate_mutex);
        rates::iterator i = rcpt_hourly_counts.find(user);
        hourly = 1;
        if (i == rcpt_hourly_counts.end()) {
            user = strdup(user);
            rcpt_hourly_counts[user] = hourly;
        }
        else {
            hourly = ++((*i).second);
        }

        rates::iterator j = rcpt_daily_counts.find(user);
        daily = 1;
        if (j == rcpt_daily_counts.end()) {
            user = strdup(user);
            rcpt_daily_counts[user] = daily;
        }
        else {
            daily = ++((*j).second);
        }
    pthread_mutex_unlock(&rate_mutex);
}


void add_auth_address(const char *user, int &hourly, int &daily, int32_t ip);
void add_auth_address(const char *user, int &hourly, int &daily, int32_t ip) {
    pthread_mutex_lock(&rate_mutex);
        auth_addresses::iterator i = auth_hourly_addresses.find(user);
        if (i == auth_hourly_addresses.end()) {
            user = strdup(user);
            auth_hourly_addresses[user] = new int32_t_set;
            auth_hourly_addresses[user]->insert(ip);
            hourly = 1;
        }
        else {
            int32_t_set::iterator k = ((*i).second)->find(ip);
            if (k == ((*i).second)->end()) ((*i).second)->insert(ip);
            hourly = ((*i).second)->size();
        }

        auth_addresses::iterator j = auth_daily_addresses.find(user);
        if (j == auth_daily_addresses.end()) {
            user = strdup(user);
            auth_daily_addresses[user] = new int32_t_set;
            auth_daily_addresses[user]->insert(ip);
            daily = 1;
        }
        else {
            int32_t_set::iterator k = ((*j).second)->find(ip);
            if (k == ((*j).second)->end()) ((*j).second)->insert(ip);
            daily = ((*j).second)->size();
        }
    pthread_mutex_unlock(&rate_mutex);
}

////////////////////////////////////////////////
// helper to discard the strings held by a context_map
//
// Free every key string in cm, then empty the map. The mapped CONTEXT
// pointers are not touched.
void discard(context_map &cm);
void discard(context_map &cm) {
    context_map::iterator entry = cm.begin();
    while (entry != cm.end()) {
        free((void*)entry->first);
        ++entry;
    }
    cm.clear();
}


////////////////////////////////////////////////
// helper to register a string in a context_map
//
// Map a private copy of `name` to `con` in cm, unless the name is
// already present, in which case the existing entry is kept.
void register_string(context_map &cm, const char *name, CONTEXT *con);
void register_string(context_map &cm, const char *name, CONTEXT *con) {
    if (cm.find(name) == cm.end()) {
        char *owned = strdup(name);
        cm[owned] = con;
    }
}


////////////////////////////////////////////////
// disconnect the fd from the dns resolver process
//
// Shut down and close `sock`. When `decrement` is true the count of open
// resolver sockets is reduced by one under fd_pool_mutex. Passing
// NULL_SOCKET does nothing at all.
void my_disconnect(int sock, bool decrement = true);
void my_disconnect(int sock, bool decrement) {
    if (sock == NULL_SOCKET) return;

    if (decrement) {
        pthread_mutex_lock(&fd_pool_mutex);
            --resolver_sock_count;
        pthread_mutex_unlock(&fd_pool_mutex);
    }
    shutdown(sock, SHUT_RDWR);
    close(sock);
}


////////////////////////////////////////////////
// return fd connected to the dns resolver process
//
// Open a new unix domain stream connection to resolver_port.
// Returns the connected fd, or NULL_SOCKET when the attempt fails or when
// a previous failure happened less than ERROR_SOCKET_TIME seconds ago.
// Every success increments resolver_sock_count under fd_pool_mutex; every
// failure stamps last_error_time.
int my_connect();
int my_connect() {
    // back off for a while after a failure
    if ((time(NULL) - last_error_time) < ERROR_SOCKET_TIME) return NULL_SOCKET;

    sockaddr_un server;
    memset(&server, '\0', sizeof(server));
    server.sun_family = AF_UNIX;
    strncpy(server.sun_path, resolver_port, sizeof(server.sun_path)-1);

    int sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock == NULL_SOCKET) {
        last_error_time = time(NULL);
        return NULL_SOCKET;
    }
    if (connect(sock, (sockaddr *)&server, sizeof(server)) != 0) {
        // never counted, so close it without touching the counter
        my_disconnect(sock, false);
        last_error_time = time(NULL);
        return NULL_SOCKET;
    }

    pthread_mutex_lock(&fd_pool_mutex);
        ++resolver_sock_count;
    pthread_mutex_unlock(&fd_pool_mutex);
    return sock;
}


////////////////////////////////////////////////
//  ask a dns question and get an A record answer in network byte order
//  we don't try very hard, just using the default resolver retry settings.
//  If we cannot get an answer, we just accept the mail.
//
//
//  priv        - per connection state; supplies the resolver socket
//                (my_write/my_read) and the sticky priv.err flag
//  question    - name to resolve; a trailing '.' is appended if missing
//  maybe_ip    - if true and question parses as a dotted quad, that
//                address is returned without any dns traffic
//  nameservers - optional; when non null, NS names from the authority
//                section are added to it and matching A records from the
//                additional section fill in their addresses
//
//  Returns the last A record found in the answer section in network byte
//  order, or 0 when there is no answer or the resolver socket failed.
//
uint32_t dns_interface(mlfiPriv &priv, const char *question, bool maybe_ip, ns_map *nameservers);
uint32_t dns_interface(mlfiPriv &priv, const char *question, bool maybe_ip, ns_map *nameservers) {
    // tell sendmail we are still working
    #if _FFR_SMFI_PROGRESS
        if (priv.eom) smfi_progress(priv.ctx);
    #endif

    // this part can be done without locking the resolver mutex. Each
    // milter thread is talking over its own socket to a separate resolver
    // process, which does the actual dns resolution.
    if (priv.err) return 0; // cannot ask more questions on this socket.
    if (maybe_ip) {
        // might be a bare ip address, try this first to avoid dns lookups that may not be needed
        in_addr ip;
        if (inet_aton(question, &ip)) {
            return ip.s_addr;
        }
    }
    size_t n = strlen(question);
    if ((n > 0) && (question[n-1] == '.')) {    // n > 0 keeps us from reading question[-1] on an empty string
        priv.my_write(question, n+1);   // write the question including the null terminator
    }
    else {
        priv.my_write(question, n);     // write the question
        priv.my_write(".", 2);          // and the fully qualified . terminator and null string terminator
    }
    glommer glom;
    glom.length = 0;
    char *buf = (char *)&glom;
    priv.my_read(buf, sizeof(glom.length));
    if (priv.err) return 0;             // write or read failed, glom.length was never filled in
    buf += sizeof(glom.length);
    #ifdef RESOLVER_DEBUG
        char text[1000];
        snprintf(text, sizeof(text), "dns_interface() wrote question %s and has answer length %d", question, (int)glom.length);
        my_syslog(text);
    #endif
    if (glom.length == 0) return 0;
    if (glom.length > sizeof(glom.answer)) {
        priv.err = true;
        return 0;  // cannot process overlarge answers
    }
    priv.my_read(buf, glom.length);
    if (priv.err) return 0;             // short read, do not parse a partially filled answer

#ifdef NS_PACKETSZ
    // now we need to lock the resolver mutex to keep the milter threads from
    // stepping on each other while parsing the dns answer.
    uint32_t ret_address = 0;
    pthread_mutex_lock(&resolve_mutex);
        // parse the answer
        ns_msg handle;
        ns_rr  rr;
        if (ns_initparse(glom.answer, glom.length, &handle) == 0) {
            // look for ns names
            if (nameservers) {
                ns_map &ns = *nameservers;
                int rrnum = 0;
                while (ns_parserr(&handle, ns_s_ns, rrnum++, &rr) == 0) {
                    if (ns_rr_type(rr) == ns_t_ns) {
                        // expand the (possibly compressed) name in the rdata into nam
                        char nam[NS_MAXDNAME+1];
                        char         *n = nam;
                        const u_char *p = ns_rr_rdata(rr);
                        while (((n-nam) < NS_MAXDNAME) && ((size_t)(p-glom.answer) < glom.length) && *p) {
                            size_t s = *(p++);
                            if (s > 191) {
                                // compression pointer
                                if ((size_t)(p-glom.answer) >= glom.length) break; // second pointer byte is outside the answer
                                s = (s-192)*256 + *(p++);
                                if (s >= glom.length) break; // pointer outside bounds of answer
                                p = glom.answer + s;
                                s = *(p++);
                                if (s == 0) break;           // pointer led to the root label, the name is complete
                            }
                            // s is non zero here, so every pass adds to nam and the loop must end
                            size_t off = (size_t)(p-glom.answer);
                            if ((size_t)(n-nam) >= (NS_MAXDNAME-s)) break;  // destination would overflow name buffer
                            if (off >= glom.length)         break;  // source outside bounds of answer
                            if (s   >= (glom.length-off))   break;  // same test as off+s >= length, but cannot underflow
                            memcpy(n, p, s);
                            n += s;
                            p += s;
                            *(n++) = '.';
                        }
                        if (n-nam) n--;             // remove trailing .
                        *n = '\0';                  // null terminate it
                        ns.add(nam, question);      // ns host to lookup later
                    }
                }
                rrnum = 0;
                while (ns_parserr(&handle, ns_s_ar, rrnum++, &rr) == 0) {
                    if (ns_rr_type(rr) == ns_t_a) {
                        char* nam = (char*)ns_rr_name(rr);
                        ns_mapper::iterator i = ns.ns_ip.find(nam);
                        if (i != ns.ns_ip.end()) {
                            // we want this ip address
                            uint32_t address;
                            memcpy(&address, ns_rr_rdata(rr), sizeof(address));
                            ns.ns_ip[nam] = address;
                        }
                    }
                }
            }
            int rrnum = 0;
            while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) {
                if (ns_rr_type(rr) == ns_t_a) {
                    uint32_t address;
                    memcpy(&address, ns_rr_rdata(rr), sizeof(address));
                    ret_address = address;
                }
            }
        }
    pthread_mutex_unlock(&resolve_mutex);
    #ifdef RESOLVER_DEBUG
        snprintf(text, sizeof(text), "dns_interface() found ip %d", ret_address);
        my_syslog(text);
    #endif
    return ret_address;
#else
    return glom.answer;
#endif
}


////////////////////////////////////////////////
//  lookup a hostname on the uribl
//
//  if we find hostname on the uribl, return true and point found to hostname
//  as a string registered in hosts.
//  otherwise, return false and preserve the value of found.
//
// Ask the resolver for "<hostname>.<priv.uribl_suffix>." and treat any
// answer other than 0 or 127.0.0.0 as a listing. On a hit, found is set
// to the copy of hostname registered in hosts.
bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found);
bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) {
    char question[maxlen];
    snprintf(question, sizeof(question), "%s.%s.", hostname, priv.uribl_suffix);

    const uint32_t answer = ntohl(dns_interface(priv, question, false, NULL));
    if (!answer || (answer == 0x7f000000)) return false;    // not listed

    if (debug_syslog > 2) {
        char note[maxlen];
        snprintf(note, sizeof(note), "found %s on %s", hostname, priv.uribl_suffix);
        my_syslog(note);
    }
    found = register_string(hosts, hostname);
    return true;
}


////////////////////////////////////////////////
//  uribl checker
//  -------------
// hostname MUST not have a trailing dot. Find the tld part of
// the hostname, and add one more level. If that is listed on
// the uribl, return true and point found to the part of the
// hostname that we found as a string registered in hosts.
// Otherwise, return false and preserve the value of found.
//
// Split hostname at its dots, then walk the last (up to four) suffixes
// from longest to shortest. The first suffix that appears in one of the
// tld sets held by priv.memory decides what is looked up:
//   in tldnots   -> the suffix itself
//   in tldwilds  -> the suffix extended by two more labels
//   in tlds      -> the suffix extended by one more label
// If the hostname lacks the extra labels, or nothing matches, or hostname
// is a bare ip address, the result is false and found is left alone.
bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found);
bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) {
    in_addr ip;
    if (inet_aton(hostname, &ip)) return false; // don't check ip addresses in uribls

    // components[k] points at the suffix of hostname that starts at label k
    const char* components[maxlen];
    int n = 0;
    for (const char *label = hostname; n < maxlen; ) {
        components[n++] = label;
        const char *dot = strchr(label, '.');
        if (!dot) break;
        label = dot+1;
    }

    string_set *tlds     = priv.memory->get_tlds();
    string_set *tldwilds = priv.memory->get_tldwilds();
    string_set *tldnots  = priv.memory->get_tldnots();

    for (int i=max(0,n-4); i<n; i++) {
        const char *suffix = components[i];
        if (tldnots->find(suffix) != tldnots->end()) {
            return uriblookup(priv, hosts, suffix, found);
        }
        if (tldwilds->find(suffix) != tldwilds->end()) {
            if (i <= 1) return false;   // need two more labels in front
            return uriblookup(priv, hosts, components[i-2], found);
        }
        if (tlds->find(suffix) != tlds->end()) {
            if (i <= 0) return false;   // need one more label in front
            return uriblookup(priv, hosts, components[i-1], found);
        }
    }
    return false;
}


// Take a reference on the current config, obtain a resolver socket, and
// put every per connection / per message field into its initial state.
mlfiPriv::mlfiPriv() {
    // pin the current configuration
    pthread_mutex_lock(&config_mutex);
        pc = config;
        pc->reference_count++;
    pthread_mutex_unlock(&config_mutex);

    // sets fd and err
    get_fd();

    // connection and envelope state
    ctx                     = NULL;
    ip                      = 0;
    eom                     = false;
    helo                    = NULL;
    mailaddr                = NULL;
    queueid                 = NULL;
    authenticated           = NULL;
    client_name             = NULL;
    client_dns_name         = NULL;
    client_dns_forged       = false;

    // uribl results
    host_uribl              = NULL;
    helo_uribl              = false;
    client_uribl            = false;
    from_uribl              = false;

    // recipient derived flags
    have_whites             = false;
    only_whites             = true;
    allow_autowhitelisting  = true;
    want_spamassassin       = false;
    want_dccgrey            = false;
    want_dccbulk            = false;

    // content filtering
    content_context         = NULL;
    content_suffix          = NULL;
    content_message         = NULL;
    content_host_ignore     = NULL;
    uribl_suffix            = NULL;
    uribl_message           = NULL;

    // helper objects, allocated on demand
    memory                  = NULL;
    scanner                 = NULL;
    assassin                = NULL;
    dccifd                  = NULL;
}

// Give back the resolver socket, drop our config reference (deleting the
// config if it has been superseded and we were its last user), free the
// helo string and release everything else through reset(true).
mlfiPriv::~mlfiPriv() {
    return_fd();

    bool orphaned;
    pthread_mutex_lock(&config_mutex);
        pc->reference_count--;
        orphaned = (pc->reference_count == 0) && (pc != config);
    pthread_mutex_unlock(&config_mutex);
    if (orphaned) delete pc;    // free this config, since we were the last reference to it

    if (helo) free((void*)helo);
    reset(true);
}

void mlfiPriv::reset(bool final) {
    while (!delayer.empty()) {
        DELAYWHITEP  dwp = delayer.front();
        const char *loto = dwp->get_loto();
        if (loto) free((void*)loto);
        delete dwp;
        delayer.pop_front();
    }
    if (mailaddr)        free((void*)mailaddr);
    if (queueid)         free((void*)queueid);
    if (authenticated)   free((void*)authenticated);
    if (client_name)     free((void*)client_name);
    if (client_dns_name) free((void*)client_dns_name);
    discard(hosts_uribl);
    delayer.clear();
    discard(env_to);
    if (memory)   delete memory;
    if (scanner)  delete scanner;
    if (assassin) delete assassin;
    if (dccifd)   delete dccifd;
    if (!final) {
        ctx                     = NULL;
        eom                     = false;
        mailaddr                = NULL;
        queueid                 = NULL;
        authenticated           = NULL;
        client_name             = NULL;
        client_dns_name         = NULL;
        host_uribl              = NULL;
        helo_uribl              = false;
        client_uribl            = false;
        from_uribl              = false;
        have_whites             = false;
        only_whites             = true;
        want_spamassassin       = false;
        want_dccgrey            = false;
        want_dccbulk            = false;
        allow_autowhitelisting  = true;
        content_context         = NULL;
        memory                  = NULL;
        scanner                 = NULL;
        content_suffix          = NULL;
        content_message         = NULL;
        uribl_suffix            = NULL;
        uribl_message           = NULL;
        content_host_ignore     = NULL;
        assassin                = NULL;
        dccifd                  = NULL;
    }
}

// Acquire a resolver socket for this object: reuse one from fd_pool when
// the pool can be locked and is not empty, otherwise open a fresh
// connection with my_connect(). Leaves the result in fd, with err set
// when no socket could be obtained.
void mlfiPriv::get_fd() {
    err = true;
    fd  = NULL_SOCKET;

    if (pthread_mutex_lock(&fd_pool_mutex) == 0) {
        std::set<int>::iterator pooled = fd_pool.begin();
        if (pooled != fd_pool.end()) {
            // have at least one fd in the pool
            fd  = *pooled;
            err = false;
            fd_pool.erase(pooled);
            resolver_pool_size--;
            pthread_mutex_unlock(&fd_pool_mutex);
            return;
        }
        pthread_mutex_unlock(&fd_pool_mutex);
    }

    // pool is empty, or it could not be locked: get a new fd
    fd  = my_connect();
    err = (fd == NULL_SOCKET);
}

// Hand the resolver socket back. A healthy fd goes into fd_pool while the
// pool holds fewer than 5 sockets or fewer than 20% of all open resolver
// sockets; otherwise it is closed. An fd that saw an error, or a pool
// that cannot be locked, also results in the fd being closed.
void mlfiPriv::return_fd() {
    bool pooled = false;

    if (!err && (pthread_mutex_lock(&fd_pool_mutex) == 0)) {
        const bool pool_is_small = (resolver_pool_size < 5);
        const bool pool_under_20 = (resolver_sock_count > resolver_pool_size*5);
        if (pool_under_20 || pool_is_small) {
            fd_pool.insert(fd);
            resolver_pool_size++;
            pooled = true;
        }
        pthread_mutex_unlock(&fd_pool_mutex);
    }

    // my_disconnect() takes fd_pool_mutex itself, so call it unlocked
    if (!pooled) my_disconnect(fd);
}

// Write exactly len bytes from buf to the resolver socket.
// Returns the number of bytes written (len) on success. Returns 0 and
// sets err if the socket is already in error, the peer closed it, or
// write() fails; once err is set all further i/o on this object is
// skipped.
size_t mlfiPriv::my_write(const char *buf, size_t len) {
    if (err) return 0;
    size_t rs = 0;
    while (len) {
        // write() returns -1 on failure, so the result must stay signed;
        // stored in a size_t it would look like a huge positive count.
        ssize_t ws = write(fd, buf, len);
        if (ws > 0) {
            rs  += ws;
            len -= ws;
            buf += ws;
        }
        else if ((ws < 0) && (errno == EINTR)) {
            continue;   // interrupted by a signal before any data moved, try again
        }
        else {
            // peer closed the socket, or a real error
            rs = 0;
            err = true;
            break;
        }
    }
    return rs;
}

// Read exactly len bytes from the resolver socket into buf.
// Returns the number of bytes read (len) on success. Returns 0 and sets
// err if the socket is already in error, the peer closed it before len
// bytes arrived, or read() fails.
size_t mlfiPriv::my_read(char *buf, size_t len) {
    if (err) return 0;
    size_t rs = 0;
    while (len) {
        // read() returns -1 on failure, so the result must stay signed;
        // stored in a size_t it would look like a huge positive count.
        ssize_t ws = read(fd, buf, len);
        if (ws > 0) {
            rs  += ws;
            len -= ws;
            buf += ws;
        }
        else if ((ws < 0) && (errno == EINTR)) {
            continue;   // interrupted by a signal before any data moved, try again
        }
        else {
            // peer closed the socket (ws == 0), or a real error
            rs = 0;
            err = true;
            break;
        }
    }
    return rs;
}

// Set up content filtering from context `con` the first time it is
// requested for a message; later calls are no-ops because `memory` is
// already set. When the context supplies a uribl suffix, the helo name,
// the client dns name and the domain of the envelope sender are checked
// against it in that order, stopping at the first hit. `rcpt` is unused.
void mlfiPriv::need_content_filter(const char *rcpt, CONTEXT &con) {
    if (memory) return;     // an earlier recipient already configured the filter

    memory  = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_tldwilds(), con.get_content_tldnots());
    scanner = new url_scanner(memory);
    content_suffix      = con.get_content_suffix();
    content_message     = con.get_content_message();
    uribl_suffix        = con.get_uribl_suffix();
    uribl_message       = con.get_uribl_message();
    content_host_ignore = &con.get_content_host_ignore();

    // if we are using uribl, test helo and client names here
    if (!uribl_suffix) return;

    if (helo) {
        helo_uribl = check_uribl(*this, hosts_uribl, helo, host_uribl);
    }
    if (client_dns_name && !helo_uribl) {
        client_uribl = check_uribl(*this, hosts_uribl, client_dns_name, host_uribl);
    }
    if (mailaddr && !client_uribl) {
        const char *at = strchr(mailaddr, '@');
        if (at) from_uribl = check_uribl(*this, hosts_uribl, at+1, host_uribl);
    }
}


// Fetch the mlfiPriv stored as the milter private data of ctx and record
// ctx in it. Returns whatever smfi_getpriv() returned, which is NULL when
// no private data has been set for this connection; in that case nothing
// is written.
mlfiPriv* fetch_priv_from_ctx(SMFICTX *ctx);
mlfiPriv* fetch_priv_from_ctx(SMFICTX *ctx)
{
    mlfiPriv *priv = (struct mlfiPriv *)smfi_getpriv(ctx);
    if (priv) priv->ctx = ctx;  // libmilter may invoke callbacks before any private data exists
    return priv;
}
#define MLFIPRIV    fetch_priv_from_ctx(ctx)



////////////////////////////////////////////////
// syslog a message
//
// Log one line of text. When priv is given, the line is prefixed with
// "<queueid>: ". Output goes to syslog (facility LOG_MAIL, priority
// LOG_NOTICE, opened lazily under syslog_mutex) or, when use_syslog is
// false, to stdout.
void my_syslog(mlfiPriv *priv, const char *text) {
    char buf[maxlen];
    if (priv) {
        // queueid may still be NULL; handing a null pointer to %s is
        // undefined, so spell out the placeholder ourselves
        const char *id = priv->queueid ? priv->queueid : "(null)";
        snprintf(buf, sizeof(buf), "%s: %s", id, text);
        text = buf;
    }
    if (use_syslog) {
        pthread_mutex_lock(&syslog_mutex);
            if (!syslog_opened) {
                openlog("dnsbl", LOG_PID, LOG_MAIL);
                syslog_opened = true;
            }
            syslog(LOG_NOTICE, "%s", text);
        pthread_mutex_unlock(&syslog_mutex);
    }
    else {
        printf("%s \n", text);
    }
}

// string flavour of my_syslog(); it only logs when debug_syslog is above
// 3, and truncates the text to maxlen-1 characters.
void my_syslog(mlfiPriv *priv, const string text) {
    if (debug_syslog <= 3) return;

    char line[maxlen];
    strncpy(line, text.c_str(), sizeof(line));
    line[sizeof(line)-1] = '\0';    // strncpy does not terminate on truncation
    my_syslog(priv, line);
}

// Log text without a queue id prefix (forwards with a null priv).
void my_syslog(const char *text) {
    my_syslog(NULL, text);
}


////////////////////////////////////////////////
//  read a resolver request from the socket, process it, and
//  write the result back to the socket.

// Serve resolver requests arriving on `socket` until the peer goes away.
// Each request is a null terminated question; each reply is a glommer:
// the length field followed by `length` bytes of answer. With
// NS_PACKETSZ defined the answer is the raw res_search() result for an A
// query (zero length on any error); otherwise it is one ipv4 address
// from gethostbyname(), or 0. The socket is shut down and closed before
// returning.
// NOTE(review): a question that fills the whole buffer without a null
// terminator is truncated and answered; any remaining bytes would then be
// read as the start of the next question.
void process_resolver_requests(int socket);
void process_resolver_requests(int socket) {
#ifdef NS_MAXDNAME
    char question[NS_MAXDNAME];
#else
    char question[1000];
#endif
    glommer glom;
    #ifdef RESOLVER_DEBUG
        // declared here so both debug messages below compile with or without NS_PACKETSZ
        char text[1000];
    #endif

    int maxq = sizeof(question);
    while (true) {
        // read a question
        int rs = 0;
        while (rs < maxq) {
            int ns = read(socket, question+rs, maxq-rs);
            if (ns > 0) {
                rs += ns;
                if (question[rs-1] == '\0') {
                    // last byte read was the null terminator, we are done
                    break;
                }
            }
            else {
                // peer closed the socket
                #ifdef RESOLVER_DEBUG
                    my_syslog("process_resolver_requests() peer closed socket while reading question");
                #endif
                shutdown(socket, SHUT_RDWR);
                close(socket);
                return;
            }
        }
        question[rs-1] = '\0';  // ensure null termination

        // find the answer
#ifdef NS_PACKETSZ
        #ifdef RESOLVER_DEBUG
            snprintf(text, sizeof(text), "process_resolver_requests() has a question %s", question);
            my_syslog(text);
        #endif
        int res_result = res_search(question, ns_c_in, ns_t_a, glom.answer, sizeof(glom.answer));
        if (res_result < 0) glom.length = 0;   // represent all errors as zero length answers
        else                glom.length = (size_t)res_result;
#else
        glom.length = sizeof(glom.answer);
        glom.answer = 0;
        struct hostent *host = gethostbyname(question);
        if (host && (host->h_addrtype == AF_INET)) {
            memcpy(&glom.answer, host->h_addr, sizeof(glom.answer));
        }
#endif

        // write the answer
        char *buf = (char *)&glom;
        int   len = glom.length + sizeof(glom.length);
        #ifdef RESOLVER_DEBUG
            snprintf(text, sizeof(text), "process_resolver_requests() writing answer length %d for total %d", (int)glom.length, len);
            my_syslog(text);
        #endif
        int    ws = 0;
        while (len > ws) {
            int ns = write(socket, buf+ws, len-ws);
            if (ns > 0) {
                ws += ns;
            }
            else {
                // peer closed the socket!
                #ifdef RESOLVER_DEBUG
                    my_syslog("process_resolver_requests() peer closed socket while writing answer");
                #endif
                shutdown(socket, SHUT_RDWR);
                close(socket);
                return;
            }
        }
    }
}


////////////////////////////////////////////////
//  check a single dns list, return ip address in network byte order
//
// Look up `ip` (network byte order) on the dns list `suffix` by asking
// for "d.c.b.a.<suffix>." and return the A record answer, also in network
// byte order. Loopback (127/8) and rfc1918 addresses are never looked up
// and yield 0.
uint32_t check_single(mlfiPriv &priv, int32_t ip, const char *suffix);
uint32_t check_single(mlfiPriv &priv, int32_t ip, const char *suffix) {
    const u_char *octet = (const u_char *)&ip;
    const u_char  first  = octet[0];
    const u_char  second = octet[1];

    const bool loopback = (first == 127);
    const bool net10    = (first == 10);
    const bool net192   = (first == 192) && (second == 168);
    const bool net172   = (first == 172) && (16 <= second) && (second <= 31);
    if (loopback || net10 || net192 || net172) return 0;    // don't do dns lookups on these

#ifdef NS_MAXDNAME
    char question[NS_MAXDNAME];
#else
    char question[1000];
#endif
    // octets are reversed in the question name
    snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", octet[3], octet[2], octet[1], octet[0], suffix);
    // an A record in the answer implies the address is on the list
    return dns_interface(priv, question, false, NULL);
}


////////////////////////////////////////////////
//  check a single dnsbl
//
// True when `ip` gets any non zero answer from the list bl.suffix.
bool check_single(mlfiPriv &priv, int32_t ip, DNSBL &bl);
bool check_single(mlfiPriv &priv, int32_t ip, DNSBL &bl) {
    const uint32_t answer = check_single(priv, ip, bl.suffix);
    return (answer != 0);
}


////////////////////////////////////////////////
//  check a single dnswl
//
// True when the list wl.suffix answers 127.0.x.L for `ip` with L at
// least wl.level. Any answer outside 127.0.0.0/16 counts as not listed.
bool check_single(mlfiPriv &priv, int32_t ip, DNSWL &wl);
bool check_single(mlfiPriv &priv, int32_t ip, DNSWL &wl) {
    const uint32_t answer = ntohl(check_single(priv, ip, wl.suffix));
    if ((answer & (uint32_t)0xffff0000) != (uint32_t)0x7f000000) return false;

    const uint32_t level = answer & (uint32_t)0x000000ff;   // low octet carries the level
    return ((int)level >= wl.level);
}


////////////////////////////////////////////////
//  check the dnsbls specified for this recipient
//
// Walk the blacklists in dnsbll and stop at the first one that lists
// priv.ip. Results are cached per list in priv.checked_black, so each
// list is queried at most once per mlfiPriv. rejectlist is set to the
// list examined last, which on a true return is the one that matched.
bool check_dnsbl(mlfiPriv &priv, dnsblp_list &dnsbll, DNSBLP &rejectlist);
bool check_dnsbl(mlfiPriv &priv, dnsblp_list &dnsbll, DNSBLP &rejectlist) {
    for (dnsblp_list::iterator i=dnsbll.begin(); i!=dnsbll.end(); i++) {
        DNSBLP dp = *i;     // non null by construction
        bool listed;
        map<DNSBLP, bool>::iterator cached = priv.checked_black.find(dp);
        if (cached != priv.checked_black.end()) {
            // already have an answer for this list
            listed     = cached->second;
            rejectlist = cached->first;
        }
        else {
            listed     = check_single(priv, priv.ip, *dp);
            rejectlist = dp;
            priv.checked_black[dp] = listed;
        }
        if (listed) return true;
    }
    return false;
}


////////////////////////////////////////////////
//  check the dnswls specified for this recipient
//
//  The lists are tried in order and the walk stops at the first one that
//  whitelists priv.ip.  Results are remembered in priv.checked_white so
//  each list is only queried once.  acceptlist is set to the list
//  examined most recently, which on a true return is the list that matched.
//
bool check_dnswl(mlfiPriv &priv, dnswlp_list &dnswll, DNSWLP &acceptlist);
bool check_dnswl(mlfiPriv &priv, dnswlp_list &dnswll, DNSWLP &acceptlist) {
    for (dnswlp_list::iterator it=dnswll.begin(); it!=dnswll.end(); ++it) {
        DNSWLP list = *it;  // non null by construction
        bool listed;
        map<DNSWLP, bool>::iterator known = priv.checked_white.find(list);
        if (known != priv.checked_white.end()) {
            // reuse the answer from an earlier lookup
            listed     = known->second;
            acceptlist = known->first;
        }
        else {
            // first time we see this list, ask and remember
            listed     = check_single(priv, priv.ip, *list);
            acceptlist = list;
            priv.checked_white[list] = listed;
        }
        if (listed) return true;
    }
    return false;
}


////////////////////////////////////////////////
//  check the hosts from the body against the content filter and uribl dnsbls
//
//  Two passes are made.  First each host name collected in priv.memory is
//  resolved; its address is checked on the dnsbl style content list
//  (priv.content_suffix) and the host name is handed to check_uribl()
//  when priv.uribl_suffix is set.  Second, the name servers gathered
//  during the first pass are resolved if needed and their addresses are
//  checked on the content list.
//
//  random, limit - when random is set and there are more than limit host
//                  names, each host name is only looked at with
//                  probability limit/cnt.  The name server pass gives up
//                  once the running count of lookups (hosts plus name
//                  servers) exceeds 4*limit, for limit > 0.
//  msg           - on a hit, the message format of the list that matched
//  host          - on a hit, the offending name, a string owned by hosts
//  ip            - on a dnsbl style hit, the address that was listed
//  found         - reset to NULL on entry, then passed to check_uribl();
//                  the caller treats a non NULL value as a uribl style hit
//
//  returns true if some host or name server is listed.
//
bool check_hosts(mlfiPriv &priv, bool random, int limit, const char *&msg, const char *&host, int32_t &ip, const char *&found);
bool check_hosts(mlfiPriv &priv, bool random, int limit, const char *&msg, const char *&host, int32_t &ip, const char *&found) {
    found = NULL;   // normally ip address style
    if (!priv.content_suffix && !priv.uribl_suffix) return false;   // nothing to check
    string_set &hosts  = priv.memory->get_hosts();
    string_set &ignore = *priv.content_host_ignore;

    int count = 0;
    int   cnt = hosts.size();   // number of hosts we could look at
    int32_t_set ips;            // addresses already checked during this call
    ns_map  nameservers;        // filled in by dns_interface() during the first pass
    for (string_set::iterator i=hosts.begin(); i!=hosts.end(); i++) {
        host = *i;  // a reference into hosts, which will live until this smtp transaction is closed

        // don't bother looking up hosts on the ignore list
        string_set::iterator j = ignore.find(host);
        if (j != ignore.end()) continue;

        // try to only look at limit/cnt fraction of the available cnt host names in random mode
        if ((cnt > limit) && (limit > 0) && random) {
            int r = rand() % cnt;
            if (r >= limit) {
                if (debug_syslog > 2) {
                    char buf[maxlen];
                    snprintf(buf, sizeof(buf), "host %s skipped", host);
                    my_syslog(&priv, buf);
                }
                continue;
            }
        }
        count++;
        ip = dns_interface(priv, host, true, &nameservers);
        if (debug_syslog > 2) {
            char buf[maxlen];
            if (ip) {
                char adr[sizeof "255.255.255.255   "];
                adr[0] = '\0';
                inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
                snprintf(buf, sizeof(buf), "host %s found at %s", host, adr);
            }
            else {
                snprintf(buf, sizeof(buf), "host %s not found", host);
            }
            my_syslog(&priv, buf);
        }
        if (ip) {
            int32_t_set::iterator seen = ips.find(ip);
            if (seen == ips.end()) {
                // we haven't looked this up yet
                ips.insert(ip);
                // check dnsbl style list
                if (priv.content_suffix && check_single(priv, ip, priv.content_suffix)) {
                    msg = priv.content_message;
                    return true;
                }
                // Check uribl & surbl style list
                if (priv.uribl_suffix && check_uribl(priv, hosts, host, found)) {
                    msg = priv.uribl_message;
                    return true;
                }
            }
        }
    }
    limit *= 4;   // allow average of 3 ns per host name
    for (ns_mapper::iterator i=nameservers.ns_ip.begin(); i!=nameservers.ns_ip.end(); i++) {
        count++;
        if ((count > limit) && (limit > 0)) return false;   // too many name servers to check them all
        host = (*i).first;  // a transient reference that needs to be replaced before we return it
        ip   = (*i).second;
        if (!ip) ip = dns_interface(priv, host, false, NULL);
        if (debug_syslog > 2) {
            char buf[maxlen];
            if (ip) {
                char adr[sizeof "255.255.255.255   "];
                adr[0] = '\0';
                inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
                snprintf(buf, sizeof(buf), "ns %s found at %s", host, adr);
            }
            else {
                snprintf(buf, sizeof(buf), "ns %s not found", host);
            }
            my_syslog(&priv, buf);
        }
        if (ip) {
            int32_t_set::iterator seen = ips.find(ip);
            if (seen == ips.end()) {
                ips.insert(ip);
                // content_suffix may be NULL when only a uribl is configured, and
                // check_single() formats the suffix with %s, so guard it here the
                // same way the host name pass above does
                if (priv.content_suffix && check_single(priv, ip, priv.content_suffix)) {
                    msg = priv.content_message;
                    string_map::iterator j = nameservers.ns_host.find(host);
                    if (j != nameservers.ns_host.end()) {
                        const char *refer = (*j).second;
                        char buf[maxlen];
                        snprintf(buf, sizeof(buf), "%s with nameserver %s", refer, host);
                        host = register_string(hosts, buf);    // put a copy into hosts, and return that reference
                    }
                    else {
                        host = register_string(hosts, host);   // put a copy into hosts, and return that reference
                    }
                    return true;
                }
            }
        }
    }
    return false;
}


////////////////////////////////////////////////
//
// this email address is passed in from sendmail, and will normally be
// enclosed in <>.  I think older versions of sendmail supplied the <>
// wrapper if the mail client did not, but the current version does not do
// that.  So the <> wrapper is now optional.  It may have mixed case, just
// as the mail client sent it.  We dup the string and convert the duplicate
// to lower case. Some clients enclose the entire address in single quotes,
// so we strip those as well. We also remove the SRS and prvs coding.
//
// A NULL or empty address, or one that is empty inside the <> wrapper,
// yields the string "<>".
//
// The result is always a freshly allocated string (strdup) that the
// caller owns and must release with free().
//
const char *to_lower_string(const char *email);
const char *to_lower_string(const char *email) {
    if (!email) return strdup("<>");
    int n = strlen(email);
    if (n == 0) return strdup("<>");
    if (email[0] == '<') {
        // assume it also ends with >
        n -= 2;
        if (n < 1) return strdup("<>");
        email++;
    }
    if ((email[0] == '\'') && (email[n-1] == '\'') && (n > 2)) {
        // drop a pair of enclosing single quotes
        n -= 2;
        email++;
    }
    char *key = strdup(email);
    key[n] = '\0';  // cut off the trailing > and/or quote that strdup copied
    // tolower() is only defined for values representable as unsigned char
    // (or EOF), so bytes with the high bit set must not be passed as a
    // possibly negative plain char
    for (int i=0; i<n; i++) key[i] = (char)tolower((unsigned char)key[i]);
    if ((n > 14) && (strncmp(key, "srs", 3) == 0)) {
        // might have srs coding to be removed
        const int nmatch = 7;
        regmatch_t match[nmatch];
        if (0 == regexec(&srs_pattern, key, nmatch, match, 0)) {
            int s4 = match[5].rm_so;    // domain
            int e4 = match[5].rm_eo;
            int s5 = match[6].rm_so;    // user
            int e5 = match[6].rm_eo;
            if ((s4 != -1) && (s5 != -1)) {
                char *newkey = strdup(key);   // large enough
                // terminate both pieces in place, then rebuild as user@domain
                key[e4] = '\0';
                key[e5] = '\0';
                strcpy(newkey, key+s5); // user
                strcat(newkey, "@");    // @
                strcat(newkey, key+s4); // domain
                free(key);
                key = newkey;
            }
        }
    }
    if ((n > 7) && (strncmp(key, "prvs", 4) == 0)) {
        // might have prvs coding to be removed
        const int nmatch = 3;
        regmatch_t match[nmatch];
        if (0 == regexec(&prvs_pattern, key, nmatch, match, 0)) {
            int s2 = match[2].rm_so;    // user@domain
            if (s2 != -1) {
                char *newkey = strdup(key+s2);  // user@domain
                free(key);
                key = newkey;
            }
        }
    }
    return key;
}


////////////////////////////////////////////////
// start of sendmail milter interfaces
//
// connection callback: create the per connection state, record the ipv4
// address of the client when one is supplied, and attach the state to
// the milter context
sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr)
{
    mlfiPriv *priv = new mlfiPriv;

    // only ipv4 peers have their address recorded
    const bool ipv4 = hostaddr && (hostaddr->sa_family == AF_INET);
    if (ipv4) {
        const struct sockaddr_in *peer = (const struct sockaddr_in *)hostaddr;
        priv->ip = peer->sin_addr.s_addr;
    }

    // hand the state to libmilter so later callbacks can find it
    smfi_setpriv(ctx, (void*)priv);
    return SMFIS_CONTINUE;
}

// helo callback: keep a private copy of the name given by the client
sfsistat mlfi_helo(SMFICTX * ctx, char *helohost)
{
    mlfiPriv *priv = MLFIPRIV;
    priv->helo = strdup(helohost);
    return SMFIS_CONTINUE;
}

// envelope sender callback
//
// Records the lower cased sender address, the queue id, the SMTP AUTH
// identity and the client name macro in the private state, derives the
// client dns name and the forged flag from the client name, and creates
// the spam assassin and dcc helpers when they are configured.  For an
// authenticated sender the number of distinct connection ip addresses is
// checked against the hourly and daily limits, and the sender is
// rejected when either limit is exceeded.
sfsistat mlfi_envfrom(SMFICTX *ctx, char **from)
{
    mlfiPriv &priv     = *MLFIPRIV;
    CONFIG &dc         = *priv.pc;
    // smfi_getsymval() returns NULL for a macro that is not defined, and the
    // queue id may not have been assigned yet, so never strdup() it blindly
    const char *qid    = smfi_getsymval(ctx, (char*)"i");
    priv.mailaddr      = to_lower_string(from[0]);
    priv.queueid       = strdup((qid) ? qid : "unknown");
    priv.authenticated = smfi_getsymval(ctx, (char*)"{auth_authen}");
    priv.client_name   = smfi_getsymval(ctx, (char*)"_");
    if (!priv.helo)         priv.helo          = strdup("unknown");
    // the macro values belong to libmilter, so keep private copies
    if (priv.authenticated) priv.authenticated = strdup(priv.authenticated);
    if (priv.client_name) {
        priv.client_name = strdup(priv.client_name);
        // the text in front of " [" is taken as the dns name of the client
        const char *p = strstr(priv.client_name, " [");
        if (p) {
            uint pp = p - priv.client_name;
            priv.client_dns_name = strdup(priv.client_name);
            priv.client_dns_name[pp] = '\0';
            //char text[500];
            //snprintf(text, sizeof(text), "found simple dns client name %s", priv.client_dns_name);
            //my_syslog(text);
        }
        p = strstr(priv.client_name, "] (may be forged)");
        if (p) {
            priv.client_dns_forged = true;
            if (priv.client_dns_name) {
                char text[500];
                snprintf(text, sizeof(text), "forged dns client name %s", priv.client_dns_name);
                my_syslog(text);
            }
        }
    }
    if (spamc != spamc_empty) {
        priv.assassin  = new SpamAssassin(&priv, priv.ip, priv.helo, priv.mailaddr, priv.queueid);
    }
    if (dccifd_port) {
        priv.dccifd = new DccInterface(dccifd_port, &priv, priv.ip, priv.helo, priv.mailaddr);
    }
    if (priv.authenticated) {
        // limit the number of different ip addresses one authenticated id connects from
        int hourly, daily;
        add_auth_address(priv.authenticated, hourly, daily, priv.ip);
        int h_limit = dc.default_context->find_address_limit(priv.authenticated);
        int d_limit = dc.default_context->get_daily_address_multiple() * h_limit;
        if (debug_syslog > 1) {
            char msg[maxlen];
            snprintf(msg, sizeof(msg), "connect for %s (%d %d addresses, %d %d limits)", priv.authenticated, hourly, daily, h_limit, d_limit);
            my_syslog(&priv, msg);
        }
        if ((hourly > h_limit) || (daily > d_limit)){
            smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", (char*)"unique connection ip address limit exceeded");
            return SMFIS_REJECT;
        }
    }
    return SMFIS_CONTINUE;
}

// envelope recipient callback
//
// Decides what to do with one recipient of the current message.  The
// recipient is classified as white, black, reject or oksofar from the
// authentication state, the from/to settings of the matching context and
// the dns white and black lists.  Rejections are reported to the MTA with
// smfi_setreply() and SMFIS_REJECT (or SMFIS_TEMPFAIL for incompatible
// filtering contexts).  An accepted recipient may be queued for delayed
// auto-whitelisting, and a recipient that is still oksofar is registered
// in priv.env_to for content filtering at end of message.
sfsistat mlfi_envrcpt(SMFICTX *ctx, char **rcpt)
{
    DNSBLP rejectlist    = NULL;   // list that caused the reject
    mlfiPriv &priv       = *MLFIPRIV;
    CONFIG &dc           = *priv.pc;
    const char *rcptaddr = rcpt[0];
    const char *loto     = to_lower_string(rcptaddr);
    // self is true when the sender is mailing their own address
    bool self = (strcmp(loto, priv.mailaddr) == 0);

    // some version of sendmail allowed rcpt to:<> and passed it thru to the milters
    if (strcmp(loto, "<>") == 0) {
        smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", (char*)"bogus recipient");
        free((void*)loto);      // cppcheck static analysis found memory leak
        return SMFIS_REJECT;
    }
    // priv.mailaddr sending original message to loto
    CONTEXT          &con = *(dc.find_context(loto)->find_context(priv.mailaddr));
    VERIFYP           ver = con.find_verify(loto);
    const char *fromvalue = con.find_from(priv.mailaddr, true, priv.queueid);
    // tell spam assassin and dccifd about this recipient
    if (priv.assassin) priv.assassin->mlfi_envrcpt(ctx, loto);
    if (priv.dccifd)   priv.dccifd->mlfi_envrcpt(ctx, loto, con.get_grey() && !priv.authenticated);
    // loto sending a reply back to priv.mailaddr
    CONTEXT    &con2 = *(dc.find_context(priv.mailaddr)->find_context(loto));
    const char *replyvalue = con2.find_from(loto);
    if (debug_syslog > 1) {
        char buf[maxlen];
        char buf2[maxlen];
        char msg[maxlen];
        snprintf(msg, sizeof(msg), "from <%s> to <%s> using context %s state %s reply context %s state %s", priv.mailaddr, loto, con.get_full_name(buf,maxlen), fromvalue, con2.get_full_name(buf2,maxlen), replyvalue);
        my_syslog(&priv, msg);
    }
    // this copy of the lowered recipient is finished with; later uses make a fresh one
    free((void*)loto);
    status st;
    // refuse mail to someone whose replies would themselves be blacklisted
    if (replyvalue == token_black) {
        smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", (char*)"recipient can not reply due to blacklisting");
        return SMFIS_REJECT;
    }
    if (priv.authenticated) {
        // authenticated senders are rate limited by recipient count, and otherwise white
        int hourly, daily;
        incr_rcpt_count(priv.authenticated, hourly, daily);
        int h_limit = dc.default_context->find_rate_limit(priv.authenticated);
        int d_limit = dc.default_context->get_daily_rate_multiple() * h_limit;
        if (debug_syslog > 1) {
            char msg[maxlen];
            snprintf(msg, sizeof(msg), "authenticated id %s (%d %d recipients, %d %d limits)", priv.authenticated, hourly, daily, h_limit, d_limit);
            my_syslog(&priv, msg);
        }
        if ((hourly > h_limit) || (daily > d_limit)){
            smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", (char*)"recipient rate limit exceeded");
            return SMFIS_REJECT;
        }
        st = white;
    }
    else if (fromvalue == token_black) {
        st = black;
    }
    else if ((fromvalue == token_white) && !self) {
        // an explicit whitelist entry does not apply to mail sent to oneself
        st = white;
    }
    else {
        // check the dns based lists, whitelist first
        DNSWLP acceptlist = NULL;   // list that caused the whitelisting
        if (check_dnswl(priv, con.get_dnswl_list(), acceptlist)) {
            st = white;
            if (debug_syslog > 1) {
                char msg[maxlen];
                snprintf(msg, sizeof(msg), "whitelisted by %s", acceptlist->name);
                my_syslog(&priv, msg);
            }
        }
        else if (check_dnsbl(priv, con.get_dnsbl_list(), rejectlist)) {
            st = reject;
        }
        else {
            st = oksofar;
        }
    }
    if (st == reject) {
        // reject the recipient based on some dnsbl
        char adr[sizeof "255.255.255.255   "];
        adr[0] = '\0';
        inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr));
        char buf[maxlen];
        // the message of the list is used as the format and is given the address twice
        snprintf(buf, sizeof(buf), rejectlist->message, adr, adr);
        smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", buf);
        return SMFIS_REJECT;
    }
    if (st == oksofar) {
        // check forged rdns
        if (con.get_requirerdns() && (!priv.client_dns_name || priv.client_dns_forged)) {
            // reject the recipient based on forged reverse dns
            char buf[maxlen];
            snprintf(buf, sizeof(buf), "%s is not acceptable", priv.client_name);
            smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", buf);
            return SMFIS_REJECT;
        }
        // check generic rdns
        const char *msg = con.generic_match(priv.client_name);
        if (msg) {
            // reject the recipient based on generic reverse dns
            char buf[maxlen];
            snprintf(buf, sizeof(buf), msg, priv.client_name);
            smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", buf);
            return SMFIS_REJECT;
        }
    }
    if (st == black) {
        // reject the recipient based on blacklisting either from or to
        smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", (char*)"no such user");
        return SMFIS_REJECT;
    }
    if (ver) {
        // try to verify this from/to pair of addresses even if it might be explicitly whitelisted
        const char *loto = to_lower_string(rcptaddr);
        bool rc = ver->ok(priv.mailaddr, loto);
        free((void*)loto);
        if (!rc) {
            smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", (char*)"no such user");
            return SMFIS_REJECT;
        }
    }
    if (!priv.authenticated && dc.default_context->is_unauthenticated_limited(priv.mailaddr)) {
        // unauthenticated senders that are configured with a limit are rate limited by address
        int hourly, daily;
        incr_rcpt_count(priv.mailaddr, hourly, daily);
        int h_limit = dc.default_context->find_rate_limit(priv.mailaddr);
        int d_limit = dc.default_context->get_daily_rate_multiple() * h_limit;
        if (debug_syslog > 1) {
            char msg[maxlen];
            snprintf(msg, sizeof(msg), "unauthenticated address %s (%d %d recipients, %d %d limits)", priv.mailaddr, hourly, daily, h_limit, d_limit);
            my_syslog(&priv, msg);
        }
        if ((hourly > h_limit) || (daily > d_limit)){
            smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", (char*)"recipient rate limit exceeded");
            return SMFIS_REJECT;
        }
    }
    // we will accept the recipient, but add an auto-whitelist entry
    // if needed to ensure we can accept replies
    loto = to_lower_string(rcptaddr);
    WHITELISTERP w = con2.find_autowhite(loto, priv.mailaddr);

    // check if local part is too big
    const int max_local_size = 30;
    const char *p = strchr(loto, '@');
    // an address without an @ is treated as too big
    int len = (p) ? p-loto : max_local_size;
    if (len >= max_local_size) w = NULL;    // too big, pretend we don't have a whitelister

    // ignore auto whitelisting from outgoing mail from localhost
    const u_char *src = (const u_char *)&priv.ip;
    if (src[0] == 127) w = NULL;            // outgoing mail from localhost, pretend we don't have a whitelister

    // record it if we have a whitelister
    if (w) {
        DELAYWHITEP dwp = new DELAYWHITE(loto, w, &con2);   // dwp takes ownership of the string
        priv.delayer.push_back(dwp);
    }
    else {
        free((void*)loto);                                  // or we free it here
    }

    // accept the recipient
    // a context without content filtering has nothing more to check, so treat it as white
    if (!con.get_content_filtering()) st = white;

    if (st == oksofar) {
        // remember first content filtering context
        if (con.get_content_filtering()) {
            if (!priv.content_context) priv.content_context = &con;
            else if (con.get_require() && (priv.content_context != &con)) {
                smfi_setreply(ctx, (char*)"452", (char*)"4.2.1", (char*)"incompatible filtering contexts");
                return SMFIS_TEMPFAIL;
            }
            priv.need_content_filter(rcptaddr, con);
            // bu describes which name matched; when several flags are set the last one wins
            char bu[maxlen];
            bool uri = false;
            // content filtering implies also checking helo name on uribl (if enabled)
            if (priv.helo_uribl) {
                snprintf(bu, sizeof(bu), "(helo %s)", priv.host_uribl);
                uri = true;
            }
            // content filtering implies also checking client reverse dns name on uribl (if enabled)
            if (priv.client_uribl) {
                snprintf(bu, sizeof(bu), "(rdns %s)", priv.host_uribl);
                uri = true;
            }
            // content filtering implies also checking mail from domain name on uribl (if enabled)
            if (priv.from_uribl) {
                snprintf(bu, sizeof(bu), "(from %s)", priv.host_uribl);
                uri = true;
            }
            if (uri) {
                char buf[maxlen];
                snprintf(buf, sizeof(buf), priv.uribl_message, bu, priv.host_uribl);
                smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", buf);
                return SMFIS_REJECT;
            }
        }
        // remember the non-whites
        register_string(priv.env_to, rcptaddr, &con);
        priv.only_whites = false;
        priv.want_spamassassin |= (priv.assassin) &&                    // have spam assassin available and
                                  (con.get_spamassassin_limit() != 0);  // want to use it with a non-zero score
        priv.want_dccgrey      |= (priv.dccifd) &&                      // have dcc interface and
                                  (con.get_grey());                     // want to use it for greylisting
        priv.want_dccbulk      |= (priv.dccifd) &&                      // have dcc interface and
                                  (con.get_bulk() != 0);                // want to use it for bulk detection
    }
    if (st == white) {
        priv.have_whites = true;
    }
    return SMFIS_CONTINUE;
}

// header callback: notice headers that should suppress auto-whitelisting,
// then pass the header on to the content filters that are in use
sfsistat mlfi_header(SMFICTX* ctx, char* headerf, char* headerv)
{
    mlfiPriv &priv = *MLFIPRIV;

    // headers that avoid autowhitelisting
    const bool bulk_precedence = (strcasecmp(headerf, "precedence") == 0) &&
                                 (strcasecmp(headerv, "bulk") == 0);
    const bool delivery_report = (strcasecmp(headerf, "content-type") == 0) &&
                                 (strncasecmp(headerv, "multipart/report", 16) == 0);
    if (bulk_precedence || delivery_report) {
        priv.allow_autowhitelisting = false;
    }

    // other headers are only needed for content filtering
    if (priv.authenticated || priv.only_whites) return SMFIS_CONTINUE;

    if (priv.want_spamassassin) {
        priv.assassin->mlfi_header(headerf, headerv);
    }
    const bool want_dcc = priv.want_dccgrey || priv.want_dccbulk;
    if (want_dcc) {
        priv.dccifd->mlfi_header(ctx, headerf, headerv);
    }
    return SMFIS_CONTINUE;
}

// end of headers callback: carry out the auto-whitelisting that was
// delayed until the headers were seen, then tell the content filters
// that the headers are complete
sfsistat mlfi_eoh(SMFICTX* ctx)
{
    mlfiPriv &priv = *MLFIPRIV;

    // delayed autowhitelisting, one queued entry at a time
    while (!priv.delayer.empty()) {
        DELAYWHITEP pending = priv.delayer.front();
        const char *replier = pending->get_loto();
        if (!priv.allow_autowhitelisting) {
            if (debug_syslog > 1) {
                char line[maxlen];
                snprintf(line, sizeof(line), "avoid whitelist reply from <%s> for outgoing auto-responder", replier);
                my_syslog(&priv, line);
            }
            // nobody takes the string in this case, so release it here
            if (replier) free((void*)replier);
        }
        else {
            WHITELISTERP lister = pending->get_w();
            CONTEXTP     where  = pending->get_con();
            if (debug_syslog > 1) {
                char name[maxlen];
                char line[maxlen];
                snprintf(line, sizeof(line), "whitelist reply from <%s> in context %s", replier, where->get_full_name(name,maxlen));
                my_syslog(&priv, line);
            }
            // don't free it, the whitelister takes ownership of the string
            lister->sent(replier);
        }
        delete pending;
        priv.delayer.pop_front();
    }

    // content filtering
    if (priv.authenticated || priv.only_whites) return SMFIS_CONTINUE;

    if (priv.want_spamassassin) {
        priv.assassin->mlfi_eoh();
    }
    const bool want_dcc = priv.want_dccgrey || priv.want_dccbulk;
    if (want_dcc) {
        priv.dccifd->mlfi_eoh();
    }
    return SMFIS_CONTINUE;
}

// body callback: pass one block of the message body to the content
// filters in use and to the scanner; skipped entirely for authenticated
// senders and when all recipients are white
sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len)
{
    mlfiPriv &priv = *MLFIPRIV;
    if (priv.authenticated || priv.only_whites) return SMFIS_CONTINUE;

    if (priv.want_spamassassin) {
        priv.assassin->mlfi_body(data, len);
    }
    const bool want_dcc = priv.want_dccgrey || priv.want_dccbulk;
    if (want_dcc) {
        priv.dccifd->mlfi_body(data, len);
    }
    priv.scanner->scan(data, len);
    return SMFIS_CONTINUE;
}

// end of message callback
//
// Runs the content checks for the recipients registered in priv.env_to.
// Recipients whose context does not accept the content are removed.  If
// nobody is left, or a host from the body is listed, the message is
// rejected outright when there are no white recipients; otherwise the
// remaining filtered recipients are removed and the message continues.
// A message that passes may still be tempfailed for greylisting.  The
// per message state is reset before returning.
sfsistat mlfi_eom(SMFICTX *ctx)
{
    mlfiPriv   &priv = *MLFIPRIV;
    sfsistat    rc   = SMFIS_CONTINUE;  // unless one of the checks below says otherwise
    const char *host = NULL;
    int32_t     ip;
    // process end of message
    priv.eom = true;
    const bool filtering = !(priv.authenticated || priv.only_whites);
    if (filtering) {
        // assert env_to not empty, it contains the
        // non-whitelisted folks that want content filtering
        int score = 0;
        if (priv.want_spamassassin) score = priv.assassin->mlfi_eom();
        bool grey = false;
        int  bulk = 0;
        if (priv.want_dccgrey || priv.want_dccbulk) priv.dccifd->mlfi_eom(grey, bulk);

        char reply[maxlen];
        string reason;
        string_set survivors;   // recipients whose context accepted the content
        bool random = false;
        int  limit  = 0;
        for (context_map::iterator e=priv.env_to.begin(); e!=priv.env_to.end(); ++e) {
            const char *rcpt = e->first;
            CONTEXT    &con  = *(e->second);
            if (con.acceptable_content(*priv.memory, score, bulk, reason)) {
                survivors.insert(rcpt);
                random |= con.get_host_random();
                limit   = max(limit, con.get_host_limit());
            }
            else {
                // bad html tags or excessive hosts or
                // high spam assassin score or dcc bulk threshold exceedeed
                smfi_delrcpt(ctx, (char*)rcpt);
            }
        }

        // if nobody survived, reason must have been set above in acceptable_content()
        bool rejecting = survivors.empty();
        if (!rejecting) {
            const char *fmt;
            const char *found;
            if (check_hosts(priv, random, limit, fmt, host, ip, found)) {
                if (found) {
                    // uribl style
                    snprintf(reply, sizeof(reply), fmt, host, found);
                }
                else {
                    // dnsbl style
                    char adr[sizeof "255.255.255.255   "];
                    adr[0] = '\0';
                    inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
                    snprintf(reply, sizeof(reply), fmt, host, adr);
                }
                reason = string(reply);
                rejecting = true;
            }
        }

        if (rejecting && !priv.have_whites) {
            // can reject the entire message
            snprintf(reply, sizeof(reply), "%s", reason.c_str());
            smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", reply);
            rc = SMFIS_REJECT;
        }
        else if (rejecting) {
            // need to accept it but remove the recipients that don't want it
            for (string_set::iterator s=survivors.begin(); s!=survivors.end(); ++s) {
                const char *rcpt = *s;
                smfi_delrcpt(ctx, (char*)rcpt);
            }
        }
        else if (priv.want_dccgrey && grey) {
            smfi_setreply(ctx, (char*)"452", (char*)"4.2.1", (char*)"temporary greylist embargoed");
            rc = SMFIS_TEMPFAIL;
        }
    }
    // reset for a new message on the same connection
    mlfi_abort(ctx);
    return rc;
}

// abort callback: reset the private state; mlfi_eom() also calls this to
// get ready for a new message on the same connection
sfsistat mlfi_abort(SMFICTX *ctx)
{
    mlfiPriv *priv = MLFIPRIV;
    priv->reset();
    return SMFIS_CONTINUE;
}

// close callback: destroy the private state, if there is one, and clear
// the pointer held by the milter context
sfsistat mlfi_close(SMFICTX *ctx)
{
    mlfiPriv *priv = MLFIPRIV;
    if (priv) {
        delete priv;
        smfi_setpriv(ctx, NULL);
    }
    return SMFIS_CONTINUE;
}

// description of this filter: its name, the flags it needs, and the
// callbacks defined above, one per stage of the smtp conversation
struct smfiDesc smfilter =
{
    (char*)"DNSBL",     // filter name
    SMFI_VERSION,       // version code -- do not change
    SMFIF_DELRCPT,      // flags, mlfi_eom() removes recipients with smfi_delrcpt()
    mlfi_connect,       // connection info filter
    mlfi_helo,          // SMTP HELO command filter
    mlfi_envfrom,       // envelope sender filter
    mlfi_envrcpt,       // envelope recipient filter
    mlfi_header,        // header filter
    mlfi_eoh,           // end of header
    mlfi_body,          // body block filter
    mlfi_eom,           // end of message
    mlfi_abort,         // message aborted
    mlfi_close,         // connection cleanup
};


////////////////////////////////////////////////
//  load a fresh copy of the config
//
//  The new config gets the next generation number, taken under
//  config_mutex, and is loaded from dnsbl.conf.  Returns the new config
//  with its load time set, or NULL (after deleting it) if the load failed.
//
CONFIG* new_conf();
CONFIG* new_conf() {
    CONFIG *fresh = new CONFIG;

    pthread_mutex_lock(&config_mutex);
        fresh->generation = generation++;
    pthread_mutex_unlock(&config_mutex);

    if (debug_syslog) {
        char line[maxlen];
        snprintf(line, sizeof(line), "loading configuration generation %d", fresh->generation);
        my_syslog(line);
    }

    if (!load_conf(*fresh, "dnsbl.conf")) {
        // the load failed, discard the partial config
        delete fresh;
        return NULL;
    }
    fresh->load_time = time(NULL);
    return fresh;
}


////////////////////////////////////////////////
//  thread to watch the old config files for changes
//  and reload when needed.
//  we also clear the SMTP AUTH recipient counts hourly
//
extern "C" {void* config_loader(void *arg);}
void* config_loader(void *arg) {
    int loop1 = 0;
    int loop2 = 0;
    while (loader_run) {
        sleep(180);  // look for modifications every 3 minutes
        if (!loader_run) break;
        loop1++;
        loop2++;
        if (loop1 == 20) {
            // three minutes thru each loop, 20 loops per hour
            // clear the recipient hourly counts and hourly sets of ip connection addresses
            pthread_mutex_lock(&rate_mutex);
                for (rates::iterator i=rcpt_hourly_counts.begin(); i!=rcpt_hourly_counts.end(); i++) {
                    (*i).second = 0;
                }
                for (auth_addresses::iterator j=auth_hourly_addresses.begin(); j!=auth_hourly_addresses.end(); j++) {
                    delete (*j).second;
                    (*j).second = new int32_t_set;
                }
            pthread_mutex_unlock(&rate_mutex);
            loop1 = 0;
        }
        if (loop2 == 480) {
            // three minutes thru each loop, 480 loops per day
            // clear the recipient daily counts and daily sets of connection ip addresses
            pthread_mutex_lock(&rate_mutex);
                for (rates::iterator i=rcpt_daily_counts.begin(); i!=rcpt_daily_counts.end(); i++) {
                    (*i).second = 0;
                }
                for (auth_addresses::iterator j=auth_daily_addresses.begin(); j!=auth_daily_addresses.end(); j++) {
                    delete (*j).second;
                    (*j).second = new int32_t_set;
                }
            pthread_mutex_unlock(&rate_mutex);
            loop2 = 0;
        }
        CONFIG &dc = *config;
        time_t then = dc.load_time;
        struct stat st;
        bool reload = false;
        for (string_set::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) {
            const char *fn = *i;
            if (stat(fn, &st))           reload = true; // file disappeared
            else if (st.st_mtime > then) reload = true; // file modified
            if (reload) break;
        }
        if (reload) {
            CONFIG *newc = new_conf();
            if (newc) {
                // replace the global config pointer
                pthread_mutex_lock(&config_mutex);
                    CONFIG *pc = config;
                    bool last = pc && (!pc->reference_count);
                    config = newc;
                pthread_mutex_unlock(&config_mutex);
                if (last) delete pc;    // there were no references to this config
            }
            else {
                // failed to load new config
                my_syslog("failed to load new configuration");
                system("echo 'failed to load new dnsbl configuration from /etc/dnsbl' | mail -s 'error in /etc/dnsbl configuration' root");
                // update the load time on the current config to prevent complaining every 3 minutes
                dc.load_time = time(NULL);
            }
        }
    }
    return NULL;
}


// print the command line help on stderr; prog is the program name shown
// in the synopsis line
void usage(const char *prog);
void usage(const char *prog)
{
    // the synopsis is the only line that needs formatting
    fprintf(stderr, "Usage: %s  [-d [level]] [-c] [-s] [-e from|to] [-b dccifd-addr] -r port -p sm-sock-addr [-t timeout]\n", prog);

    // the remaining lines are fixed text; the names used here must match
    // the names used in the synopsis above
    static const char *const details[] = {
        "where dccifd-addr is for the connection to dccifd\n",
        "    and should be local-domain-socket-file-name\n",
        "where port is for the connection to our own dns resolver processes\n",
        "    and should be local-domain-socket-file-name\n",
        "where sm-sock-addr is for the connection to sendmail\n",
        "    and should be one of\n",
        "        inet:port@ip-address\n",
        "        local:local-domain-socket-file-name\n",
        "-c will load and dump the config to stdout\n",
        "-s will stress test the config loading code by repeating the load/free cycle\n",
        "        in an infinite loop.\n",
        "-d will set the syslog message level, currently 0 to 3\n",
        "-e will print the results of looking up the from and to addresses in the\n",
        "        current config. The | character is used to separate the from and to\n",
        "        addresses in the argument to the -e switch\n",
    };
    for (size_t i = 0; i < sizeof(details) / sizeof(details[0]); i++) {
        fputs(details[i], stderr);
    }
}



void setup_socket(const char *sock);
/*
 * Unlink the file system entry named by sock.
 * The result of unlink() is deliberately not examined, so a path that
 * does not exist is not treated as an error.
 */
void setup_socket(const char *sock)
{
    (void)unlink(sock);
}


/*
 * The signal handler function -- only gets called when a SIGCHLD
 * is received, ie when a child terminates.
 *
 * Collects every child that has already exited, without blocking, so
 * that none of them is left behind as a zombie.
 *
 * waitpid() overwrites errno (it reports ECHILD once there is nothing
 * left to collect). Since the handler can run between a failing call
 * and the interrupted code's look at errno, the value found on entry is
 * put back before returning.
 */
void sig_chld(int signo)
{
    (void)signo;                    // part of the handler signature, not needed here
    const int saved_errno = errno;
    int status;
    /* Wait for any child without blocking */
    while (waitpid(-1, &status, WNOHANG) > 0) {
        // ignore child exit status, we only do this to cleanup zombies
    }
    errno = saved_errno;
}


/*
 * Program entry point.
 *
 * After compiling the srs/prvs address patterns and parsing the command
 * line, one of the following happens:
 *
 *   -c   load the config, dump it, and return 0 (or 1 if it fails to load)
 *   -s   load and delete the config in an endless loop
 *   -e   print the contexts and status found for a from|to address pair,
 *        then return 0
 *
 * otherwise both -p and -r are required and the process becomes the
 * milter daemon: it registers the filter, goes to the background, writes
 * the pid file, initializes the mutexes, switches to the dnsbl user when
 * that account exists, loads the initial config, forks the resolver
 * listener process, starts the helper threads and finally runs
 * smfi_main(). If smfi_main() fails after more than five minutes of run
 * time the program re-executes itself.
 */
int main(int argc, char**argv)
{
    token_init();
    bool check   = false;
    bool stress  = false;
    bool setconn = false;
    bool setreso = false;
    char *email = NULL;
    int c;
    const char *args = "b:r:p:t:e:d:chs";
    extern char *optarg;

    // setup srs coding detection
    if (regcomp(&srs_pattern, "^srs(0|1)(=|\\+)([^=]+)=([^=]+)=([^=]+)=([^@]+)@", REG_ICASE | REG_EXTENDED)) {
        printf("cannot compile regex pattern to find srs coding in mail addresses\n");
        exit(3);
    }

    // setup prvs coding detection
    if (regcomp(&prvs_pattern, "^prvs=([^=]+)=(.+)$", REG_ICASE | REG_EXTENDED)) {
        printf("cannot compile regex pattern to find prvs coding in mail addresses\n");
        exit(3);
    }

    // Process command line options
    while ((c = getopt(argc, argv, args)) != -1) {
        switch (c) {
            case 'b':
                if (optarg == NULL || *optarg == '\0') {
                    fprintf(stderr, "Illegal dccifd socket: %s\n", optarg);
                    exit(EX_USAGE);
                }
                dccifd_port = strdup(optarg);
                break;

            case 'r':
                if (optarg == NULL || *optarg == '\0') {
                    fprintf(stderr, "Illegal resolver socket: %s\n", optarg);
                    exit(EX_USAGE);
                }
                resolver_port = strdup(optarg);
                // remove any existing file at that path
                setup_socket(resolver_port);
                setreso = true;
                break;

            case 'p':
                if (optarg == NULL || *optarg == '\0') {
                    fprintf(stderr, "Illegal sendmail socket: %s\n", optarg);
                    exit(EX_USAGE);
                }
                if (smfi_setconn(optarg) == MI_FAILURE) {
                    fprintf(stderr, "smfi_setconn failed\n");
                    exit(EX_SOFTWARE);
                }
                // for a unix domain socket, remove any existing file at that path
                if (strncasecmp(optarg, "unix:", 5) == 0)       setup_socket(optarg + 5);
                else if (strncasecmp(optarg, "local:", 6) == 0) setup_socket(optarg + 6);
                setconn = true;
                break;

            case 't':
                if (optarg == NULL || *optarg == '\0') {
                    fprintf(stderr, "Illegal timeout: %s\n", optarg);
                    exit(EX_USAGE);
                }
                if (smfi_settimeout(atoi(optarg)) == MI_FAILURE) {
                    fprintf(stderr, "smfi_settimeout failed\n");
                    exit(EX_SOFTWARE);
                }
                break;

            case 'e':
                // the last -e on the command line wins
                if (email) free((void*)email);
                email = strdup(optarg);
                break;

            case 'c':
                check = true;
                break;

            case 's':
                stress = true;
                break;

            case 'd':
                if (optarg == NULL || *optarg == '\0') debug_syslog = 1;
                else                                   debug_syslog = atoi(optarg);
                break;

            case 'h':
            default:
                usage(argv[0]);
                exit(EX_USAGE);
        }
    }

    // -c: load the config once and dump it
    if (check) {
        use_syslog   = false;
        debug_syslog = 10;
        CONFIG *conf = new_conf();
        if (conf) {
            conf->dump();
            delete conf;
            clear_strings();    // for valgrind checking
            return 0;
        }
        else {
            return 1;   // config failed to load
        }
    }

    // -s: never returns, repeats ten load/delete cycles then sleeps a second
    if (stress) {
        fprintf(stdout, "stress testing\n");
        while (1) {
            for (int i=0; i<10; i++) {
                CONFIG *conf = new_conf();
                if (conf) delete conf;
            }
            fprintf(stdout, ".");
            fflush(stdout);
            sleep(1);
        }
    }

    // -e from|to: report what the current config says about this pair.
    // An argument without the | separator prints nothing, and we still
    // return 0.
    if (email) {
        char *x = strchr(email, '|');
        if (x) {
            *x = '\0';  // split the argument in place into from and to
            const char *from = to_lower_string(email);
            const char *to   = to_lower_string(x+1);
            use_syslog = false;
            CONFIG *conf = new_conf();
            if (conf) {
                CONTEXTP con = conf->find_context(to);
                char buf[maxlen];
                fprintf(stdout, "envelope to   <%s> finds context %s\n", to, con->get_full_name(buf,maxlen));
                CONTEXTP fc = con->find_context(from);
                fprintf(stdout, "envelope from <%s> finds context %s\n", from, fc->get_full_name(buf,maxlen));
                const char *st = fc->find_from(from);
                fprintf(stdout, "envelope from <%s> finds status %s\n", from, st);
                bool self = (strcmp(from, to) == 0);
                if ((st == token_white) && self) fprintf(stdout, "ignore self whitelisting\n");
                delete conf;
            }
        }
        return 0;
    }

    // from here on we are going to run as the milter daemon
    if (!setconn) {
        fprintf(stderr, "%s: Missing required -p argument\n", argv[0]);
        usage(argv[0]);
        exit(EX_USAGE);
    }

    if (!setreso) {
        fprintf(stderr, "%s: Missing required -r argument\n", argv[0]);
        usage(argv[0]);
        exit(EX_USAGE);
    }

    if (smfi_register(smfilter) == MI_FAILURE) {
        fprintf(stderr, "smfi_register failed\n");
        exit(EX_UNAVAILABLE);
    }

    // switch to background mode
    if (daemon(1,0) < 0) {
        fprintf(stderr, "daemon() call failed\n");
        exit(EX_UNAVAILABLE);
    }

    // write the pid
    const char *pidpath = "/var/run/dnsbl.pid";
    unlink(pidpath);
    FILE *f = fopen(pidpath, "w");
    if (f) {
#ifdef linux
        // from a comment in the DCC source code:
        // Linux threads are broken.  Signals given the
        // original process are delivered to only the
        // thread that happens to have that PID.  The
        // sendmail libmilter thread that needs to hear
        // SIGINT and other signals does not, and that breaks
        // scripts that need to stop milters.
        // However, signaling the process group works.
        fprintf(f, "-%d\n", (u_int)getpgrp());
#else
        fprintf(f, "%d\n", (u_int)getpid());
#endif
        fclose(f);
    }

    // initialize the thread sync objects
    // NOTE(review): rate_mutex, which is locked elsewhere in this file, is
    // not initialized here -- confirm that it is initialized somewhere else.
    pthread_mutex_init(&config_mutex, 0);
    pthread_mutex_init(&syslog_mutex, 0);
    pthread_mutex_init(&resolve_mutex, 0);
    pthread_mutex_init(&fd_pool_mutex, 0);
    pthread_mutex_init(&verifier_mutex, 0);
    pthread_mutex_init(&whitelister_mutex, 0);

    // drop root privs
    // a failure here is only logged, we keep running with the current ids
    struct passwd *pw = getpwnam("dnsbl");
    if (pw) {
        if (setgid(pw->pw_gid) == -1) {
            my_syslog("failed to switch to group dnsbl");
        }
        if (setuid(pw->pw_uid) == -1) {
            my_syslog("failed to switch to user dnsbl");
        }
    }

    // load the initial config
    config = new_conf();
    if (!config) {
        my_syslog("failed to load initial configuration, quitting");
        exit(1);
    }

    // fork off the resolver listener process
    // NOTE(review): the failure paths below exit with status 0.
    pid_t child = fork();
    if (child < 0) {
        my_syslog("failed to create resolver listener process");
        exit(0);
    }
    if (child == 0) {
        // we are the child - dns resolver listener process
        resolver_socket = socket(AF_UNIX, SOCK_STREAM, 0);
        if (resolver_socket < 0) {
            my_syslog("child failed to create resolver socket");
            exit(0);   // failed
        }
        sockaddr_un server;
        memset(&server, '\0', sizeof(server));
        server.sun_family = AF_UNIX;
        // the memset above keeps the path terminated when it is truncated
        strncpy(server.sun_path, resolver_port, sizeof(server.sun_path)-1);
        //try to bind the address to the socket.
        if (bind(resolver_socket, (sockaddr *)&server, sizeof(server)) < 0) {
            // bind failed
            shutdown(resolver_socket, SHUT_RDWR);
            close(resolver_socket);
            my_syslog("child failed to bind resolver socket");
            exit(0);   // failed
        }
        //listen on the socket.
        if (listen(resolver_socket, 10) < 0) {
            // listen failed
            shutdown(resolver_socket, SHUT_RDWR);
            close(resolver_socket);
            my_syslog("child failed to listen to resolver socket");
            exit(0);   // failed
        }
        // setup sigchld handler to prevent zombies
        struct sigaction act;
        act.sa_handler = sig_chld;      // Assign sig_chld as our SIGCHLD handler
        sigemptyset(&act.sa_mask);      // We don't want to block any other signals in this example
        act.sa_flags = SA_NOCLDSTOP;    // only want children that have terminated
        if (sigaction(SIGCHLD, &act, NULL) < 0) {
            my_syslog("child failed to setup SIGCHLD handler");
            exit(0);   // failed
        }
        while (true) {
            sockaddr_un client;
            socklen_t   clientlen = sizeof(client);
            int s = accept(resolver_socket, (sockaddr *)&client, &clientlen);
            // accept() reports failure with -1, every other value
            // (including 0) is a usable descriptor
            if (s >= 0) {
                // accept worked, it did not get cancelled before we could accept it
                // fork off a process to handle this connection
                int newchild = fork();
                if (newchild == 0) {
                    // this is the worker process
                    // child does not need the listening socket
                    close(resolver_socket);
#ifdef NS_PACKETSZ
                    res_init();
                    _res.retry   = 2;
                    _res.retrans = RES_TIMEOUT;
#endif
                    process_resolver_requests(s);
                    exit(0);
                }
                else {
                    // this is the parent, or the fork failed
                    // parent does not need the accepted socket
                    close(s);
                }
            }
        }
        exit(0);    // make sure we don't fall thru.
    }
    else {
        sleep(2);   // allow child to get started
    }

    // only create threads after the fork() in daemon
    // tid is only meaningful when pthread_create() succeeded, so a thread
    // that could not be created is never detached.
    pthread_t tid;
    if (pthread_create(&tid, 0, config_loader, 0))
        my_syslog("failed to create config loader thread");
    else if (pthread_detach(tid))
        my_syslog("failed to detach config loader thread");

    if (pthread_create(&tid, 0, verify_closer, 0))
        my_syslog("failed to create verify closer thread");
    else if (pthread_detach(tid))
        my_syslog("failed to detach verify closer thread");

    if (pthread_create(&tid, 0, whitelister_writer, 0))
        my_syslog("failed to create autowhite writer thread");
    else if (pthread_detach(tid))
        my_syslog("failed to detach autowhite writer thread");

    time_t starting = time(NULL);
    int rc = smfi_main();
    // only restart when we ran for more than five minutes before failing
    if ((rc != MI_SUCCESS) && (time(NULL) > starting+5*60)) {
        my_syslog("trying to restart after smfi_main()");
        loader_run = false;     // eventually the config loader thread will terminate
        execvp(argv[0], argv);
        // only reached when execvp() failed
    }
    exit((rc == MI_SUCCESS) ? 0 : EX_UNAVAILABLE);
}