diff src/dnsbl.cpp @ 8:dbe18921f741

integration work on url scanner
author carl
date Thu, 22 Apr 2004 11:25:45 -0700
parents 793ac9cc114d
children 8c65411cd7ab
line wrap: on
line diff
--- a/src/dnsbl.cpp	Thu Apr 22 08:38:07 2004 -0700
+++ b/src/dnsbl.cpp	Thu Apr 22 11:25:45 2004 -0700
@@ -19,7 +19,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <errno.h>
-#include <stdio.h>
+//#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sysexits.h>
@@ -52,8 +52,18 @@
 #include <fstream>
 #include <syslog.h>
 
+static char* dnsbl_version="$Id$";
 
-static char* version="$Id$";
+#define DEFAULT "default"
+#define WHITE   "white"
+#define BLACK   "black"
+#define OK      "ok"
+#define MANY    "many"
+
+enum status {oksofar,   // not rejected yet
+             white,     // whitelisted by envelope from
+             black,     // blacklisted by envelope from or to
+             reject};   // rejected by a dns list
 
 using namespace std;
 
@@ -63,7 +73,9 @@
     sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr);
     sfsistat mlfi_envfrom(SMFICTX *ctx, char **argv);
     sfsistat mlfi_envrcpt(SMFICTX *ctx, char **argv);
-    sfsistat mlfi_eom_or_abort(SMFICTX *ctx);
+    sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len);
+    sfsistat mlfi_eom(SMFICTX *ctx);
+    sfsistat mlfi_abort(SMFICTX *ctx);
     sfsistat mlfi_close(SMFICTX *ctx);
 }
 
@@ -104,12 +116,16 @@
     from_map    env_from;
     string_map  env_to_dnsbll;      // map recipient to a named dnsbll
     string_map  env_to_chkfrom;     // map recipient to a named from map
+    char *      content_suffix;     // for sbl url body filtering
+    char *      content_message;
     CONFIG();
     ~CONFIG();
 };
 CONFIG::CONFIG() {
     reference_count = 0;
     load_time       = 0;
+    content_suffix  = NULL;     // stays NULL unless a "content" config command supplies a dns suffix
+    content_message = NULL;     // reply template registered together with content_suffix
 }
 CONFIG::~CONFIG() {
     for (dnsblp_map::iterator i=dnsbls.begin(); i!=dnsbls.end(); i++) {
@@ -135,32 +151,41 @@
 
 
 
+// include the content scanner
+#include "scanner.cpp"
+
+
 ////////////////////////////////////////////////
-// predefined names
+// helper to discard the strings held by a string_set
 //
-#define DEFAULT "default"
-#define WHITE   "white"
-#define BLACK   "black"
-#define OK      "ok"
-#define MANY    "many"
+static void discard(string_set &s);
+static void discard(string_set &s) {    // by reference: it is the caller's own set that must end up empty
+    for (string_set::iterator i=s.begin(); i!=s.end(); i++) free(*i);
+    // forget the freed pointers; left in place they are read again or freed twice later
+    s.clear();
+}
 
 
 ////////////////////////////////////////////////
 // mail filter private data, held for us by sendmail
 //
-enum status {oksofar,   // not rejected yet
-             white,     // whitelisted by envelope from
-             black,     // blacklisted by envelope from or to
-             reject};   // rejected by a dns list
 struct mlfiPriv
 {
+    // connection specific data
     CONFIG  *pc;            // global context with our maps
     int     ip;             // ip4 address of the smtp client
+    map<DNSBLP, status> checked;    // per-connection cache of the status already obtained from each dns list
+    // message specific data, put back to its defaults by reset()
     char    *mailaddr;      // envelope from value
     bool    authenticated;  // client authenticated? if so, suppress all dnsbl checks
-    map<DNSBLP, status> checked;    // status from those lists
+    bool    have_whites;    // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails
+    bool    only_whites;    // every recipient is whitelisted? starts true, cleared when a non-whitelisted recipient is accepted
+    url_scanner *scanner;   // object to handle body scanning, created with new and deleted in reset()
+    string_set  non_whites; // strdup'd copies of the non-whitelisted recipients so we can remove them if need be
+    string_set  urls;       // urls for this message: the scanner is constructed with a pointer to this set, check_urls reads it
     mlfiPriv();
     ~mlfiPriv();
+    void reset(bool final = false); // for a new message; final=true is the destructor's cleanup-only call
 };
 mlfiPriv::mlfiPriv() {
     pthread_mutex_lock(&config_mutex);
@@ -169,12 +194,29 @@
     pthread_mutex_unlock(&config_mutex);
     ip       = 0;
     mailaddr = NULL;
+    authenticated = false;
+    have_whites   = false;
+    only_whites   = true;
+    scanner       = new url_scanner(&urls);
 }
 mlfiPriv::~mlfiPriv() {
     pthread_mutex_lock(&config_mutex);
         pc->reference_count--;
     pthread_mutex_unlock(&config_mutex);
+    reset(true);    // final cleanup: release the per-message storage without creating a new scanner
+}
+void mlfiPriv::reset(bool final) {     // final is true when called from the destructor: free only, do not re-arm
     if (mailaddr) free(mailaddr);
+    delete scanner;
+    discard(non_whites); non_whites.clear();   // also drop the freed pointers, otherwise the next message
+    discard(urls);       urls.clear();         // reads them and the next reset frees them a second time
+    if (!final) {
+        mailaddr      = NULL;
+        authenticated = false;
+        have_whites   = false;
+        only_whites   = true;
+        scanner       = new url_scanner(&urls);
+    }
 }
 
 #define MLFIPRIV    ((struct mlfiPriv *) smfi_getpriv(ctx))
@@ -294,34 +336,64 @@
 
 
 ////////////////////////////////////////////////
-//  check a single dnsbl - we don't try very hard, just
-//  using the default resolver retry settings. If we cannot
-//  get an answer, we just accept the mail. The caller
-//  must ensure thread safety.
+//
+//  ask a dns question and get an A record answer - we don't try
+//  very hard, just using the default resolver retry settings.
+//  If we cannot get an answer, we just accept the mail.  The
+//  caller must ensure thread safety.
+//
 //
-static status check_single(int ip, DNSBL &bl);
-static status check_single(int ip, DNSBL &bl) {
+static int dns_interface(char *question);
+static int dns_interface(char *question) {     // returns the first usable A record's address bytes as an int, 0 if none
+    u_char answer[NS_PACKETSZ];
+    int length = res_search(question, ns_c_in, ns_t_a, answer, sizeof(answer));
+    if (length < 0) return 0;   // error in getting answer
+    if (length > (int)sizeof(answer)) length = sizeof(answer);  // reply did not fit: parse only the bytes we actually hold
+    ns_msg handle;
+    ns_rr  rr;
+    if (ns_initparse(answer, length, &handle) != 0) return 0;   // malformed or cut-off reply counts as no answer
+    int rrnum = 0;
+    while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) {   // walk the answer section
+        if ((ns_rr_type(rr) == ns_t_a) && (ns_rr_rdlen(rr) == NS_INADDRSZ)) {   // skip A records whose data is not 4 bytes
+            int address;
+            memcpy(&address, ns_rr_rdata(rr), sizeof(address));     // bytes copied as-is, no byte order conversion
+            return address;
+        }
+    }
+    return 0;
+}
+
+static int protected_dns_interface(char *question);
+static int protected_dns_interface(char *question) {   // dns_interface() run while holding resolve_mutex
+    int ans;
+    pthread_mutex_lock(&resolve_mutex);
+        ans = dns_interface(question);
+    pthread_mutex_unlock(&resolve_mutex);
+    return ans;     // whatever dns_interface returned: 0 when no A record was obtained
+
+}
+
+////////////////////////////////////////////////
+//  check an ip address against a single dns list, named by its dns suffix
+//  the resolver lock is taken inside protected_dns_interface, callers need not hold it
+static status check_single(int ip, char *suffix);
+static status check_single(int ip, char *suffix) {
     // make a dns question
     const u_char *src = (const u_char *)&ip;
     if (src[0] == 127) return oksofar;  // don't do dns lookups on localhost
     char question[NS_MAXDNAME];
-    snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", src[3], src[2], src[1], src[0], bl.suffix);
-    // ask the question
-    u_char answer[NS_PACKETSZ];
-    int length = res_search(question, ns_c_in, ns_t_a, answer, sizeof(answer));
-    if (length < 0) return oksofar;     // error in getting answer
-    // parse the answer
-    ns_msg handle;
-    ns_rr  rr;
-    if (ns_initparse(answer, length, &handle) != 0) return oksofar;
-    int rrnum = 0;
-    while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) {
-        if (ns_rr_type(rr) == ns_t_a) {
-            // we see an A record, implies blacklisted ip address
-            return reject;
+    snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", src[3], src[2], src[1], src[0], suffix);  // address bytes in reverse order, then the list suffix
+    // ask the question, if we get an A record it implies a blacklisted ip address
+    return (protected_dns_interface(question)) ? reject : oksofar;    // a failed lookup yields 0 and so oksofar
         }
-    }
-    return oksofar;
+
+
+////////////////////////////////////////////////
+//  check a single dnsbl - overload that takes the list entry and queries its suffix
+//
+static status check_single(int ip, DNSBL &bl);
+static status check_single(int ip, DNSBL &bl) {
+    return check_single(ip, bl.suffix);    // forwards to the (ip, suffix) overload
 }
 
 
@@ -339,9 +411,7 @@
         map<DNSBLP, status>::iterator f = priv.checked.find(dp);
         if (f == priv.checked.end()) {
             // have not checked this list yet
-            pthread_mutex_lock(&resolve_mutex);
                 st = check_single(priv.ip, *dp);
-            pthread_mutex_unlock(&resolve_mutex);
             rejectlist = dp;
             priv.checked[dp] = st;
         }
@@ -356,6 +426,30 @@
 
 
 ////////////////////////////////////////////////
+//  look up the urls collected for this message and check their addresses against the content dns list;
+//  on reject, url and ip are the offending name and its address, otherwise they are not meaningful
+static status check_urls(mlfiPriv &priv, char *&url, int &ip);
+static status check_urls(mlfiPriv &priv, char *&url, int &ip) {
+    CONFIG     &dc   = *priv.pc;
+    if (!dc.content_suffix) return oksofar;    // no content suffix configured, nothing to check
+    int count = 0;
+    for (string_set::iterator i=priv.urls.begin(); i!=priv.urls.end(); i++) {
+        if (++count > 20) break;  // silly to check too many urls
+        url = *i;
+        char buf[200];
+        snprintf(buf, sizeof(buf), "looking for url %s", url);
+        my_syslog(buf);
+        ip  = protected_dns_interface(url);     // 0 when the name has no A record or the lookup failed
+        if (ip) {
+            status st = check_single(ip, dc.content_suffix);
+            if (st == reject) return st;
+        }
+    }
+    return oksofar;     // nothing was listed; without this the function fell off its end (undefined result)
+}
+
+
+////////////////////////////////////////////////
 // start of sendmail milter interfaces
 //
 sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr)
@@ -419,7 +513,7 @@
         // reject the recipient based on some dnsbl
         char adr[sizeof "255.255.255.255"];
         adr[0] = '\0';
-        const char *rc = inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr));
+        inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr));
         char buf[2000];
         snprintf(buf, sizeof(buf), rejectlist->message, adr, adr);
         smfi_setreply(ctx, "550", "5.7.1", buf);
@@ -432,17 +526,65 @@
     }
     else {
         // accept the recipient
+        if (st == oksofar) {
+            // but remember the non-whites
+            priv.non_whites.insert(strdup(rcptaddr));
+            priv.only_whites = false;
+        }
+        if (st == white) {
+            priv.have_whites = true;
+        }
         return SMFIS_CONTINUE;
     }
 }
 
-sfsistat mlfi_eom_or_abort(SMFICTX *ctx)
+sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len)
 {
     mlfiPriv &priv = *MLFIPRIV;
-    if (priv.mailaddr) {
-        free(priv.mailaddr);
-        priv.mailaddr = NULL;
+    if (priv.authenticated || priv.only_whites) return SMFIS_CONTINUE; // no recipient needs this body scanned
+    priv.scanner->scan(data, len);     // hand this body chunk to the url scanner
+    return SMFIS_CONTINUE;             // previously missing: the callback ended without returning a value
     }
+
+sfsistat mlfi_eom(SMFICTX *ctx)
+{
+    sfsistat rc;
+    mlfiPriv &priv = *MLFIPRIV;
+    char *url = NULL;  // points into priv.urls, so it must be used before the reset below
+    int  ip  = 0;      // filled in by check_urls on reject; initialized so inet_ntop never reads an indeterminate value
+    // process end of message
+    if (priv.authenticated ||
+        priv.only_whites   ||
+        (check_urls(priv, url, ip) == oksofar)) rc = SMFIS_CONTINUE;
+    else {
+        if (!priv.have_whites) {
+            // can reject the entire message
+            char adr[sizeof "255.255.255.255"];
+            adr[0] = '\0';
+            inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
+            char buf[2000];
+            snprintf(buf, sizeof(buf), priv.pc->content_message, url, adr);    // configured template is the format; presumably has two %s - verify
+            smfi_setreply(ctx, "550", "5.7.1", buf);
+            rc = SMFIS_REJECT;
+        }
+        else {
+            // need to accept it but remove the recipients that don't want it
+            for (string_set::iterator i=priv.non_whites.begin(); i!=priv.non_whites.end(); i++) {
+                char *rcpt = *i;
+                smfi_delrcpt(ctx, rcpt);
+            }
+            rc = SMFIS_CONTINUE;
+        }
+    }
+    // reset for a new message on the same connection
+    mlfi_abort(ctx);
+    return rc;
+}
+
+sfsistat mlfi_abort(SMFICTX *ctx)
 {
+    mlfiPriv &priv = *MLFIPRIV;
+    priv.reset();      // non-final reset: free the per-message data and set up for the next message
     return SMFIS_CONTINUE;
 }
 
@@ -466,9 +608,9 @@
     mlfi_envrcpt,       // envelope recipient filter
     NULL,               // header filter
     NULL,               // end of header
-    NULL,               // body block filter
-    mlfi_eom_or_abort,  // end of message
-    mlfi_eom_or_abort,  // message aborted
+    mlfi_body,          // body block filter
+    mlfi_eom,           // end of message
+    mlfi_abort,         // message aborted
     mlfi_close,         // connection cleanup
 };
 
@@ -610,7 +752,8 @@
 static void load_conf(CONFIG &dc, char *fn) {
     dc.config_files.push_back(fn);
     map<char*, int, ltstr> commands;
-    enum {dummy, dnsbl, dnsbll, envfrom, envto, include, includedcc};
+    enum {dummy, content, dnsbl, dnsbll, envfrom, envto, include, includedcc};
+    commands["content"    ] = content;
     commands["dnsbl"      ] = dnsbl;
     commands["dnsbl_list" ] = dnsbll;
     commands["env_from"   ] = envfrom;
@@ -635,6 +778,23 @@
             // have a decent command
             bool processed = false;
             switch (commands[cmd]) {
+                case content: {
+                    char *suff = strtok(NULL, delim);
+                    if (!suff) break;                           // no dns suffic
+                    char *msg = suff + strlen(suff);
+                    if ((msg - line) >= strlen(orig)) break;    // line ended with the dns suffix
+                    msg  = strchr(msg+1, '\'');
+                    if (!msg) break;                            // no reply message template
+                    msg++; // move over the leading '
+                    if ((msg - line) >= strlen(orig)) break;    // line ended with the leading quote
+                    char *last = strchr(msg, '\'');
+                    if (!last) break;                           // no trailing quote
+                    *last = '\0';                               // make it a null terminator
+                    dc.content_suffix  = register_string(suff);
+                    dc.content_message = register_string(msg);
+                    processed = true;
+                    } break;
+
                 case dnsbl: {
                     // have a new dnsbl to use
                     char *name = next_token(delim);