diff src/dnsbl.cpp @ 236:c0d2e99c0a1d

Add surbl checks on the smtp helo value, client reverse dns name, and mail from domain name
author Carl Byington <carl@five-ten-sg.com>
date Tue, 29 Sep 2009 11:36:15 -0700
parents e6c66640f6f9
children 7b818a4e21a4
line wrap: on
line diff
--- a/src/dnsbl.cpp	Tue Jun 09 08:36:34 2009 -0700
+++ b/src/dnsbl.cpp	Tue Sep 29 11:36:15 2009 -0700
@@ -253,6 +253,215 @@
 }
 
 
+////////////////////////////////////////////////
+//  ask a dns question and get an A record answer - we don't try
+//  very hard, just using the default resolver retry settings.
+//  If we cannot get an answer, we return 0 and just accept the mail.
+//  With maybe_ip, a question that parses as a bare ip is returned without a lookup.
+//  With nameservers, ns names found in the answer are recorded there as well.
+int dns_interface(mlfiPriv &priv, const char *question, bool maybe_ip, ns_map *nameservers);
+int dns_interface(mlfiPriv &priv, const char *question, bool maybe_ip, ns_map *nameservers) {
+    // tell sendmail we are still working
+    #if _FFR_SMFI_PROGRESS
+        if (priv.eom) smfi_progress(priv.ctx);
+    #endif
+
+    // this part can be done without locking the resolver mutex. Each
+    // milter thread is talking over its own socket to a separate resolver
+    // process, which does the actual dns resolution.
+    if (priv.err) return 0; // cannot ask more questions on this socket.
+    if (maybe_ip) {
+        // might be a bare ip address, try this first to avoid dns lookups that may not be needed
+        in_addr ip;
+        if (inet_aton(question, &ip)) {
+            return (int)ip.s_addr;
+        }
+    }
+    int n = strlen(question);           // 0 for an empty question, so guard the [n-1] index below
+    if ((n > 0) && (question[n-1] == '.')) {
+        priv.my_write(question, n+1);   // write the question including the null terminator
+    }
+    else {
+        priv.my_write(question, n);     // write the question
+        priv.my_write(".", 2);          // and the fully qualified . terminator and null string terminator
+    }
+    glommer glom;
+    char *buf = (char *)&glom;
+    priv.my_read(buf, sizeof(glom.length));
+    buf += sizeof(glom.length);
+    #ifdef RESOLVER_DEBUG
+        char text[1000];
+        snprintf(text, sizeof(text), "dns_interface() wrote question %s and has answer length %d", question, glom.length);
+        my_syslog(text);
+    #endif
+    if (glom.length == 0) return 0;
+    if (glom.length > sizeof(glom.answer)) {
+        priv.err = true;
+        return 0;  // cannot process overlarge answers
+    }
+    priv.my_read(buf, glom.length);
+
+#ifdef NS_PACKETSZ
+    // now we need to lock the resolver mutex to keep the milter threads from
+    // stepping on each other while parsing the dns answer.
+    int ret_address = 0;
+    pthread_mutex_lock(&resolve_mutex);
+        // parse the answer
+        ns_msg handle;
+        ns_rr  rr;
+        if (ns_initparse(glom.answer, glom.length, &handle) == 0) {
+            // look for ns names
+            if (nameservers) {
+                ns_map &ns = *nameservers;
+                int rrnum = 0;
+                while (ns_parserr(&handle, ns_s_ns, rrnum++, &rr) == 0) {
+                    if (ns_rr_type(rr) == ns_t_ns) {
+                        char nam[NS_MAXDNAME+1];
+                        char         *n = nam;
+                        const u_char *p = ns_rr_rdata(rr);
+                        while (((n-nam) < NS_MAXDNAME) && ((size_t)(p-glom.answer) < glom.length) && *p) {
+                            size_t s = *(p++);
+                            if (s > 191) {
+                                // compression pointer
+                                s = (s-192)*256 + *(p++);
+                                if (s >= glom.length) break; // pointer outside bounds of answer
+                                p = glom.answer + s;
+                                s = *(p++);
+                            }
+                            if (s > 0) {
+                                if ((size_t)(n-nam)         >= (NS_MAXDNAME-s)) break;  // destination would overflow name buffer
+                                if ((size_t)(p-glom.answer) >= (glom.length-s)) break;  // source outside bounds of answer
+                                memcpy(n, p, s);
+                                n += s;
+                                p += s;
+                                *(n++) = '.';
+                            }
+                        }
+                        if (n-nam) n--;             // remove trailing .
+                        *n = '\0';                  // null terminate it
+                        ns.add(nam, question);      // ns host to lookup later
+                    }
+                }
+                rrnum = 0;
+                while (ns_parserr(&handle, ns_s_ar, rrnum++, &rr) == 0) {
+                    if (ns_rr_type(rr) == ns_t_a) {
+                        char* nam = (char*)ns_rr_name(rr);
+                        ns_mapper::iterator i = ns.ns_ip.find(nam);
+                        if (i != ns.ns_ip.end()) {
+                            // we want this ip address
+                            int address;
+                            memcpy(&address, ns_rr_rdata(rr), sizeof(address));
+                            ns.ns_ip[nam] = address;
+                        }
+                    }
+                }
+            }
+            int rrnum = 0;
+            while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) {
+                if (ns_rr_type(rr) == ns_t_a) {
+                    int address;
+                    memcpy(&address, ns_rr_rdata(rr), sizeof(address));
+                    ret_address = address;
+                }
+            }
+        }
+    pthread_mutex_unlock(&resolve_mutex);
+    #ifdef RESOLVER_DEBUG
+        snprintf(text, sizeof(text), "dns_interface() found ip %d", ret_address);
+        my_syslog(text);
+    #endif
+    return ret_address;
+#else
+    return glom.answer;
+#endif
+}
+
+
+////////////////////////////////////////////////
+//  lookup the domain name part of a hostname on the uribl, by asking
+//  dns_interface() about "<domain part>.<priv.uribl_suffix>."
+//  if we find part of the hostname on the uribl, return
+//  true and point found to the part of the hostname that we found
+//  as a string registered in hosts.
+//  otherwise, return false and preserve the value of found.
+//
+bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) ;
+bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) {
+    // top is pointer to '.' char at end of base domain, or null for ip address form
+    // so for hostname of www.fred.mydomain.co.uk
+    // top points to-----------------------^
+    // and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff
+    char buf[maxlen];
+    if (top) {
+        // add one more component: start just after the previous '.', if there is one
+        const char *x = (const char *)memrchr(hostname, '.', top-hostname);
+        if (x) hostname = x+1;
+    }
+    snprintf(buf, sizeof(buf), "%s.%s.", hostname, priv.uribl_suffix);
+    if (dns_interface(priv, buf, false, NULL)) {
+        if (debug_syslog > 2) {
+            char tmp[maxlen];
+            snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix);
+            my_syslog(tmp);
+        }
+        found = register_string(hosts, hostname);
+        return true;
+    }
+    return false;
+}
+
+
+////////////////////////////////////////////////
+//  uribl checker
+//  -------------
+//  hostname MUST NOT have a trailing dot
+//  If the top two levels are a known 2-level cctld, look up three levels.
+//  Else try three levels (names with more than 3), then fall back to two.
+//
+//  if we find part of the hostname on the uribl, return
+//  true and point found to the part of the hostname that we found
+//  as a string registered in hosts.
+//  otherwise, return false and preserve the value of found.
+//  bare ip addresses are looked up octet-reversed; 127.x and rfc1918 space are skipped.
+bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ;
+bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) {
+    in_addr ip;
+    if (inet_aton(hostname, &ip)) {
+        const u_char *src = (const u_char *)&ip.s_addr;     // view the address one octet at a time
+        if (src[0] == 127) return false;    // don't do dns lookups on localhost
+        if (src[0] == 10)  return false;    // don't do dns lookups on rfc1918 space
+        if ((src[0] == 192) && (src[1] == 168)) return false;
+        if ((src[0] == 172) && (16 <= src[1]) && (src[1] <= 31)) return false;
+        char adr[sizeof "255.255.255.255   "];
+        snprintf(adr, sizeof(adr), "%u.%u.%u.%u", src[3], src[2], src[1], src[0]);
+        // cannot use inet_ntop here since we want the octets reversed.
+        return (uriblookup(priv, hosts, adr, NULL, found));
+    }
+
+    const char *top, *top2, *top3;
+    top = strrchr(hostname, '.');
+    if (top) {
+        top2 = (const char *)memrchr(hostname, '.', top-hostname);
+
+        if (top2) {
+            string_set::iterator i = priv.memory->get_cctlds()->find(top2+1);
+            string_set::iterator x = priv.memory->get_cctlds()->end();
+            // if we have a 2-level-cctld, just look at top three levels of the name
+            if (i != x) return uriblookup(priv, hosts, hostname, top2, found);
+
+            // if we have more than 3 levels in the name, look at the top three levels of the name
+            top3 = (const char *)memrchr(hostname, '.', top2-hostname);
+            if (top3 && uriblookup(priv, hosts, hostname, top2, found)) return true;
+
+            // if that was not found, fall thru to looking at the top two levels
+        }
+        // look at the top two levels of the name
+        return uriblookup(priv, hosts, hostname, top, found);
+    }
+    return false;
+}
+
+
 mlfiPriv::mlfiPriv() {
     pthread_mutex_lock(&config_mutex);
         pc = config;
@@ -267,6 +476,9 @@
     queueid                 = NULL;
     authenticated           = NULL;
     client_name             = NULL;
+    helo_uribl              = false;
+    client_uribl            = false;
+    from_uribl              = false;
     have_whites             = false;
     only_whites             = true;
     want_spamassassin       = false;
@@ -321,6 +533,9 @@
         queueid                 = NULL;
         authenticated           = NULL;
         client_name             = NULL;
+        helo_uribl              = false;
+        client_uribl            = false;
+        from_uribl              = false;
         have_whites             = false;
         only_whites             = true;
         want_spamassassin       = false;
@@ -438,7 +653,6 @@
 }
 
 void mlfiPriv::need_content_filter(const char *rcpt, CONTEXT &con) {
-    register_string(env_to, rcpt, &con);
     if (!memory) {
         // first recipient that needs content filtering sets
         // some of the content filtering parameters
@@ -449,6 +663,22 @@
         uribl_suffix        = con.get_uribl_suffix();
         uribl_message       = con.get_uribl_message();
         content_host_ignore = &con.get_content_host_ignore();
+        // if we are using uribl, test helo and client names here
+        if (uribl_suffix) {
+            const char *found = NULL;
+            string_set hosts;
+            if (helo) {
+                helo_uribl = check_uribl(*this, hosts, helo, found);
+            }
+            if (client_name && !helo_uribl) {
+                client_uribl = check_uribl(*this, hosts, client_name, found);
+            }
+            if (mailaddr && !client_uribl) {
+                char *f = strchr(mailaddr, '@');
+                if (f) from_uribl = check_uribl(*this, hosts, f+1, found);
+            }
+            discard(hosts);
+        }
     }
 }
 
@@ -586,130 +816,6 @@
 
 
 ////////////////////////////////////////////////
-//  ask a dns question and get an A record answer - we don't try
-//  very hard, just using the default resolver retry settings.
-//  If we cannot get an answer, we just accept the mail.
-//
-//
-int dns_interface(mlfiPriv &priv, const char *question, bool maybe_ip, ns_map *nameservers);
-int dns_interface(mlfiPriv &priv, const char *question, bool maybe_ip, ns_map *nameservers) {
-    // tell sendmail we are still working
-    #if _FFR_SMFI_PROGRESS
-        if (priv.eom) smfi_progress(priv.ctx);
-    #endif
-
-    // this part can be done without locking the resolver mutex. Each
-    // milter thread is talking over its own socket to a separate resolver
-    // process, which does the actual dns resolution.
-    if (priv.err) return 0; // cannot ask more questions on this socket.
-    if (maybe_ip) {
-        // might be a bare ip address, try this first to avoid dns lookups that may not be needed
-        in_addr ip;
-        if (inet_aton(question, &ip)) {
-            return (int)ip.s_addr;
-        }
-    }
-    int n = strlen(question);
-    if (question[n-1] == '.') {
-        priv.my_write(question, n+1);   // write the question including the null terminator
-    }
-    else {
-        priv.my_write(question, n);     // write the question
-        priv.my_write(".", 2);          // and the fully qualified . terminator and null string terminator
-    }
-    glommer glom;
-    char *buf = (char *)&glom;
-    priv.my_read(buf, sizeof(glom.length));
-    buf += sizeof(glom.length);
-    #ifdef RESOLVER_DEBUG
-        char text[1000];
-        snprintf(text, sizeof(text), "dns_interface() wrote question %s and has answer length %d", question, glom.length);
-        my_syslog(text);
-    #endif
-    if (glom.length == 0) return 0;
-    if (glom.length > sizeof(glom.answer)) {
-        priv.err = true;
-        return 0;  // cannot process overlarge answers
-    }
-    priv.my_read(buf, glom.length);
-
-#ifdef NS_PACKETSZ
-    // now we need to lock the resolver mutex to keep the milter threads from
-    // stepping on each other while parsing the dns answer.
-    int ret_address = 0;
-    pthread_mutex_lock(&resolve_mutex);
-        // parse the answer
-        ns_msg handle;
-        ns_rr  rr;
-        if (ns_initparse(glom.answer, glom.length, &handle) == 0) {
-            // look for ns names
-            if (nameservers) {
-                ns_map &ns = *nameservers;
-                int rrnum = 0;
-                while (ns_parserr(&handle, ns_s_ns, rrnum++, &rr) == 0) {
-                    if (ns_rr_type(rr) == ns_t_ns) {
-                        char nam[NS_MAXDNAME+1];
-                        char         *n = nam;
-                        const u_char *p = ns_rr_rdata(rr);
-                        while (((n-nam) < NS_MAXDNAME) && ((size_t)(p-glom.answer) < glom.length) && *p) {
-                            size_t s = *(p++);
-                            if (s > 191) {
-                                // compression pointer
-                                s = (s-192)*256 + *(p++);
-                                if (s >= glom.length) break; // pointer outside bounds of answer
-                                p = glom.answer + s;
-                                s = *(p++);
-                            }
-                            if (s > 0) {
-                                if ((size_t)(n-nam)         >= (NS_MAXDNAME-s)) break;  // destination would overflow name buffer
-                                if ((size_t)(p-glom.answer) >= (glom.length-s)) break;  // source outside bounds of answer
-                                memcpy(n, p, s);
-                                n += s;
-                                p += s;
-                                *(n++) = '.';
-                            }
-                        }
-                        if (n-nam) n--;             // remove trailing .
-                        *n = '\0';                  // null terminate it
-                        ns.add(nam, question);      // ns host to lookup later
-                    }
-                }
-                rrnum = 0;
-                while (ns_parserr(&handle, ns_s_ar, rrnum++, &rr) == 0) {
-                    if (ns_rr_type(rr) == ns_t_a) {
-                        char* nam = (char*)ns_rr_name(rr);
-                        ns_mapper::iterator i = ns.ns_ip.find(nam);
-                        if (i != ns.ns_ip.end()) {
-                            // we want this ip address
-                            int address;
-                            memcpy(&address, ns_rr_rdata(rr), sizeof(address));
-                            ns.ns_ip[nam] = address;
-                        }
-                    }
-                }
-            }
-            int rrnum = 0;
-            while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) {
-                if (ns_rr_type(rr) == ns_t_a) {
-                    int address;
-                    memcpy(&address, ns_rr_rdata(rr), sizeof(address));
-                    ret_address = address;
-                }
-            }
-        }
-    pthread_mutex_unlock(&resolve_mutex);
-    #ifdef RESOLVER_DEBUG
-        snprintf(text, sizeof(text), "dns_interface() found ip %d", ret_address);
-        my_syslog(text);
-    #endif
-    return ret_address;
-#else
-    return glom.answer;
-#endif
-}
-
-
-////////////////////////////////////////////////
 //  check a single dnsbl
 //
 bool check_single(mlfiPriv &priv, int ip, const char *suffix);
@@ -766,91 +872,6 @@
 
 
 ////////////////////////////////////////////////
-//  lookup the domain name part of a hostname on the uribl
-//
-//  if we find part of the hostname on the uribl, return
-//  true and point found to the part of the hostname that we found
-//  as a string registered in hosts.
-//  otherwise, return false and preserve the value of found.
-//
-bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) ;
-bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) {
-    // top is pointer to '.' char at end of base domain, or null for ip address form
-    // so for hostname of www.fred.mydomain.co.uk
-    // top points to-----------------------^
-    // and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff
-    char buf[maxlen];
-    if (top) {
-        // add one more component
-        const char *x = (const char *)memrchr(hostname, '.', top-hostname);
-        if (x) hostname = x+1;
-    }
-    snprintf(buf, sizeof(buf), "%s.%s.", hostname, priv.uribl_suffix);
-    if (dns_interface(priv, buf, false, NULL)) {
-        if (debug_syslog > 2) {
-            char tmp[maxlen];
-            snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix);
-            my_syslog(tmp);
-        }
-        found = register_string(hosts, hostname);
-        return true;
-    }
-    return false;
-}
-
-
-////////////////////////////////////////////////
-//  uribl checker
-//  -------------
-//  hostname MUST not have a trailing dot
-//  If tld, two level lookup.
-//  Else, look up three level domain.
-//
-//  if we find part of the hostname on the uribl, return
-//  true and point found to the part of the hostname that we found
-//  as a string registered in hosts.
-//  otherwise, return false and preserve the value of found.
-//
-bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ;
-bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) {
-    in_addr ip;
-    if (inet_aton(hostname, &ip)) {
-        const u_char *src = (const u_char *)&ip.s_addr;
-        if (src[0] == 127) return false;    // don't do dns lookups on localhost
-        if (src[0] == 10)  return false;    // don't do dns lookups on rfc1918 space
-        if ((src[0] == 192) && (src[1] == 168)) return false;
-        if ((src[0] == 172) && (16 <= src[1]) && (src[1] <= 31)) return false;
-        char adr[sizeof "255.255.255.255   "];
-        snprintf(adr, sizeof(adr), "%u.%u.%u.%u", src[3], src[2], src[1], src[0]);
-        // cannot use inet_ntop here since we want the octets reversed.
-        return (uriblookup(priv, hosts, adr, NULL, found));
-    }
-
-    const char *top, *top2, *top3;
-    top = strrchr(hostname, '.');
-    if (top) {
-        top2 = (const char *)memrchr(hostname, '.', top-hostname);
-
-        if (top2) {
-            string_set::iterator i = priv.memory->get_cctlds()->find(top2+1);
-            string_set::iterator x = priv.memory->get_cctlds()->end();
-            // if we have a 2-level-cctld, just look at top three levels of the name
-            if (i != x) return uriblookup(priv, hosts, hostname, top2, found);
-
-            // if we have more than 3 levels in the name, look at the top three levels of the name
-            top3 = (const char *)memrchr(hostname, '.', top2-hostname);
-            if (top3 && uriblookup(priv, hosts, hostname, top2, found)) return true;
-
-            // if that was not found, fall thru to looking at the top two levels
-        }
-        // look at the top two levels of the name
-        return uriblookup(priv, hosts, hostname, top, found);
-    }
-    return false;
-}
-
-
-////////////////////////////////////////////////
 //  check the hosts from the body against the content filter and uribl dnsbls
 //
 //
@@ -1180,9 +1201,33 @@
                 smfi_setreply(ctx, (char*)"452", (char*)"4.2.1", (char*)"incompatible filtering contexts");
                 return SMFIS_TEMPFAIL;
             }
+            priv.need_content_filter(rcptaddr, con);
+            char bu[maxlen];
+            bool uri = false;
+            // content filtering implies also checking helo name on uribl (if enabled)
+            if (priv.helo_uribl) {
+                snprintf(bu, sizeof(bu), "(helo %s)", priv.helo);
+                uri = true;
+            }
+            // content filtering implies also checking client reverse dns name on uribl (if enabled)
+            if (priv.client_uribl) {
+                snprintf(bu, sizeof(bu), "(rdns %s)", priv.client_name);
+                uri = true;
+            }
+            // content filtering implies also checking mail from domain name on uribl (if enabled)
+            if (priv.from_uribl) {
+                snprintf(bu, sizeof(bu), "(from %s)", priv.mailaddr);
+                uri = true;
+            }
+            if (uri) {
+                char buf[maxlen];
+                snprintf(buf, sizeof(buf), priv.uribl_message, bu);
+                smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", buf);
+                return SMFIS_REJECT;
+            }
         }
         // remember the non-whites
-        priv.need_content_filter(rcptaddr, con);
+        register_string(priv.env_to, rcptaddr, &con);
         priv.only_whites = false;
         priv.want_spamassassin |= (priv.assassin) &&                    // have spam assassin available and
                                   (con.get_spamassassin_limit() != 0);  // want to use it with a non-zero score
@@ -1622,8 +1667,8 @@
         char *x = strchr(email, '|');
         if (x) {
             *x = '\0';
-            char *from = strdup(email);
-            char *to   = strdup(x+1);
+            const char *from = to_lower_string(email);
+            const char *to   = to_lower_string(x+1);
             use_syslog = false;
             CONFIG *conf = new_conf();
             if (conf) {