# HG changeset patch # User Carl Byington # Date 1254249375 25200 # Node ID c0d2e99c0a1dcabece3e2f0149c0b11f1d78b9e8 # Parent e6c66640f6f93d3d616da86309a6c9e31d2e71d9 Add surbl checks on the smtp helo value, client reverse dns name, and mail from domain name diff -r e6c66640f6f9 -r c0d2e99c0a1d ChangeLog --- a/ChangeLog Tue Jun 09 08:36:34 2009 -0700 +++ b/ChangeLog Tue Sep 29 11:36:15 2009 -0700 @@ -1,3 +1,7 @@ +6.25 2009-09-29 + Add surbl checks on the smtp helo value, client reverse dns name, + and mail from domain name. + 6.24 2009-06-09 Add SRS decoding to envelope addresses. diff -r e6c66640f6f9 -r c0d2e99c0a1d NEWS --- a/NEWS Tue Jun 09 08:36:34 2009 -0700 +++ b/NEWS Tue Sep 29 11:36:15 2009 -0700 @@ -1,3 +1,4 @@ +6.25 2009-09-29 Add surbl checks on the smtp helo value, client reverse dns name, and mail from domain name. 6.24 2009-06-09 Add SRS decoding to envelope addresses. 6.23 2009-05-25 Add whitelisting by regex expression filtering. 6.22 2009-05-08 Prevent auto whitelisting due to outgoing multipart/report delivery notifications. diff -r e6c66640f6f9 -r c0d2e99c0a1d configure.in --- a/configure.in Tue Jun 09 08:36:34 2009 -0700 +++ b/configure.in Tue Sep 29 11:36:15 2009 -0700 @@ -1,6 +1,6 @@ AC_PREREQ(2.59) -AC_INIT(dnsbl,6.24,carl@five-ten-sg.com) +AC_INIT(dnsbl,6.25,carl@five-ten-sg.com) AC_CONFIG_SRCDIR([config.h.in]) AC_CONFIG_HEADER([config.h]) diff -r e6c66640f6f9 -r c0d2e99c0a1d dnsbl.spec.in --- a/dnsbl.spec.in Tue Jun 09 08:36:34 2009 -0700 +++ b/dnsbl.spec.in Tue Sep 29 11:36:15 2009 -0700 @@ -96,6 +96,10 @@ %changelog +* Tue Sep 29 2009 Carl Byington - 6.25-1 +- Add surbl checks on the smtp helo value, client reverse dns name, + and mail from domain name. + * Tue Jun 09 2009 Carl Byington - 6.24-1 - Add SRS decoding to envelope addresses. diff -r e6c66640f6f9 -r c0d2e99c0a1d src/dnsbl.cpp --- a/src/dnsbl.cpp Tue Jun 09 08:36:34 2009 -0700 +++ b/src/dnsbl.cpp Tue Sep 29 11:36:15 2009 -0700 @@ -253,6 +253,215 @@ } +//////////////////////////////////////////////// +// ask a dns question and get an A record answer - we don't try +// very hard, just using the default resolver retry settings. +// If we cannot get an answer, we just accept the mail. +// +// +int dns_interface(mlfiPriv &priv, const char *question, bool maybe_ip, ns_map *nameservers); +int dns_interface(mlfiPriv &priv, const char *question, bool maybe_ip, ns_map *nameservers) { + // tell sendmail we are still working + #if _FFR_SMFI_PROGRESS + if (priv.eom) smfi_progress(priv.ctx); + #endif + + // this part can be done without locking the resolver mutex. Each + // milter thread is talking over its own socket to a separate resolver + // process, which does the actual dns resolution. + if (priv.err) return 0; // cannot ask more questions on this socket. + if (maybe_ip) { + // might be a bare ip address, try this first to avoid dns lookups that may not be needed + in_addr ip; + if (inet_aton(question, &ip)) { + return (int)ip.s_addr; + } + } + int n = strlen(question); + if (question[n-1] == '.') { + priv.my_write(question, n+1); // write the question including the null terminator + } + else { + priv.my_write(question, n); // write the question + priv.my_write(".", 2); // and the fully qualified . terminator and null string terminator + } + glommer glom; + char *buf = (char *)&glom; + priv.my_read(buf, sizeof(glom.length)); + buf += sizeof(glom.length); + #ifdef RESOLVER_DEBUG + char text[1000]; + snprintf(text, sizeof(text), "dns_interface() wrote question %s and has answer length %d", question, glom.length); + my_syslog(text); + #endif + if (glom.length == 0) return 0; + if (glom.length > sizeof(glom.answer)) { + priv.err = true; + return 0; // cannot process overlarge answers + } + priv.my_read(buf, glom.length); + +#ifdef NS_PACKETSZ + // now we need to lock the resolver mutex to keep the milter threads from + // stepping on each other while parsing the dns answer. + int ret_address = 0; + pthread_mutex_lock(&resolve_mutex); + // parse the answer + ns_msg handle; + ns_rr rr; + if (ns_initparse(glom.answer, glom.length, &handle) == 0) { + // look for ns names + if (nameservers) { + ns_map &ns = *nameservers; + int rrnum = 0; + while (ns_parserr(&handle, ns_s_ns, rrnum++, &rr) == 0) { + if (ns_rr_type(rr) == ns_t_ns) { + char nam[NS_MAXDNAME+1]; + char *n = nam; + const u_char *p = ns_rr_rdata(rr); + while (((n-nam) < NS_MAXDNAME) && ((size_t)(p-glom.answer) < glom.length) && *p) { + size_t s = *(p++); + if (s > 191) { + // compression pointer + s = (s-192)*256 + *(p++); + if (s >= glom.length) break; // pointer outside bounds of answer + p = glom.answer + s; + s = *(p++); + } + if (s > 0) { + if ((size_t)(n-nam) >= (NS_MAXDNAME-s)) break; // destination would overflow name buffer + if ((size_t)(p-glom.answer) >= (glom.length-s)) break; // source outside bounds of answer + memcpy(n, p, s); + n += s; + p += s; + *(n++) = '.'; + } + } + if (n-nam) n--; // remove trailing . + *n = '\0'; // null terminate it + ns.add(nam, question); // ns host to lookup later + } + } + rrnum = 0; + while (ns_parserr(&handle, ns_s_ar, rrnum++, &rr) == 0) { + if (ns_rr_type(rr) == ns_t_a) { + char* nam = (char*)ns_rr_name(rr); + ns_mapper::iterator i = ns.ns_ip.find(nam); + if (i != ns.ns_ip.end()) { + // we want this ip address + int address; + memcpy(&address, ns_rr_rdata(rr), sizeof(address)); + ns.ns_ip[nam] = address; + } + } + } + } + int rrnum = 0; + while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) { + if (ns_rr_type(rr) == ns_t_a) { + int address; + memcpy(&address, ns_rr_rdata(rr), sizeof(address)); + ret_address = address; + } + } + } + pthread_mutex_unlock(&resolve_mutex); + #ifdef RESOLVER_DEBUG + snprintf(text, sizeof(text), "dns_interface() found ip %d", ret_address); + my_syslog(text); + #endif + return ret_address; +#else + return glom.answer; +#endif +} + + +//////////////////////////////////////////////// +// lookup the domain name part of a hostname on the uribl +// +// if we find part of the hostname on the uribl, return +// true and point found to the part of the hostname that we found +// as a string registered in hosts. +// otherwise, return false and preserve the value of found. +// +bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) ; +bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) { + // top is pointer to '.' char at end of base domain, or null for ip address form + // so for hostname of www.fred.mydomain.co.uk + // top points to-----------------------^ + // and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff + char buf[maxlen]; + if (top) { + // add one more component + const char *x = (const char *)memrchr(hostname, '.', top-hostname); + if (x) hostname = x+1; + } + snprintf(buf, sizeof(buf), "%s.%s.", hostname, priv.uribl_suffix); + if (dns_interface(priv, buf, false, NULL)) { + if (debug_syslog > 2) { + char tmp[maxlen]; + snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix); + my_syslog(tmp); + } + found = register_string(hosts, hostname); + return true; + } + return false; +} + + +//////////////////////////////////////////////// +// uribl checker +// ------------- +// hostname MUST not have a trailing dot +// If tld, two level lookup. +// Else, look up three level domain. +// +// if we find part of the hostname on the uribl, return +// true and point found to the part of the hostname that we found +// as a string registered in hosts. +// otherwise, return false and preserve the value of found. +// +bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ; +bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) { + in_addr ip; + if (inet_aton(hostname, &ip)) { + const u_char *src = (const u_char *)&ip.s_addr; + if (src[0] == 127) return false; // don't do dns lookups on localhost + if (src[0] == 10) return false; // don't do dns lookups on rfc1918 space + if ((src[0] == 192) && (src[1] == 168)) return false; + if ((src[0] == 172) && (16 <= src[1]) && (src[1] <= 31)) return false; + char adr[sizeof "255.255.255.255 "]; + snprintf(adr, sizeof(adr), "%u.%u.%u.%u", src[3], src[2], src[1], src[0]); + // cannot use inet_ntop here since we want the octets reversed. + return (uriblookup(priv, hosts, adr, NULL, found)); + } + + const char *top, *top2, *top3; + top = strrchr(hostname, '.'); + if (top) { + top2 = (const char *)memrchr(hostname, '.', top-hostname); + + if (top2) { + string_set::iterator i = priv.memory->get_cctlds()->find(top2+1); + string_set::iterator x = priv.memory->get_cctlds()->end(); + // if we have a 2-level-cctld, just look at top three levels of the name + if (i != x) return uriblookup(priv, hosts, hostname, top2, found); + + // if we have more than 3 levels in the name, look at the top three levels of the name + top3 = (const char *)memrchr(hostname, '.', top2-hostname); + if (top3 && uriblookup(priv, hosts, hostname, top2, found)) return true; + + // if that was not found, fall thru to looking at the top two levels + } + // look at the top two levels of the name + return uriblookup(priv, hosts, hostname, top, found); + } + return false; +} + + mlfiPriv::mlfiPriv() { pthread_mutex_lock(&config_mutex); pc = config; @@ -267,6 +476,9 @@ queueid = NULL; authenticated = NULL; client_name = NULL; + helo_uribl = false; + client_uribl = false; + from_uribl = false; have_whites = false; only_whites = true; want_spamassassin = false; @@ -321,6 +533,9 @@ queueid = NULL; authenticated = NULL; client_name = NULL; + helo_uribl = false; + client_uribl = false; + from_uribl = false; have_whites = false; only_whites = true; want_spamassassin = false; @@ -438,7 +653,6 @@ } void mlfiPriv::need_content_filter(const char *rcpt, CONTEXT &con) { - register_string(env_to, rcpt, &con); if (!memory) { // first recipient that needs content filtering sets // some of the content filtering parameters @@ -449,6 +663,22 @@ uribl_suffix = con.get_uribl_suffix(); uribl_message = con.get_uribl_message(); content_host_ignore = &con.get_content_host_ignore(); + // if we are using uribl, test helo and client names here + if (uribl_suffix) { + const char *found = NULL; + string_set hosts; + if (helo) { + helo_uribl = check_uribl(*this, hosts, helo, found); + } + if (client_name && !helo_uribl) { + client_uribl = check_uribl(*this, hosts, client_name, found); + } + if (mailaddr && !client_uribl) { + char *f = strchr(mailaddr, '@'); + if (f) from_uribl = check_uribl(*this, hosts, f+1, found); + } + discard(hosts); + } } } @@ -586,130 +816,6 @@ //////////////////////////////////////////////// -// ask a dns question and get an A record answer - we don't try -// very hard, just using the default resolver retry settings. -// If we cannot get an answer, we just accept the mail. -// -// -int dns_interface(mlfiPriv &priv, const char *question, bool maybe_ip, ns_map *nameservers); -int dns_interface(mlfiPriv &priv, const char *question, bool maybe_ip, ns_map *nameservers) { - // tell sendmail we are still working - #if _FFR_SMFI_PROGRESS - if (priv.eom) smfi_progress(priv.ctx); - #endif - - // this part can be done without locking the resolver mutex. Each - // milter thread is talking over its own socket to a separate resolver - // process, which does the actual dns resolution. - if (priv.err) return 0; // cannot ask more questions on this socket. - if (maybe_ip) { - // might be a bare ip address, try this first to avoid dns lookups that may not be needed - in_addr ip; - if (inet_aton(question, &ip)) { - return (int)ip.s_addr; - } - } - int n = strlen(question); - if (question[n-1] == '.') { - priv.my_write(question, n+1); // write the question including the null terminator - } - else { - priv.my_write(question, n); // write the question - priv.my_write(".", 2); // and the fully qualified . terminator and null string terminator - } - glommer glom; - char *buf = (char *)&glom; - priv.my_read(buf, sizeof(glom.length)); - buf += sizeof(glom.length); - #ifdef RESOLVER_DEBUG - char text[1000]; - snprintf(text, sizeof(text), "dns_interface() wrote question %s and has answer length %d", question, glom.length); - my_syslog(text); - #endif - if (glom.length == 0) return 0; - if (glom.length > sizeof(glom.answer)) { - priv.err = true; - return 0; // cannot process overlarge answers - } - priv.my_read(buf, glom.length); - -#ifdef NS_PACKETSZ - // now we need to lock the resolver mutex to keep the milter threads from - // stepping on each other while parsing the dns answer. - int ret_address = 0; - pthread_mutex_lock(&resolve_mutex); - // parse the answer - ns_msg handle; - ns_rr rr; - if (ns_initparse(glom.answer, glom.length, &handle) == 0) { - // look for ns names - if (nameservers) { - ns_map &ns = *nameservers; - int rrnum = 0; - while (ns_parserr(&handle, ns_s_ns, rrnum++, &rr) == 0) { - if (ns_rr_type(rr) == ns_t_ns) { - char nam[NS_MAXDNAME+1]; - char *n = nam; - const u_char *p = ns_rr_rdata(rr); - while (((n-nam) < NS_MAXDNAME) && ((size_t)(p-glom.answer) < glom.length) && *p) { - size_t s = *(p++); - if (s > 191) { - // compression pointer - s = (s-192)*256 + *(p++); - if (s >= glom.length) break; // pointer outside bounds of answer - p = glom.answer + s; - s = *(p++); - } - if (s > 0) { - if ((size_t)(n-nam) >= (NS_MAXDNAME-s)) break; // destination would overflow name buffer - if ((size_t)(p-glom.answer) >= (glom.length-s)) break; // source outside bounds of answer - memcpy(n, p, s); - n += s; - p += s; - *(n++) = '.'; - } - } - if (n-nam) n--; // remove trailing . - *n = '\0'; // null terminate it - ns.add(nam, question); // ns host to lookup later - } - } - rrnum = 0; - while (ns_parserr(&handle, ns_s_ar, rrnum++, &rr) == 0) { - if (ns_rr_type(rr) == ns_t_a) { - char* nam = (char*)ns_rr_name(rr); - ns_mapper::iterator i = ns.ns_ip.find(nam); - if (i != ns.ns_ip.end()) { - // we want this ip address - int address; - memcpy(&address, ns_rr_rdata(rr), sizeof(address)); - ns.ns_ip[nam] = address; - } - } - } - } - int rrnum = 0; - while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) { - if (ns_rr_type(rr) == ns_t_a) { - int address; - memcpy(&address, ns_rr_rdata(rr), sizeof(address)); - ret_address = address; - } - } - } - pthread_mutex_unlock(&resolve_mutex); - #ifdef RESOLVER_DEBUG - snprintf(text, sizeof(text), "dns_interface() found ip %d", ret_address); - my_syslog(text); - #endif - return ret_address; -#else - return glom.answer; -#endif -} - - -//////////////////////////////////////////////// // check a single dnsbl // bool check_single(mlfiPriv &priv, int ip, const char *suffix); @@ -766,91 +872,6 @@ //////////////////////////////////////////////// -// lookup the domain name part of a hostname on the uribl -// -// if we find part of the hostname on the uribl, return -// true and point found to the part of the hostname that we found -// as a string registered in hosts. -// otherwise, return false and preserve the value of found. -// -bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) ; -bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) { - // top is pointer to '.' char at end of base domain, or null for ip address form - // so for hostname of www.fred.mydomain.co.uk - // top points to-----------------------^ - // and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff - char buf[maxlen]; - if (top) { - // add one more component - const char *x = (const char *)memrchr(hostname, '.', top-hostname); - if (x) hostname = x+1; - } - snprintf(buf, sizeof(buf), "%s.%s.", hostname, priv.uribl_suffix); - if (dns_interface(priv, buf, false, NULL)) { - if (debug_syslog > 2) { - char tmp[maxlen]; - snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix); - my_syslog(tmp); - } - found = register_string(hosts, hostname); - return true; - } - return false; -} - - -//////////////////////////////////////////////// -// uribl checker -// ------------- -// hostname MUST not have a trailing dot -// If tld, two level lookup. -// Else, look up three level domain. -// -// if we find part of the hostname on the uribl, return -// true and point found to the part of the hostname that we found -// as a string registered in hosts. -// otherwise, return false and preserve the value of found. -// -bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ; -bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) { - in_addr ip; - if (inet_aton(hostname, &ip)) { - const u_char *src = (const u_char *)&ip.s_addr; - if (src[0] == 127) return false; // don't do dns lookups on localhost - if (src[0] == 10) return false; // don't do dns lookups on rfc1918 space - if ((src[0] == 192) && (src[1] == 168)) return false; - if ((src[0] == 172) && (16 <= src[1]) && (src[1] <= 31)) return false; - char adr[sizeof "255.255.255.255 "]; - snprintf(adr, sizeof(adr), "%u.%u.%u.%u", src[3], src[2], src[1], src[0]); - // cannot use inet_ntop here since we want the octets reversed. - return (uriblookup(priv, hosts, adr, NULL, found)); - } - - const char *top, *top2, *top3; - top = strrchr(hostname, '.'); - if (top) { - top2 = (const char *)memrchr(hostname, '.', top-hostname); - - if (top2) { - string_set::iterator i = priv.memory->get_cctlds()->find(top2+1); - string_set::iterator x = priv.memory->get_cctlds()->end(); - // if we have a 2-level-cctld, just look at top three levels of the name - if (i != x) return uriblookup(priv, hosts, hostname, top2, found); - - // if we have more than 3 levels in the name, look at the top three levels of the name - top3 = (const char *)memrchr(hostname, '.', top2-hostname); - if (top3 && uriblookup(priv, hosts, hostname, top2, found)) return true; - - // if that was not found, fall thru to looking at the top two levels - } - // look at the top two levels of the name - return uriblookup(priv, hosts, hostname, top, found); - } - return false; -} - - -//////////////////////////////////////////////// // check the hosts from the body against the content filter and uribl dnsbls // // @@ -1180,9 +1201,33 @@ smfi_setreply(ctx, (char*)"452", (char*)"4.2.1", (char*)"incompatible filtering contexts"); return SMFIS_TEMPFAIL; } + priv.need_content_filter(rcptaddr, con); + char bu[maxlen]; + bool uri = false; + // content filtering implies also checking helo name on uribl (if enabled) + if (priv.helo_uribl) { + snprintf(bu, sizeof(bu), "(helo %s)", priv.helo); + uri = true; + } + // content filterint implies also checking client reverse dns name on uribl (if enabled) + if (priv.client_uribl) { + snprintf(bu, sizeof(bu), "(rdns %s)", priv.client_name); + uri = true; + } + // content filterint implies also checking mail from domain name on uribl (if enabled) + if (priv.from_uribl) { + snprintf(bu, sizeof(bu), "(from %s)", priv.mailaddr); + uri = true; + } + if (uri) { + char buf[maxlen]; + snprintf(buf, sizeof(buf), priv.uribl_message, bu); + smfi_setreply(ctx, (char*)"550", (char*)"5.7.1", buf); + return SMFIS_REJECT; + } } // remember the non-whites - priv.need_content_filter(rcptaddr, con); + register_string(priv.env_to, rcptaddr, &con); priv.only_whites = false; priv.want_spamassassin |= (priv.assassin) && // have spam assassin available and (con.get_spamassassin_limit() != 0); // want to use it with a non-zero score @@ -1622,8 +1667,8 @@ char *x = strchr(email, '|'); if (x) { *x = '\0'; - char *from = strdup(email); - char *to = strdup(x+1); + const char *from = to_lower_string(email); + const char *to = to_lower_string(x+1); use_syslog = false; CONFIG *conf = new_conf(); if (conf) { diff -r e6c66640f6f9 -r c0d2e99c0a1d src/dnsbl.h --- a/src/dnsbl.h Tue Jun 09 08:36:34 2009 -0700 +++ b/src/dnsbl.h Tue Sep 29 11:36:15 2009 -0700 @@ -39,6 +39,9 @@ const char *queueid; // sendmail queue id const char *authenticated; // client authenticated? if so, suppress all dnsbl checks, but check rate limits const char *client_name; // fully qualified host name of the smtp client + bool helo_uribl; // helo value on uribl + bool client_uribl; // client_name on uribl + bool from_uribl; // envelope from value on uribl bool have_whites; // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails bool only_whites; // every recipient is whitelisted? bool want_spamassassin; // at least one non-whitelisted recipient has a non zero spamassassin limit diff -r e6c66640f6f9 -r c0d2e99c0a1d src/scanner.cpp --- a/src/scanner.cpp Tue Jun 09 08:36:34 2009 -0700 +++ b/src/scanner.cpp Tue Sep 29 11:36:15 2009 -0700 @@ -1202,13 +1202,13 @@ binary_tags = 0; discard(hosts); } -void recorder::new_url(char *host) { +void recorder::new_url(const char *host) { register_string(hosts, host); } void recorder::binary() { binary_tags++; } -void recorder::new_tag(char *tag) { +void recorder::new_tag(const char *tag) { string_set::iterator i = html_tags->find(tag); if (i == html_tags->end()) { bad_html_tags++; diff -r e6c66640f6f9 -r c0d2e99c0a1d src/scanner.h --- a/src/scanner.h Tue Jun 09 08:36:34 2009 -0700 +++ b/src/scanner.h Tue Sep 29 11:36:15 2009 -0700 @@ -28,10 +28,10 @@ recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &cctlds_); ~recorder() { empty(); }; void empty(); - void new_url(char *host); - void new_tag(char *tag); + void new_url(const char *host); + void new_tag(const char *tag); void binary(); - void syslog(char *buf) { my_syslog(priv, buf); }; + void syslog(const char *buf) { my_syslog(priv, buf); }; mlfiPriv *get_priv() { return priv; }; string_set *get_cctlds() { return cctlds; }; string_set *get_tlds() { return tlds; };