Mercurial > dnsbl
diff src/dnsbl.cpp @ 270:f92f24950bd3 stable-6-0-35
Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Mon, 09 Sep 2013 15:15:53 -0700 |
parents | f941563c2a95 |
children | a99b6c1f5f67 |
line wrap: on
line diff
--- a/src/dnsbl.cpp Wed May 22 11:34:37 2013 -0700 +++ b/src/dnsbl.cpp Mon Sep 09 15:15:53 2013 -0700 @@ -1,6 +1,6 @@ /* -Copyright (c) 2009 Carl Byington - 510 Software Group, released under +Copyright (c) 2013 Carl Byington - 510 Software Group, released under the GPL version 3 or any later version at your choice available at http://www.gnu.org/licenses/gpl-3.0.txt @@ -389,27 +389,23 @@ //////////////////////////////////////////////// -// lookup the domain name part of a hostname on the uribl +// lookup a hostname on the uribl // -// if we find part of the hostname on the uribl, return -// true and point found to the part of the hostname that we found +// if we find hostname on the uribl, return true and point found to hostname // as a string registered in hosts. // otherwise, return false and preserve the value of found. // -bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) ; -bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) { - // top is pointer to '.' char at end of base domain, or null for ip address form - // so for hostname of www.fred.mydomain.co.uk - // top points to-----------------------^ - // and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff +bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ; +bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) { + if (debug_syslog > 4) { + char tmp[maxlen]; + snprintf(tmp, sizeof(tmp), "looking for %s on %s", hostname, priv.uribl_suffix); + my_syslog(tmp); + } char buf[maxlen]; - if (top) { - // add one more component - const char *x = (const char *)memrchr(hostname, '.', top-hostname); - if (x) hostname = x+1; - } snprintf(buf, sizeof(buf), "%s.%s.", hostname, priv.uribl_suffix); - if (dns_interface(priv, buf, false, NULL)) { + uint32_t ip = ntohl(dns_interface(priv, buf, false, NULL)); + if (ip and (ip != 0x7f000000)) { if (debug_syslog > 2) { char tmp[maxlen]; snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix); @@ -425,49 +421,60 @@ //////////////////////////////////////////////// // uribl checker // ------------- -// hostname MUST not have a trailing dot -// If tld, two level lookup. -// Else, look up three level domain. -// -// if we find part of the hostname on the uribl, return -// true and point found to the part of the hostname that we found -// as a string registered in hosts. -// otherwise, return false and preserve the value of found. +// hostname MUST not have a trailing dot. Find the tld part of +// the hostname, and add one more level. If that is listed on +// the uribl, return true and point found to the part of the +// hostname that we found as a string registered in hosts. +// Otherwise, return false and preserve the value of found. // bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ; bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) { in_addr ip; - if (inet_aton(hostname, &ip)) { - const u_char *src = (const u_char *)&ip.s_addr; - if (src[0] == 127) return false; // don't do dns lookups on localhost - if (src[0] == 10) return false; // don't do dns lookups on rfc1918 space - if ((src[0] == 192) && (src[1] == 168)) return false; - if ((src[0] == 172) && (16 <= src[1]) && (src[1] <= 31)) return false; - char adr[sizeof "255.255.255.255 "]; - snprintf(adr, sizeof(adr), "%u.%u.%u.%u", src[3], src[2], src[1], src[0]); - // cannot use inet_ntop here since we want the octets reversed. - return (uriblookup(priv, hosts, adr, NULL, found)); + if (inet_aton(hostname, &ip)) return false; // don't check ip addresses in uribls + const char* components[maxlen]; + int n = 0; // number of components in the hostname + while (n < maxlen) { + components[n++] = hostname; + const char *c = strchr(hostname, '.'); + if (!c) break; + hostname = c+1; + } + string_set *tlds = priv.memory->get_tlds(); + string_set *tldwilds = priv.memory->get_tldwilds(); + string_set *tldnots = priv.memory->get_tldnots(); + string_set::iterator xtlds = tlds->end(); + string_set::iterator xtldwilds = tldwilds->end(); + string_set::iterator xtldnots = tldnots->end(); + for (int i=max(0,n-4); i<n; i++) { + const char* name = components[i]; + bool rc = false; + string_set::iterator tt = tldnots->find(name); + if (tt != xtldnots) { + rc = true; } - - const char *top, *top2, *top3; - top = strrchr(hostname, '.'); - if (top) { - top2 = (const char *)memrchr(hostname, '.', top-hostname); - - if (top2) { - string_set::iterator i = priv.memory->get_cctlds()->find(top2+1); - string_set::iterator x = priv.memory->get_cctlds()->end(); - // if we have a 2-level-cctld, just look at top three levels of the name - if (i != x) return uriblookup(priv, hosts, hostname, top2, found); - - // if we have more than 3 levels in the name, look at the top three levels of the name - top3 = (const char *)memrchr(hostname, '.', top2-hostname); - if (top3 && uriblookup(priv, hosts, hostname, top2, found)) return true; - - // if that was not found, fall thru to looking at the top two levels + else { + tt = tldwilds->find(name); + if (tt != xtldwilds) { + if (i > 1) { + rc = true; + name = components[i-2]; + } + else return false; } - // look at the top two levels of the name - return uriblookup(priv, hosts, hostname, top, found); + else { + tt = tlds->find(name); + if (tt != xtlds) { + if (i > 0) { + rc = true; + name = components[i-1]; + } + else return false; + } + } + } + if (rc) { + return uriblookup(priv, hosts, name, found); + } } return false; } @@ -674,7 +681,7 @@ if (!memory) { // first recipient that needs content filtering sets // some of the content filtering parameters - memory = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_cctlds()); + memory = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_tldwilds(), con.get_content_tldnots()); scanner = new url_scanner(memory); content_suffix = con.get_content_suffix(); content_message = con.get_content_message();