comparison src/dnsbl.cpp @ 270:f92f24950bd3 stable-6-0-35

Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
author Carl Byington <carl@five-ten-sg.com>
date Mon, 09 Sep 2013 15:15:53 -0700
parents f941563c2a95
children a99b6c1f5f67
comparison
equal deleted inserted replaced
269:6d2a11f0ae41 270:f92f24950bd3
1 /* 1 /*
2 2
3 Copyright (c) 2009 Carl Byington - 510 Software Group, released under 3 Copyright (c) 2013 Carl Byington - 510 Software Group, released under
4 the GPL version 3 or any later version at your choice available at 4 the GPL version 3 or any later version at your choice available at
5 http://www.gnu.org/licenses/gpl-3.0.txt 5 http://www.gnu.org/licenses/gpl-3.0.txt
6 6
7 Based on a sample milter Copyright (c) 2000-2003 Sendmail, Inc. and its 7 Based on a sample milter Copyright (c) 2000-2003 Sendmail, Inc. and its
8 suppliers. Inspired by the DCC by Rhyolite Software 8 suppliers. Inspired by the DCC by Rhyolite Software
387 #endif 387 #endif
388 } 388 }
389 389
390 390
391 //////////////////////////////////////////////// 391 ////////////////////////////////////////////////
392 // lookup the domain name part of a hostname on the uribl 392 // lookup a hostname on the uribl
393 // 393 //
394 // if we find part of the hostname on the uribl, return 394 // if we find hostname on the uribl, return true and point found to hostname
395 // true and point found to the part of the hostname that we found
396 // as a string registered in hosts. 395 // as a string registered in hosts.
397 // otherwise, return false and preserve the value of found. 396 // otherwise, return false and preserve the value of found.
398 // 397 //
399 bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) ; 398 bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ;
400 bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) { 399 bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) {
401 // top is pointer to '.' char at end of base domain, or null for ip address form 400 if (debug_syslog > 4) {
402 // so for hostname of www.fred.mydomain.co.uk 401 char tmp[maxlen];
403 // top points to-----------------------^ 402 snprintf(tmp, sizeof(tmp), "looking for %s on %s", hostname, priv.uribl_suffix);
404 // and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff 403 my_syslog(tmp);
404 }
405 char buf[maxlen]; 405 char buf[maxlen];
406 if (top) {
407 // add one more component
408 const char *x = (const char *)memrchr(hostname, '.', top-hostname);
409 if (x) hostname = x+1;
410 }
411 snprintf(buf, sizeof(buf), "%s.%s.", hostname, priv.uribl_suffix); 406 snprintf(buf, sizeof(buf), "%s.%s.", hostname, priv.uribl_suffix);
412 if (dns_interface(priv, buf, false, NULL)) { 407 uint32_t ip = ntohl(dns_interface(priv, buf, false, NULL));
408 if (ip and (ip != 0x7f000000)) {
413 if (debug_syslog > 2) { 409 if (debug_syslog > 2) {
414 char tmp[maxlen]; 410 char tmp[maxlen];
415 snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix); 411 snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix);
416 my_syslog(tmp); 412 my_syslog(tmp);
417 } 413 }
423 419
424 420
425 //////////////////////////////////////////////// 421 ////////////////////////////////////////////////
426 // uribl checker 422 // uribl checker
427 // ------------- 423 // -------------
428 // hostname MUST not have a trailing dot 424 // hostname MUST not have a trailing dot. Find the tld part of
429 // If tld, two level lookup. 425 // the hostname, and add one more level. If that is listed on
430 // Else, look up three level domain. 426 // the uribl, return true and point found to the part of the
431 // 427 // hostname that we found as a string registered in hosts.
432 // if we find part of the hostname on the uribl, return 428 // Otherwise, return false and preserve the value of found.
433 // true and point found to the part of the hostname that we found
434 // as a string registered in hosts.
435 // otherwise, return false and preserve the value of found.
436 // 429 //
437 bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ; 430 bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ;
438 bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) { 431 bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) {
439 in_addr ip; 432 in_addr ip;
440 if (inet_aton(hostname, &ip)) { 433 if (inet_aton(hostname, &ip)) return false; // don't check ip addresses in uribls
441 const u_char *src = (const u_char *)&ip.s_addr; 434 const char* components[maxlen];
442 if (src[0] == 127) return false; // don't do dns lookups on localhost 435 int n = 0; // number of components in the hostname
443 if (src[0] == 10) return false; // don't do dns lookups on rfc1918 space 436 while (n < maxlen) {
444 if ((src[0] == 192) && (src[1] == 168)) return false; 437 components[n++] = hostname;
445 if ((src[0] == 172) && (16 <= src[1]) && (src[1] <= 31)) return false; 438 const char *c = strchr(hostname, '.');
446 char adr[sizeof "255.255.255.255 "]; 439 if (!c) break;
447 snprintf(adr, sizeof(adr), "%u.%u.%u.%u", src[3], src[2], src[1], src[0]); 440 hostname = c+1;
448 // cannot use inet_ntop here since we want the octets reversed. 441 }
449 return (uriblookup(priv, hosts, adr, NULL, found)); 442 string_set *tlds = priv.memory->get_tlds();
450 } 443 string_set *tldwilds = priv.memory->get_tldwilds();
451 444 string_set *tldnots = priv.memory->get_tldnots();
452 const char *top, *top2, *top3; 445 string_set::iterator xtlds = tlds->end();
453 top = strrchr(hostname, '.'); 446 string_set::iterator xtldwilds = tldwilds->end();
454 if (top) { 447 string_set::iterator xtldnots = tldnots->end();
455 top2 = (const char *)memrchr(hostname, '.', top-hostname); 448 for (int i=max(0,n-4); i<n; i++) {
456 449 const char* name = components[i];
457 if (top2) { 450 bool rc = false;
458 string_set::iterator i = priv.memory->get_cctlds()->find(top2+1); 451 string_set::iterator tt = tldnots->find(name);
459 string_set::iterator x = priv.memory->get_cctlds()->end(); 452 if (tt != xtldnots) {
460 // if we have a 2-level-cctld, just look at top three levels of the name 453 rc = true;
461 if (i != x) return uriblookup(priv, hosts, hostname, top2, found); 454 }
462 455 else {
463 // if we have more than 3 levels in the name, look at the top three levels of the name 456 tt = tldwilds->find(name);
464 top3 = (const char *)memrchr(hostname, '.', top2-hostname); 457 if (tt != xtldwilds) {
465 if (top3 && uriblookup(priv, hosts, hostname, top2, found)) return true; 458 if (i > 1) {
466 459 rc = true;
467 // if that was not found, fall thru to looking at the top two levels 460 name = components[i-2];
468 } 461 }
469 // look at the top two levels of the name 462 else return false;
470 return uriblookup(priv, hosts, hostname, top, found); 463 }
464 else {
465 tt = tlds->find(name);
466 if (tt != xtlds) {
467 if (i > 0) {
468 rc = true;
469 name = components[i-1];
470 }
471 else return false;
472 }
473 }
474 }
475 if (rc) {
476 return uriblookup(priv, hosts, name, found);
477 }
471 } 478 }
472 return false; 479 return false;
473 } 480 }
474 481
475 482
672 679
673 void mlfiPriv::need_content_filter(const char *rcpt, CONTEXT &con) { 680 void mlfiPriv::need_content_filter(const char *rcpt, CONTEXT &con) {
674 if (!memory) { 681 if (!memory) {
675 // first recipient that needs content filtering sets 682 // first recipient that needs content filtering sets
676 // some of the content filtering parameters 683 // some of the content filtering parameters
677 memory = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_cctlds()); 684 memory = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_tldwilds(), con.get_content_tldnots());
678 scanner = new url_scanner(memory); 685 scanner = new url_scanner(memory);
679 content_suffix = con.get_content_suffix(); 686 content_suffix = con.get_content_suffix();
680 content_message = con.get_content_message(); 687 content_message = con.get_content_message();
681 uribl_suffix = con.get_uribl_suffix(); 688 uribl_suffix = con.get_uribl_suffix();
682 uribl_message = con.get_uribl_message(); 689 uribl_message = con.get_uribl_message();