Mercurial > dnsbl
comparison src/dnsbl.cpp @ 270:f92f24950bd3 stable-6-0-35
Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Mon, 09 Sep 2013 15:15:53 -0700 |
parents | f941563c2a95 |
children | a99b6c1f5f67 |
comparison legend: equal | deleted | inserted | replaced
269:6d2a11f0ae41 | 270:f92f24950bd3 |
---|---|
1 /* | 1 /* |
2 | 2 |
3 Copyright (c) 2009 Carl Byington - 510 Software Group, released under | 3 Copyright (c) 2013 Carl Byington - 510 Software Group, released under |
4 the GPL version 3 or any later version at your choice available at | 4 the GPL version 3 or any later version at your choice available at |
5 http://www.gnu.org/licenses/gpl-3.0.txt | 5 http://www.gnu.org/licenses/gpl-3.0.txt |
6 | 6 |
7 Based on a sample milter Copyright (c) 2000-2003 Sendmail, Inc. and its | 7 Based on a sample milter Copyright (c) 2000-2003 Sendmail, Inc. and its |
8 suppliers. Inspired by the DCC by Rhyolite Software | 8 suppliers. Inspired by the DCC by Rhyolite Software |
387 #endif | 387 #endif |
388 } | 388 } |
389 | 389 |
390 | 390 |
391 //////////////////////////////////////////////// | 391 //////////////////////////////////////////////// |
392 // lookup the domain name part of a hostname on the uribl | 392 // lookup a hostname on the uribl |
393 // | 393 // |
394 // if we find part of the hostname on the uribl, return | 394 // if we find hostname on the uribl, return true and point found to hostname |
395 // true and point found to the part of the hostname that we found | |
396 // as a string registered in hosts. | 395 // as a string registered in hosts. |
397 // otherwise, return false and preserve the value of found. | 396 // otherwise, return false and preserve the value of found. |
398 // | 397 // |
399 bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) ; | 398 bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ; |
400 bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *top, const char *&found) { | 399 bool uriblookup(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) { |
401 // top is pointer to '.' char at end of base domain, or null for ip address form | 400 if (debug_syslog > 4) { |
402 // so for hostname of www.fred.mydomain.co.uk | 401 char tmp[maxlen]; |
403 // top points to-----------------------^ | 402 snprintf(tmp, sizeof(tmp), "looking for %s on %s", hostname, priv.uribl_suffix); |
404 // and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff | 403 my_syslog(tmp); |
404 } | |
405 char buf[maxlen]; | 405 char buf[maxlen]; |
406 if (top) { | |
407 // add one more component | |
408 const char *x = (const char *)memrchr(hostname, '.', top-hostname); | |
409 if (x) hostname = x+1; | |
410 } | |
411 snprintf(buf, sizeof(buf), "%s.%s.", hostname, priv.uribl_suffix); | 406 snprintf(buf, sizeof(buf), "%s.%s.", hostname, priv.uribl_suffix); |
412 if (dns_interface(priv, buf, false, NULL)) { | 407 uint32_t ip = ntohl(dns_interface(priv, buf, false, NULL)); |
408 if (ip and (ip != 0x7f000000)) { | |
413 if (debug_syslog > 2) { | 409 if (debug_syslog > 2) { |
414 char tmp[maxlen]; | 410 char tmp[maxlen]; |
415 snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix); | 411 snprintf(tmp, sizeof(tmp), "found %s on %s", hostname, priv.uribl_suffix); |
416 my_syslog(tmp); | 412 my_syslog(tmp); |
417 } | 413 } |
423 | 419 |
424 | 420 |
425 //////////////////////////////////////////////// | 421 //////////////////////////////////////////////// |
426 // uribl checker | 422 // uribl checker |
427 // ------------- | 423 // ------------- |
428 // hostname MUST not have a trailing dot | 424 // hostname MUST not have a trailing dot. Find the tld part of |
429 // If tld, two level lookup. | 425 // the hostname, and add one more level. If that is listed on |
430 // Else, look up three level domain. | 426 // the uribl, return true and point found to the part of the |
431 // | 427 // hostname that we found as a string registered in hosts. |
432 // if we find part of the hostname on the uribl, return | 428 // Otherwise, return false and preserve the value of found. |
433 // true and point found to the part of the hostname that we found | |
434 // as a string registered in hosts. | |
435 // otherwise, return false and preserve the value of found. | |
436 // | 429 // |
437 bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ; | 430 bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) ; |
438 bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) { | 431 bool check_uribl(mlfiPriv &priv, string_set &hosts, const char *hostname, const char *&found) { |
439 in_addr ip; | 432 in_addr ip; |
440 if (inet_aton(hostname, &ip)) { | 433 if (inet_aton(hostname, &ip)) return false; // don't check ip addresses in uribls |
441 const u_char *src = (const u_char *)&ip.s_addr; | 434 const char* components[maxlen]; |
442 if (src[0] == 127) return false; // don't do dns lookups on localhost | 435 int n = 0; // number of components in the hostname |
443 if (src[0] == 10) return false; // don't do dns lookups on rfc1918 space | 436 while (n < maxlen) { |
444 if ((src[0] == 192) && (src[1] == 168)) return false; | 437 components[n++] = hostname; |
445 if ((src[0] == 172) && (16 <= src[1]) && (src[1] <= 31)) return false; | 438 const char *c = strchr(hostname, '.'); |
446 char adr[sizeof "255.255.255.255 "]; | 439 if (!c) break; |
447 snprintf(adr, sizeof(adr), "%u.%u.%u.%u", src[3], src[2], src[1], src[0]); | 440 hostname = c+1; |
448 // cannot use inet_ntop here since we want the octets reversed. | 441 } |
449 return (uriblookup(priv, hosts, adr, NULL, found)); | 442 string_set *tlds = priv.memory->get_tlds(); |
450 } | 443 string_set *tldwilds = priv.memory->get_tldwilds(); |
451 | 444 string_set *tldnots = priv.memory->get_tldnots(); |
452 const char *top, *top2, *top3; | 445 string_set::iterator xtlds = tlds->end(); |
453 top = strrchr(hostname, '.'); | 446 string_set::iterator xtldwilds = tldwilds->end(); |
454 if (top) { | 447 string_set::iterator xtldnots = tldnots->end(); |
455 top2 = (const char *)memrchr(hostname, '.', top-hostname); | 448 for (int i=max(0,n-4); i<n; i++) { |
456 | 449 const char* name = components[i]; |
457 if (top2) { | 450 bool rc = false; |
458 string_set::iterator i = priv.memory->get_cctlds()->find(top2+1); | 451 string_set::iterator tt = tldnots->find(name); |
459 string_set::iterator x = priv.memory->get_cctlds()->end(); | 452 if (tt != xtldnots) { |
460 // if we have a 2-level-cctld, just look at top three levels of the name | 453 rc = true; |
461 if (i != x) return uriblookup(priv, hosts, hostname, top2, found); | 454 } |
462 | 455 else { |
463 // if we have more than 3 levels in the name, look at the top three levels of the name | 456 tt = tldwilds->find(name); |
464 top3 = (const char *)memrchr(hostname, '.', top2-hostname); | 457 if (tt != xtldwilds) { |
465 if (top3 && uriblookup(priv, hosts, hostname, top2, found)) return true; | 458 if (i > 1) { |
466 | 459 rc = true; |
467 // if that was not found, fall thru to looking at the top two levels | 460 name = components[i-2]; |
468 } | 461 } |
469 // look at the top two levels of the name | 462 else return false; |
470 return uriblookup(priv, hosts, hostname, top, found); | 463 } |
464 else { | |
465 tt = tlds->find(name); | |
466 if (tt != xtlds) { | |
467 if (i > 0) { | |
468 rc = true; | |
469 name = components[i-1]; | |
470 } | |
471 else return false; | |
472 } | |
473 } | |
474 } | |
475 if (rc) { | |
476 return uriblookup(priv, hosts, name, found); | |
477 } | |
471 } | 478 } |
472 return false; | 479 return false; |
473 } | 480 } |
474 | 481 |
475 | 482 |
672 | 679 |
673 void mlfiPriv::need_content_filter(const char *rcpt, CONTEXT &con) { | 680 void mlfiPriv::need_content_filter(const char *rcpt, CONTEXT &con) { |
674 if (!memory) { | 681 if (!memory) { |
675 // first recipient that needs content filtering sets | 682 // first recipient that needs content filtering sets |
676 // some of the content filtering parameters | 683 // some of the content filtering parameters |
677 memory = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_cctlds()); | 684 memory = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_tldwilds(), con.get_content_tldnots()); |
678 scanner = new url_scanner(memory); | 685 scanner = new url_scanner(memory); |
679 content_suffix = con.get_content_suffix(); | 686 content_suffix = con.get_content_suffix(); |
680 content_message = con.get_content_message(); | 687 content_message = con.get_content_message(); |
681 uribl_suffix = con.get_uribl_suffix(); | 688 uribl_suffix = con.get_uribl_suffix(); |
682 uribl_message = con.get_uribl_message(); | 689 uribl_message = con.get_uribl_message(); |