Mercurial > dnsbl
comparison src/dnsbl.cpp @ 117:aa07452e641b
uribl patch from Jeff Evans <jeffe@tricab.com>
author | carl |
---|---|
date | Sun, 12 Mar 2006 10:15:39 -0800 |
parents | 07e5d4721213 |
children | d9d2f8699621 |
comparison
equal
deleted
inserted
replaced
116:0094678a16d0 | 117:aa07452e641b |
---|---|
363 void mlfiPriv::need_content_filter(char *rcpt, CONTEXT &con) { | 363 void mlfiPriv::need_content_filter(char *rcpt, CONTEXT &con) { |
364 register_string(env_to, rcpt, &con); | 364 register_string(env_to, rcpt, &con); |
365 if (!memory) { | 365 if (!memory) { |
366 // first recipient that needs content filtering sets all | 366 // first recipient that needs content filtering sets all |
367 // the content filtering parameters | 367 // the content filtering parameters |
368 memory = new recorder(this, con.get_html_tags(), con.get_content_tlds()); | 368 memory = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_cctlds()); |
369 scanner = new url_scanner(memory); | 369 scanner = new url_scanner(memory); |
370 content_suffix = con.get_content_suffix(); | 370 content_suffix = con.get_content_suffix(); |
371 content_message = con.get_content_message(); | 371 content_message = con.get_content_message(); |
372 content_host_ignore = &con.get_content_host_ignore(); | 372 content_host_ignore = &con.get_content_host_ignore(); |
373 } | 373 } |
648 return false; | 648 return false; |
649 } | 649 } |
650 | 650 |
651 | 651 |
652 //////////////////////////////////////////////// | 652 //////////////////////////////////////////////// |
653 // lookup the domain name part of a hostname on two lists | |
654 // | |
655 bool uriblookup(mlfiPriv &priv ,char *hostname, char *top) ; | |
656 bool uriblookup(mlfiPriv &priv, char *hostname, char *top) { | |
657 // top is pointer to '.' char at end of base domain, or null for ip address form | |
658 // so for hostname of www.fred.mydomain.co.uk | |
659 // top points to-----------------------^ | |
660 // and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff | |
661 char buf[maxlen]; | |
662 char buf2[maxlen]; | |
663 const char *uriblname[2] = { "multi.surbl.org", "multi.uribl.com" }; | |
664 | |
665 if (top) { | |
666 // add one more component | |
667 *top = '\0'; | |
668 char *x = strrchr(hostname, '.'); | |
669 if (x) hostname = x+1; | |
670 *top = '.'; | |
671 } | |
672 for (int i=0; i<2; i++) { | |
673 snprintf(buf, sizeof(buf), "%s.%s", hostname, uriblname[i]); | |
674 if (debug_syslog > 2) { | |
675 char tmp[maxlen]; | |
676 snprintf(tmp, sizeof(tmp), "Looking up %s on %s", hostname, uriblname[i]); | |
677 my_syslog(tmp); | |
678 } | |
679 if (dns_interface(priv, buf, false, NULL)) return true; | |
680 } | |
681 return false; | |
682 } | |
683 | |
684 | |
685 //////////////////////////////////////////////// | |
686 // uribl checker | |
687 // ------------- | |
688 // hostname MUST not have a trailing dot | |
689 // If tld, two level lookup. | |
690 // Else, look up three level domain. | |
691 bool check_uribl(mlfiPriv &priv, char *hostname) ; | |
692 bool check_uribl(mlfiPriv &priv, char *hostname) { | |
693 in_addr ip; | |
694 if (inet_aton(hostname, &ip)) { | |
695 char adr[sizeof "255.255.255.255"]; | |
696 adr[0] = '\0'; | |
697 inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); | |
698 return (uriblookup(priv, adr, NULL)); | |
699 } | |
700 | |
701 char *top, *top2, *top3; | |
702 top = strrchr(hostname, '.'); | |
703 if (top) { | |
704 *top = '\0'; | |
705 top2 = strrchr(hostname, '.'); | |
706 *top = '.'; | |
707 | |
708 if (top2) { | |
709 string_set::iterator i = priv.memory->get_cctlds()->find(top2+1); | |
710 string_set::iterator x = priv.memory->get_cctlds()->end(); | |
711 // if we have a 2-level-cctld, just look at top three levels of the name | |
712 if (i != x) return uriblookup(priv, hostname, top2); | |
713 | |
714 *top2 = '\0'; | |
715 top3 = strrchr(hostname, '.'); | |
716 *top2 = '.'; | |
717 | |
718 // if we have more than 3 levels in the name, look at the top three levels of the name | |
719 if (top3 && uriblookup(priv, hostname, top2)) return true; | |
720 // if that was not found, fall thru to looking at the top two levels | |
721 } | |
722 // look at the top two levels of the name | |
723 return uriblookup(priv, hostname, top); | |
724 } | |
725 return false; | |
726 } | |
727 | |
728 | |
729 //////////////////////////////////////////////// | |
653 // check the hosts from the body against the content dnsbl | 730 // check the hosts from the body against the content dnsbl |
654 // | 731 // |
655 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip); | 732 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip); |
656 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip) { | 733 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip) { |
657 CONFIG &dc = *priv.pc; | 734 CONFIG &dc = *priv.pc; |
698 my_syslog(&priv, buf); | 775 my_syslog(&priv, buf); |
699 } | 776 } |
700 if (ip) { | 777 if (ip) { |
701 int_set::iterator i = ips.find(ip); | 778 int_set::iterator i = ips.find(ip); |
702 if (i == ips.end()) { | 779 if (i == ips.end()) { |
780 // we haven't looked this up yet | |
703 ips.insert(ip); | 781 ips.insert(ip); |
704 if (check_single(priv, ip, suffix)) { | 782 if (check_single(priv, ip, suffix)) return true; |
705 return true; | 783 // Check uribl & surbl |
706 } | 784 if (check_uribl(priv, host)) return true; |
707 } | 785 } |
708 } | 786 } |
709 } | 787 } |
710 limit *= 4; // allow average of 3 ns per host name | 788 limit *= 4; // allow average of 3 ns per host name |
711 for (ns_mapper::iterator i=nameservers.ns_ip.begin(); i!=nameservers.ns_ip.end(); i++) { | 789 for (ns_mapper::iterator i=nameservers.ns_ip.begin(); i!=nameservers.ns_ip.end(); i++) { |
750 } | 828 } |
751 } | 829 } |
752 } | 830 } |
753 return false; | 831 return false; |
754 } | 832 } |
755 | |
756 | 833 |
757 //////////////////////////////////////////////// | 834 //////////////////////////////////////////////// |
758 // this email address is passed in from sendmail, and will | 835 // this email address is passed in from sendmail, and will |
759 // always be enclosed in <>. It may have mixed case, just | 836 // always be enclosed in <>. It may have mixed case, just |
760 // as the mail client sent it. We dup the string and convert | 837 // as the mail client sent it. We dup the string and convert |