comparison src/dnsbl.cpp @ 117:aa07452e641b

uribl patch from Jeff Evans <jeffe@tricab.com>
author carl
date Sun, 12 Mar 2006 10:15:39 -0800
parents 07e5d4721213
children d9d2f8699621
comparison
equal deleted inserted replaced
116:0094678a16d0 117:aa07452e641b
363 void mlfiPriv::need_content_filter(char *rcpt, CONTEXT &con) { 363 void mlfiPriv::need_content_filter(char *rcpt, CONTEXT &con) {
364 register_string(env_to, rcpt, &con); 364 register_string(env_to, rcpt, &con);
365 if (!memory) { 365 if (!memory) {
366 // first recipient that needs content filtering sets all 366 // first recipient that needs content filtering sets all
367 // the content filtering parameters 367 // the content filtering parameters
368 memory = new recorder(this, con.get_html_tags(), con.get_content_tlds()); 368 memory = new recorder(this, con.get_html_tags(), con.get_content_tlds(), con.get_content_cctlds());
369 scanner = new url_scanner(memory); 369 scanner = new url_scanner(memory);
370 content_suffix = con.get_content_suffix(); 370 content_suffix = con.get_content_suffix();
371 content_message = con.get_content_message(); 371 content_message = con.get_content_message();
372 content_host_ignore = &con.get_content_host_ignore(); 372 content_host_ignore = &con.get_content_host_ignore();
373 } 373 }
648 return false; 648 return false;
649 } 649 }
650 650
651 651
652 //////////////////////////////////////////////// 652 ////////////////////////////////////////////////
653 // look up the domain name part of a hostname on two uri blocklists (multi.surbl.org, then multi.uribl.com)
654 // returns true as soon as dns_interface() returns true for either list, false if neither does
655 bool uriblookup(mlfiPriv &priv ,char *hostname, char *top) ;
656 bool uriblookup(mlfiPriv &priv, char *hostname, char *top) {
657 // top is pointer to the '.' char just before the suffix being kept, or null for ip address form
658 // so for hostname of www.fred.mydomain.co.uk
659 // top points to-----------------------^
660 // and we end up looking at only mydomain.co.uk, ignoring the www.fred stuff
661 char buf[maxlen];
662 char buf2[maxlen]; // NOTE(review): buf2 is never referenced in this function
663 const char *uriblname[2] = { "multi.surbl.org", "multi.uribl.com" }; // list zones, queried in this order
664
665 if (top) {
666 // keep exactly one label to the left of top: cut the string at top, find the previous '.', start just after it
667 *top = '\0';
668 char *x = strrchr(hostname, '.');
669 if (x) hostname = x+1; // with no earlier '.', the whole hostname is used unchanged
670 *top = '.'; // restore the '.' so the suffix is part of the string again
671 }
672 for (int i=0; i<2; i++) {
673 snprintf(buf, sizeof(buf), "%s.%s", hostname, uriblname[i]); // query name is <hostname>.<list zone>, truncated to fit buf
674 if (debug_syslog > 2) {
675 char tmp[maxlen];
676 snprintf(tmp, sizeof(tmp), "Looking up %s on %s", hostname, uriblname[i]);
677 my_syslog(tmp); // NOTE(review): check_hosts calls my_syslog(&priv, buf); confirm the one-argument form is intended here
678 }
679 if (dns_interface(priv, buf, false, NULL)) return true; // first hit wins, the second list is not queried
680 }
681 return false;
682 }
683
684
685 ////////////////////////////////////////////////
686 // uribl checker
687 // -------------
688 // hostname MUST NOT have a trailing dot
689 // If the last two labels are in the cctlds set (e.g. co.uk), only the top three levels are looked up.
690 // Else, look up the top three levels (only when the name has more than three), then the top two levels.
691 bool check_uribl(mlfiPriv &priv, char *hostname) ;
692 bool check_uribl(mlfiPriv &priv, char *hostname) {
693 in_addr ip;
694 if (inet_aton(hostname, &ip)) { // hostname parses as an ipv4 address: look up the address text, not a domain
695 char adr[sizeof "255.255.255.255"];
696 adr[0] = '\0';
697 inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); // NOTE(review): result is not checked; adr stays empty if this fails
698 return (uriblookup(priv, adr, NULL)); // NOTE(review): queried in natural octet order; SURBL guidelines appear to call for reversed octets - confirm
699 }
700
701 char *top, *top2, *top3;
702 top = strrchr(hostname, '.'); // top: the last '.' in the name
703 if (top) {
704 *top = '\0'; // temporarily cut at top so the next search finds the '.' before it
705 top2 = strrchr(hostname, '.'); // top2: the second to last '.', or null for a two label name
706 *top = '.';
707
708 if (top2) {
709 string_set::iterator i = priv.memory->get_cctlds()->find(top2+1); // top2+1 is the last two labels, e.g. co.uk
710 string_set::iterator x = priv.memory->get_cctlds()->end();
711 // if we have a 2-level-cctld, just look at top three levels of the name
712 if (i != x) return uriblookup(priv, hostname, top2);
713
714 *top2 = '\0'; // same temporary cut, one level further left
715 top3 = strrchr(hostname, '.'); // top3: the third to last '.', or null for a three label name
716 *top2 = '.';
717
718 // if we have more than 3 levels in the name, look at the top three levels of the name
719 if (top3 && uriblookup(priv, hostname, top2)) return true;
720 // if that was not found, fall through to looking at the top two levels
721 }
722 // look at the top two levels of the name
723 return uriblookup(priv, hostname, top);
724 }
725 return false; // no '.' in the name and not an ip address: nothing is looked up
726 }
727
728
729 ////////////////////////////////////////////////
653 // check the hosts from the body against the content dnsbl 730 // check the hosts from the body against the content dnsbl
654 // 731 //
655 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip); 732 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip);
656 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip) { 733 bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int &ip) {
657 CONFIG &dc = *priv.pc; 734 CONFIG &dc = *priv.pc;
698 my_syslog(&priv, buf); 775 my_syslog(&priv, buf);
699 } 776 }
700 if (ip) { 777 if (ip) {
701 int_set::iterator i = ips.find(ip); 778 int_set::iterator i = ips.find(ip);
702 if (i == ips.end()) { 779 if (i == ips.end()) {
780 // we haven't looked this up yet
703 ips.insert(ip); 781 ips.insert(ip);
704 if (check_single(priv, ip, suffix)) { 782 if (check_single(priv, ip, suffix)) return true;
705 return true; 783 // Check uribl & surbl
706 } 784 if (check_uribl(priv, host)) return true;
707 } 785 }
708 } 786 }
709 } 787 }
710 limit *= 4; // allow average of 3 ns per host name 788 limit *= 4; // allow average of 3 ns per host name
711 for (ns_mapper::iterator i=nameservers.ns_ip.begin(); i!=nameservers.ns_ip.end(); i++) { 789 for (ns_mapper::iterator i=nameservers.ns_ip.begin(); i!=nameservers.ns_ip.end(); i++) {
750 } 828 }
751 } 829 }
752 } 830 }
753 return false; 831 return false;
754 } 832 }
755
756 833
757 //////////////////////////////////////////////// 834 ////////////////////////////////////////////////
758 // this email address is passed in from sendmail, and will 835 // this email address is passed in from sendmail, and will
759 // always be enclosed in <>. It may have mixed case, just 836 // always be enclosed in <>. It may have mixed case, just
760 // as the mail client sent it. We dup the string and convert 837 // as the mail client sent it. We dup the string and convert