# HG changeset patch # User carl # Date 1085677731 25200 # Node ID 33e1e3910506412b6dba89256b8dfba24d295c6c # Parent 43a4f6b3e668cc1c5768f1de1ad938bdfe0a6501 add configurable list of tlds diff -r 43a4f6b3e668 -r 33e1e3910506 install.bash --- a/install.bash Sat May 22 22:30:45 2004 -0700 +++ b/install.bash Thu May 27 10:08:51 2004 -0700 @@ -33,6 +33,9 @@ if [ ! -f $DST/html-tags.conf ]; then cp html-tags.conf $DST fi +if [ ! -f $DST/tld.conf ]; then + cp tld.conf $DST +fi mv -f dnsbl $DST cp dnsbl.rc /etc/rc.d/init.d/dnsbl chmod 755 /etc/rc.d/init.d/dnsbl diff -r 43a4f6b3e668 -r 33e1e3910506 sendmail.st Binary file sendmail.st has changed diff -r 43a4f6b3e668 -r 33e1e3910506 src/dnsbl.cpp --- a/src/dnsbl.cpp Sat May 22 22:30:45 2004 -0700 +++ b/src/dnsbl.cpp Thu May 27 10:08:51 2004 -0700 @@ -132,6 +132,7 @@ char * tag_limit_message; // error message for excessive bad html tags int tag_limit; // limit on bad html tags string_set html_tags; // set of valid html tags + string_set tlds; // set of valid tld components CONFIG(); ~CONFIG(); }; @@ -244,7 +245,7 @@ authenticated = false; have_whites = false; only_whites = true; - memory = new recorder(&pc->html_tags); + memory = new recorder(&pc->html_tags, &pc->tlds); scanner = new url_scanner(memory); } mlfiPriv::~mlfiPriv() { @@ -263,7 +264,7 @@ authenticated = false; have_whites = false; only_whites = true; - memory = new recorder(&pc->html_tags); + memory = new recorder(&pc->html_tags, &pc->tlds); scanner = new url_scanner(memory); } } @@ -845,7 +846,8 @@ static void load_conf(CONFIG &dc, char *fn) { dc.config_files.push_back(fn); map commands; - enum {dummy, content, hostlimit, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc}; + enum {dummy, tld, content, hostlimit, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc}; + commands["tld" ] = tld; commands["content" ] = content; commands["host_limit" ] = hostlimit; commands["html_limit" ] = htmllimit; @@ -874,6 +876,15 @@ // have a decent command bool processed = false; switch (commands[cmd]) { + case tld: { + char *tld = strtok(NULL, delim); + if (!tld) break; // no tld value + char buf[200]; + snprintf(buf, sizeof(buf), ".%s", tld); + dc.tlds.insert(register_string(buf)); // leading . + processed = true; + } break; + case content: { char *suff = strtok(NULL, delim); if (!suff) break; // no dns suffix diff -r 43a4f6b3e668 -r 33e1e3910506 src/package --- a/src/package Sat May 22 22:30:45 2004 -0700 +++ b/src/package Thu May 27 10:08:51 2004 -0700 @@ -1,13 +1,14 @@ #!/bin/bash -VER=dnsbl-2.2 +VER=dnsbl-2.4 mkdir $VER target1=/home/httpd/html/510sg/util/dnsbl.tar.gz target2=/home/httpd/html/510sg/dnsbl.conf target3=/home/httpd/html/510sg/dnsbl.html cp sample.conf $VER/dnsbl.conf - cp html-tags.conf $VER/html-tags.conf + cp html-tags.conf $VER + cp tld.conf $VER cp *cpp $VER cp *rc $VER cp install.bash $VER @@ -20,4 +21,7 @@ echo scp $target1 ns1:$target1 echo scp $target2 ns1:$target2 echo scp $target3 ns1:$target3 + scp $target1 ams:/tmp/`basename $target1` + scp $target2 ams:/tmp/`basename $target2` + scp $target3 ams:/tmp/`basename $target3` rm -rf $VER diff -r 43a4f6b3e668 -r 33e1e3910506 src/scanner.cpp --- a/src/scanner.cpp Sat May 22 22:30:45 2004 -0700 +++ b/src/scanner.cpp Thu May 27 10:08:51 2004 -0700 @@ -15,18 +15,20 @@ struct recorder { string_set *html_tags; // valid tags + string_set *tlds; // valid tlds string_set hosts; int bad_html_tags; int binary_tags; - recorder(string_set *html_tags_); + recorder(string_set *html_tags_, string_set *tlds_); ~recorder(); void empty(); void new_url(char *host); void new_tag(char *tag); void binary(); }; -recorder::recorder(string_set *html_tags_) { +recorder::recorder(string_set *html_tags_, string_set *tlds_) { html_tags = html_tags_; + tlds = tlds_; bad_html_tags = 0; binary_tags = 0; } @@ -35,6 +37,7 @@ } void recorder::empty() { bad_html_tags = 0; + binary_tags = 0; discard(hosts); } void recorder::new_url(char *host) { @@ -47,7 +50,7 @@ string_set::iterator i = html_tags->find(tag); if (i == html_tags->end()) { bad_html_tags++; - if (debug_syslog && (bad_html_tags < 10)) { + if (debug_syslog && (bad_html_tags < 10) && (binary_tags < 10)) { // only log the first 10 bad tags char buf[200]; snprintf(buf, sizeof(buf), "bad html tag %s", tag); @@ -374,15 +377,6 @@ }; -char *tlds[] = { - ".com", - ".net", - ".org", - ".biz", - ".info", - NULL -}; - u_char hex_decode[256] = { 0, // 0x00 0, // 0x01 @@ -953,15 +947,14 @@ pending[--count] = '\0'; // null terminate host name by overwriting the terminator if (!strchr((const char *)pending, '@')) { // not an email address or message id - char *tld; - for (int i=0; (tld = tlds[i]); i++) { - int n = strlen(tld); - if (count > n) { - if (strncasecmp((const char *)(pending+count-n), tld, n) == 0) { - memory->new_url((char*)pending); - break; - } - } + char *p1 = strchr((const char *)pending, '.'); + char *p2 = strrchr((const char *)pending, '.'); + if (p1 && (p1 != p2)) { + // have two periods, so three components + for (int i=1; itlds->find(p2); + if (i != memory->tlds->end()) memory->new_url((char*)pending); } } st = h_init; diff -r 43a4f6b3e668 -r 33e1e3910506 xml/sample.conf --- a/xml/sample.conf Sat May 22 22:30:45 2004 -0700 +++ b/xml/sample.conf Thu May 27 10:08:51 2004 -0700 @@ -4,6 +4,9 @@ # tokens are separated by spaces or tabs # # +# tld: +# second token is the tld suffix - com, net, org, etc +# # content: # second token is the dns suffix used for the actual lookups # third token? is a string enclosed in single quotes, so it @@ -102,6 +105,7 @@ host_limit 20 'Mail containing too many host names rejected' html_limit 20 'Mail containing excessive bad html tags rejected' include html-tags.conf +include tld.conf ##############################################