# HG changeset patch # User carl # Date 1082658345 25200 # Node ID dbe18921f74181f5d659467702d0cb369cacce56 # Parent 93ff6d1ef6475171c38858c5a91e54118cac407b integration work on url scanner diff -r 93ff6d1ef647 -r dbe18921f741 install.bash --- a/install.bash Thu Apr 22 08:38:07 2004 -0700 +++ b/install.bash Thu Apr 22 11:25:45 2004 -0700 @@ -11,8 +11,13 @@ ##################### # build the milter g++ -c dnsbl.cpp +if [ $? -ne 0 ]; then + echo "compiler errors" + exit +fi g++ -o dnsbl dnsbl.o /usr/lib/libresolv.a -lmilter -pthread -if [ "$1" == "comp" ]; then +if [ $? -ne 0 ]; then + echo "linker errors" exit fi diff -r 93ff6d1ef647 -r dbe18921f741 sendmail.st Binary file sendmail.st has changed diff -r 93ff6d1ef647 -r dbe18921f741 src/dnsbl.cpp --- a/src/dnsbl.cpp Thu Apr 22 08:38:07 2004 -0700 +++ b/src/dnsbl.cpp Thu Apr 22 11:25:45 2004 -0700 @@ -19,7 +19,7 @@ #include #include #include -#include +//#include #include #include #include @@ -52,8 +52,18 @@ #include #include +static char* dnsbl_version="$Id$"; -static char* version="$Id$"; +#define DEFAULT "default" +#define WHITE "white" +#define BLACK "black" +#define OK "ok" +#define MANY "many" + +enum status {oksofar, // not rejected yet + white, // whitelisted by envelope from + black, // blacklisted by envelope from or to + reject}; // rejected by a dns list using namespace std; @@ -63,7 +73,9 @@ sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr); sfsistat mlfi_envfrom(SMFICTX *ctx, char **argv); sfsistat mlfi_envrcpt(SMFICTX *ctx, char **argv); - sfsistat mlfi_eom_or_abort(SMFICTX *ctx); + sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len); + sfsistat mlfi_eom(SMFICTX *ctx); + sfsistat mlfi_abort(SMFICTX *ctx); sfsistat mlfi_close(SMFICTX *ctx); } @@ -104,12 +116,16 @@ from_map env_from; string_map env_to_dnsbll; // map recipient to a named dnsbll string_map env_to_chkfrom; // map recipient to a named from map + char * content_suffix; // for sbl url body filtering + char * content_message; CONFIG(); ~CONFIG(); }; CONFIG::CONFIG() { reference_count = 0; load_time = 0; + content_suffix = NULL; + content_message = NULL; } CONFIG::~CONFIG() { for (dnsblp_map::iterator i=dnsbls.begin(); i!=dnsbls.end(); i++) { @@ -135,32 +151,41 @@ +// include the content scanner +#include "scanner.cpp" + + //////////////////////////////////////////////// -// predefined names +// helper to discard the strings held by a string_set // -#define DEFAULT "default" -#define WHITE "white" -#define BLACK "black" -#define OK "ok" -#define MANY "many" +static void discard(string_set s); +static void discard(string_set s) { + for (string_set::iterator i=s.begin(); i!=s.end(); i++) { + free(*i); + } +} //////////////////////////////////////////////// // mail filter private data, held for us by sendmail // -enum status {oksofar, // not rejected yet - white, // whitelisted by envelope from - black, // blacklisted by envelope from or to - reject}; // rejected by a dns list struct mlfiPriv { + // connection specific data CONFIG *pc; // global context with our maps int ip; // ip4 address of the smtp client + map checked; // status from those lists + // message specific data char *mailaddr; // envelope from value bool authenticated; // client authenticated? if so, suppress all dnsbl checks - map checked; // status from those lists + bool have_whites; // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails + bool only_whites; // every recipient is whitelisted? + url_scanner *scanner; // object to handle body scanning + string_set non_whites; // remember the non-whitelisted recipients so we can remove them if need be + string_set urls; // remember the urls that we have checked mlfiPriv(); ~mlfiPriv(); + void reset(bool final = false); // for a new message }; mlfiPriv::mlfiPriv() { pthread_mutex_lock(&config_mutex); @@ -169,12 +194,29 @@ pthread_mutex_unlock(&config_mutex); ip = 0; mailaddr = NULL; + authenticated = false; + have_whites = false; + only_whites = true; + scanner = new url_scanner(&urls); } mlfiPriv::~mlfiPriv() { pthread_mutex_lock(&config_mutex); pc->reference_count--; pthread_mutex_unlock(&config_mutex); + reset(true); +} +void mlfiPriv::reset(bool final) { if (mailaddr) free(mailaddr); + delete scanner; + discard(non_whites); + discard(urls); + if (!final) { + mailaddr = NULL; + authenticated = false; + have_whites = false; + only_whites = true; + scanner = new url_scanner(&urls); + } } #define MLFIPRIV ((struct mlfiPriv *) smfi_getpriv(ctx)) @@ -294,34 +336,64 @@ //////////////////////////////////////////////// -// check a single dnsbl - we don't try very hard, just -// using the default resolver retry settings. If we cannot -// get an answer, we just accept the mail. The caller -// must ensure thread safety. +// +// ask a dns question and get an A record answer - we don't try +// very hard, just using the default resolver retry settings. +// If we cannot get an answer, we just accept the mail. The +// caller must ensure thread safety. +// // -static status check_single(int ip, DNSBL &bl); -static status check_single(int ip, DNSBL &bl) { +static int dns_interface(char *question); +static int dns_interface(char *question) { + u_char answer[NS_PACKETSZ]; + int length = res_search(question, ns_c_in, ns_t_a, answer, sizeof(answer)); + if (length < 0) return 0; // error in getting answer + // parse the answer + ns_msg handle; + ns_rr rr; + if (ns_initparse(answer, length, &handle) != 0) return 0; + int rrnum = 0; + while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) { + if (ns_rr_type(rr) == ns_t_a) { + int address; + memcpy(&address, ns_rr_rdata(rr), sizeof(address)); + return address; + } + } + return 0; +} + +static int protected_dns_interface(char *question); +static int protected_dns_interface(char *question) { + int ans; + pthread_mutex_lock(&resolve_mutex); + ans = dns_interface(question); + pthread_mutex_unlock(&resolve_mutex); + return ans; + +} + +//////////////////////////////////////////////// +// check a single dnsbl +// +static status check_single(int ip, char *suffix); +static status check_single(int ip, char *suffix) { // make a dns question const u_char *src = (const u_char *)&ip; if (src[0] == 127) return oksofar; // don't do dns lookups on localhost char question[NS_MAXDNAME]; - snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", src[3], src[2], src[1], src[0], bl.suffix); - // ask the question - u_char answer[NS_PACKETSZ]; - int length = res_search(question, ns_c_in, ns_t_a, answer, sizeof(answer)); - if (length < 0) return oksofar; // error in getting answer - // parse the answer - ns_msg handle; - ns_rr rr; - if (ns_initparse(answer, length, &handle) != 0) return oksofar; - int rrnum = 0; - while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) { - if (ns_rr_type(rr) == ns_t_a) { - // we see an A record, implies blacklisted ip address - return reject; + snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", src[3], src[2], src[1], src[0], suffix); + // ask the question, if we get an A record it implies a blacklisted ip address + return (protected_dns_interface(question)) ? reject : oksofar; } - } - return oksofar; + + +//////////////////////////////////////////////// +// check a single dnsbl +// +static status check_single(int ip, DNSBL &bl); +static status check_single(int ip, DNSBL &bl) { + return check_single(ip, bl.suffix); } @@ -339,9 +411,7 @@ map::iterator f = priv.checked.find(dp); if (f == priv.checked.end()) { // have not checked this list yet - pthread_mutex_lock(&resolve_mutex); st = check_single(priv.ip, *dp); - pthread_mutex_unlock(&resolve_mutex); rejectlist = dp; priv.checked[dp] = st; } @@ -356,6 +426,30 @@ //////////////////////////////////////////////// +// check the dnsbls specified for this recipient +// +static status check_urls(mlfiPriv &priv, char *&url, int &ip); +static status check_urls(mlfiPriv &priv, char *&url, int &ip) { + CONFIG &dc = *priv.pc; + if (!dc.content_suffix) return oksofar; + int count = 0; + for (string_set::iterator i=priv.urls.begin(); i!=priv.urls.end(); i++) { + count++; + if (count > 20) break; // silly to check too many urls + url = *i; + char buf[200]; + snprintf(buf, sizeof(buf), "looking for url %s", url); + my_syslog(buf); + ip = protected_dns_interface(url); + if (ip) { + status st = check_single(ip, dc.content_suffix); + if (st == reject) return st; + } + } +} + + +//////////////////////////////////////////////// // start of sendmail milter interfaces // sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr) @@ -419,7 +513,7 @@ // reject the recipient based on some dnsbl char adr[sizeof "255.255.255.255"]; adr[0] = '\0'; - const char *rc = inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr)); + inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr)); char buf[2000]; snprintf(buf, sizeof(buf), rejectlist->message, adr, adr); smfi_setreply(ctx, "550", "5.7.1", buf); @@ -432,17 +526,65 @@ } else { // accept the recipient + if (st == oksofar) { + // but remember the non-whites + priv.non_whites.insert(strdup(rcptaddr)); + priv.only_whites = false; + } + if (st == white) { + priv.have_whites = true; + } return SMFIS_CONTINUE; } } -sfsistat mlfi_eom_or_abort(SMFICTX *ctx) +sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len) { mlfiPriv &priv = *MLFIPRIV; - if (priv.mailaddr) { - free(priv.mailaddr); - priv.mailaddr = NULL; + if (priv.authenticated) return SMFIS_CONTINUE; + if (priv.only_whites) return SMFIS_CONTINUE; + priv.scanner->scan(data, len); } + +sfsistat mlfi_eom(SMFICTX *ctx) +{ + sfsistat rc; + mlfiPriv &priv = *MLFIPRIV; + char *url = NULL; + int ip; + // process end of message + if (priv.authenticated || + priv.only_whites || + (check_urls(priv, url, ip) == oksofar)) rc = SMFIS_CONTINUE; + else { + if (!priv.have_whites) { + // can reject the entire message + char adr[sizeof "255.255.255.255"]; + adr[0] = '\0'; + inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); + char buf[2000]; + snprintf(buf, sizeof(buf), priv.pc->content_message, url, adr); + smfi_setreply(ctx, "550", "5.7.1", buf); + rc = SMFIS_REJECT; + } + else { + // need to accept it but remove the recipients that don't want it + for (string_set::iterator i=priv.non_whites.begin(); i!=priv.non_whites.end(); i++) { + char *rcpt = *i; + smfi_delrcpt(ctx, rcpt); + } + rc = SMFIS_CONTINUE; + } + } + // reset for a new message on the same connection + mlfi_abort(ctx); + return rc; +} + +sfsistat mlfi_abort(SMFICTX *ctx) +{ + mlfiPriv &priv = *MLFIPRIV; + priv.reset(); return SMFIS_CONTINUE; } @@ -466,9 +608,9 @@ mlfi_envrcpt, // envelope recipient filter NULL, // header filter NULL, // end of header - NULL, // body block filter - mlfi_eom_or_abort, // end of message - mlfi_eom_or_abort, // message aborted + mlfi_body, // body block filter + mlfi_eom, // end of message + mlfi_abort, // message aborted mlfi_close, // connection cleanup }; @@ -610,7 +752,8 @@ static void load_conf(CONFIG &dc, char *fn) { dc.config_files.push_back(fn); map commands; - enum {dummy, dnsbl, dnsbll, envfrom, envto, include, includedcc}; + enum {dummy, content, dnsbl, dnsbll, envfrom, envto, include, includedcc}; + commands["content" ] = content; commands["dnsbl" ] = dnsbl; commands["dnsbl_list" ] = dnsbll; commands["env_from" ] = envfrom; @@ -635,6 +778,23 @@ // have a decent command bool processed = false; switch (commands[cmd]) { + case content: { + char *suff = strtok(NULL, delim); + if (!suff) break; // no dns suffic + char *msg = suff + strlen(suff); + if ((msg - line) >= strlen(orig)) break; // line ended with the dns suffix + msg = strchr(msg+1, '\''); + if (!msg) break; // no reply message template + msg++; // move over the leading ' + if ((msg - line) >= strlen(orig)) break; // line ended with the leading quote + char *last = strchr(msg, '\''); + if (!last) break; // no trailing quote + *last = '\0'; // make it a null terminator + dc.content_suffix = register_string(suff); + dc.content_message = register_string(msg); + processed = true; + } break; + case dnsbl: { // have a new dnsbl to use char *name = next_token(delim); diff -r 93ff6d1ef647 -r dbe18921f741 src/package --- a/src/package Thu Apr 22 08:38:07 2004 -0700 +++ b/src/package Thu Apr 22 11:25:45 2004 -0700 @@ -6,7 +6,7 @@ mv -f dnsbl.conf dnsbl.conf.save mv sample.conf dnsbl.conf - tar cfvz $target1 dnsbl.cpp dnsbl.conf install.bash LICENSE + tar cfvz $target1 dnsbl.cpp scanner.cpp dnsbl.conf dnsbl.rc install.bash LICENSE mv dnsbl.conf sample.conf mv dnsbl.conf.save dnsbl.conf diff -r 93ff6d1ef647 -r dbe18921f741 src/scanner.cpp --- a/src/scanner.cpp Thu Apr 22 08:38:07 2004 -0700 +++ b/src/scanner.cpp Thu Apr 22 11:25:45 2004 -0700 @@ -1,30 +1,7 @@ -// normal stuff -#include -#include - -// needed for std c++ collections -#include -#include -#include - -// for the dns resolver -#include -#include -#include - -// misc stuff needed here -#include -#include - -static char* version="$Id$"; +static char* scanner_version="$Id$"; using namespace std; -enum status {oksofar, // not rejected yet - white, // whitelisted by envelope from - black, // blacklisted by envelope from or to - reject}; // rejected by a dns list - enum state {// url decoder states u_init, u_http, @@ -841,23 +818,25 @@ 0, // 0xff }; -#define PENDING_LIMIT 1000 +#define PENDING_LIMIT 100 struct fsa { u_char pending[PENDING_LIMIT]; int count; state st; state init; fsa* next; + string_set *urls; - fsa(state init, fsa* next_); + fsa(state init, fsa* next_, string_set *urls_); void push(u_char *buf, int len); }; -fsa::fsa(state init_, fsa* next_) { +fsa::fsa(state init_, fsa *next_, string_set *urls_) { count = 0; st = init_; init = init_; next = next_; + urls = urls_; } void fsa::push(u_char *buf, int len) { @@ -884,7 +863,9 @@ case u_reco: { pending[count-1] = 0; - if (strncasecmp((const char *)pending, "http://", 7) == 0) fprintf(stdout, "%s\n", pending); + if (strncasecmp((const char *)pending, "http://", 7) == 0) { + urls->insert(strdup((const char *)pending+7)); + } } // fall thru case u_init: { @@ -979,75 +960,32 @@ } } - +struct url_scanner { + fsa *urls_parser; + fsa *html_parser; + fsa *mime_parser; + fsa *b64_parser; -//////////////////////////////////////////////// -// ask a dns question and get an A record answer -// -static unsigned long dns_interface(char *question); -static unsigned long dns_interface(char *question) { - u_char answer[NS_PACKETSZ]; - int length = res_search(question, ns_c_in, ns_t_a, answer, sizeof(answer)); - if (length < 0) return oksofar; // error in getting answer - // parse the answer - ns_msg handle; - ns_rr rr; - if (ns_initparse(answer, length, &handle) != 0) return oksofar; - int rrnum = 0; - while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) { - if (ns_rr_type(rr) == ns_t_a) { - unsigned long address; - memcpy(&address, ns_rr_rdata(rr), sizeof(address)); - return reject; - } - } - return 0; + url_scanner(string_set *urls); + ~url_scanner(); + void scan(u_char *buffer, size_t length); +}; + +url_scanner::url_scanner(string_set *urls) { + urls_parser = new fsa(u_init, NULL, urls); + html_parser = new fsa(e_init, urls_parser, NULL); + mime_parser = new fsa(m_init, html_parser, NULL); + b64_parser = new fsa(b_init, mime_parser, NULL); } -//////////////////////////////////////////////// -// check a single dnsbl - we don't try very hard, just -// using the default resolver retry settings. If we cannot -// get an answer, we just accept the mail. The caller -// must ensure thread safety. -// -static status check_single(int ip, char *suffix); -static status check_single(int ip, char *suffix) { - // make a dns question - const u_char *src = (const u_char *)&ip; - if (src[0] == 127) return oksofar; // don't do dns lookups on localhost - char question[NS_MAXDNAME]; - snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", src[3], src[2], src[1], src[0], suffix); - // ask the question, if we get an A record it implies a blacklisted ip address - unsigned long ans = dns_interface(question); - return (ans) ? reject : oksofar; +url_scanner::~url_scanner() { + delete urls_parser; + delete html_parser; + delete mime_parser; + delete b64_parser; } - -//////////////////////////////////////////////// -// scan a file for URLs -// -static void scan_file(char *fn, fsa& parser); -static void scan_file(char *fn, fsa& parser) { - const int LINE_SIZE = 2000; - char line[LINE_SIZE]; - ifstream is(fn); - while (!is.eof()) { - is.getline(line, LINE_SIZE-1); - int n = strlen(line); - line[n++] = '\n'; - parser.push((u_char*)line, n); - } - is.close(); +void url_scanner::scan(u_char *buffer, size_t length) { + b64_parser->push(buffer, length); } - -int main(int argc, char**argv) -{ - char *fn = argv[1]; - fsa *urls_parser = new fsa(u_init, NULL); - fsa *html_parser = new fsa(e_init, urls_parser); - fsa *mime_parser = new fsa(m_init, html_parser); - fsa *b64_parser = new fsa(b_init, mime_parser); - if (fn) scan_file(fn, *b64_parser); - return 0; -} diff -r 93ff6d1ef647 -r dbe18921f741 src/test2.bash --- a/src/test2.bash Thu Apr 22 08:38:07 2004 -0700 +++ b/src/test2.bash Thu Apr 22 11:25:45 2004 -0700 @@ -20,9 +20,9 @@ #./scanner /var/dcc/log/msg.1Q2cYs #base64 test #./scanner /var/dcc/log/msg.1Q2bD7 #html entity test -for i in /var/dcc/log/*; do +#for i in /var/dcc/log/*; do +for i in /tmp/dcc/msg*; do echo '**********************' echo $i - sleep 2 - ./scanner $i | less + ./scanner $i done diff -r 93ff6d1ef647 -r dbe18921f741 test.bash --- a/test.bash Thu Apr 22 08:38:07 2004 -0700 +++ b/test.bash Thu Apr 22 11:25:45 2004 -0700 @@ -1,7 +1,16 @@ #!/bin/bash g++ -c dnsbl.cpp +if [ $? -ne 0 ]; then + echo "compiler errors" + exit +fi g++ -o dnsbl dnsbl.o /usr/lib/libresolv.a -lmilter -pthread +if [ $? -ne 0 ]; then + echo "linker errors" + exit +fi + if [ "$1" == "comp" ]; then exit fi @@ -16,17 +25,20 @@ chmod 700 /var/run/dnsbl mv -f $pid $pid.save rm -f /var/run/dnsbl/dnsbl.sock -cp /var/dnsbl/*conf . -./dnsbl -c -p local:/var/run/dnsbl/dnsbl.sock >check.txt 2>&1 -#sleep 5 -#P2=`cat $pid` -#mv -f $pid.save $pid -#echo started dnsbl milter as process $P2 -# -#/usr/lib/sendmail -bd -Ctest.cf -Ldnsbl -#sleep 5 -#P3=`head -1 /var/run/sm-test.pid` -#echo started sendmail as process $P3 -# -#echo eventually "'"kill -KILL $P2 $P3"'" +dir=`pwd` +pushd /var/dnsbl +echo "$dir/dnsbl -p local:/var/run/dnsbl/dnsbl.sock" +$dir/dnsbl -p local:/var/run/dnsbl/dnsbl.sock +popd +sleep 5 +P2=`cat $pid` +mv -f $pid.save $pid +echo started dnsbl milter as process $P2 +/usr/lib/sendmail -bd -Ctest.cf -Ldnsbl +sleep 5 +P3=`head -1 /var/run/sm-test.pid` +echo started sendmail as process $P3 + +echo eventually "'"kill -KILL $P2 $P3"'" + diff -r 93ff6d1ef647 -r dbe18921f741 xml/sample.conf --- a/xml/sample.conf Thu Apr 22 08:38:07 2004 -0700 +++ b/xml/sample.conf Thu Apr 22 11:25:45 2004 -0700 @@ -4,6 +4,13 @@ # tokens are separated by spaces or tabs # # +# content: +# second token is the dns suffix used for the actual lookups +# third token? is a string enclosed in single quotes, so it +# is not really a token. This is the error message, with +# up to two %s parameters for the url and the client ip +# address. +# # dnsbl: # second token is the name of this dnsbl # third token is the dns suffix used for the actual lookups @@ -72,6 +79,8 @@ # define the dnsbls to use # # +content sbl-xbl.spamhaus.org 'Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s' +# dnsbl LOCAL blackholes.five-ten-sg.com 'Mail from %s rejected - local; see http://www.five-ten-sg.com/blackhole.php?%s' dnsbl SPEWS blackholes.spews.org 'Mail from %s rejected - spews; see http://www.spews.org/ask.cgi?x=%s' dnsbl SBL sbl-xbl.spamhaus.org 'Mail from %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s'