Mercurial > dnsbl
changeset 76:81f1e400e8ab
start coding on new config syntax
author | carl |
---|---|
date | Sat, 16 Jul 2005 13:47:19 -0700 |
parents | 1142e46be550 |
children | 8487650c98ee |
files | clients.conf install.bash sendmail.st src/context.cpp src/context.h src/dnsbl.cpp src/dnsbl.h src/scanner.cpp src/scanner.h test.bash xml/dnsbl.in xml/sample.conf |
diffstat | 12 files changed, 311 insertions(+), 171 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/clients.conf Sat Jul 16 13:47:19 2005 -0700
@@ -0,0 +1,6 @@
+include "510sg.conf";
+include "ams.conf";
+include "davd.conf";
+include "mbmg.conf";
+include "neuro.conf";
+include "pmg.conf";
--- a/install.bash Wed Jul 13 23:04:14 2005 -0700 +++ b/install.bash Sat Jul 16 13:47:19 2005 -0700 @@ -11,12 +11,12 @@ ##################### # build the milter # add compiler flags - suggested by Nigel Horne -g++ -c $CXXFLAGS -pthread dnsbl.cpp +g++ -c $CXXFLAGS -pthread dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp if [ $? -ne 0 ]; then echo "compiler errors" exit fi -g++ -o dnsbl dnsbl.o /usr/lib/libresolv.a -lmilter -pthread +g++ -o dnsbl dnsbl.o scanner.o context.o tokenizer.o /usr/lib/libresolv.a -lmilter -pthread if [ $? -ne 0 ]; then echo "linker errors" exit @@ -35,8 +35,26 @@ rm /var/dnsbl/dnsbl # remove the old binary rmdir /var/dnsbl fi -if [ ! -f $DST/dnsbl.conf ]; then +CONF=$DST/dnsbl.conf +if [ -f $CONF [; then + grep $CONF '^context' >/dev/null + if [ $? -eq 1 ]; then + # config file exists, but it is for the older version + # preserve it and start over + suf=4.old + for i in dnsbl hosts-ignore html-tags tld; do + j=$DST/$i.conf + if [ -f $j ]; then + mv -f $j $j.$suf + fi + done + fi +fi +if [ ! -f $CONF ]; then cp dnsbl.conf $DST + cp hosts-ignore.conf $DST + cp html-tags.conf $DST + cp tld.conf $DST fi if [ ! -f $DST/hosts-ignore.conf ]; then cp hosts-ignore.conf $DST
--- a/src/context.cpp Wed Jul 13 23:04:14 2005 -0700 +++ b/src/context.cpp Sat Jul 16 13:47:19 2005 -0700 @@ -61,6 +61,7 @@ CONFIG::~CONFIG() { + if (debug_syslog) my_syslog("config::~config destructor"); for (context_list::iterator i=contexts.begin(); i!=contexts.end(); i++) { CONTEXT *c = *i; delete c; @@ -81,6 +82,20 @@ context_map::iterator i = env_to.find(to); if (i != env_to.end()) { CONTEXTP c = (*i).second; + int s = strlen(to); + bool at = s && (to[s-1] == '@'); + if (at && con->is_parent(c->get_parent())) { + if (debug_syslog) { + char oldname[maxlen]; + char newname[maxlen]; + char *oldn = c->get_full_name(oldname, maxlen); + char *newn = con->get_full_name(newname, maxlen); + char buf[maxlen*3]; + snprintf(buf, maxlen*3, "both %s and %s claim envelope to %s, the first one wins", oldn, newn, to); + my_syslog(buf); + } + return; // don't take over user@ entries from your ancestors children + } if ((c != con) && (c != con->get_parent())) { char oldname[maxlen]; char newname[maxlen]; @@ -145,6 +160,12 @@ CONTEXT::~CONTEXT() { + if (debug_syslog) { + char buf[maxlen]; + char msg[maxlen]; + snprintf(msg, maxlen, "context::~context %s destructor", get_full_name(buf,maxlen)); + my_syslog(msg); + } for (dnsblp_map::iterator i=dnsbl_names.begin(); i!=dnsbl_names.end(); i++) { DNSBLP d = (*i).second; // delete the underlying DNSBL objects. 
@@ -153,6 +174,13 @@ } +bool CONTEXT::is_parent(CONTEXTP p) { + if (p == parent) return true; + if (!parent) return false; + return parent->is_parent(p); +} + + char *CONTEXT::get_full_name(char *buffer, int size) { if (!parent) return name; char buf[maxlen]; @@ -174,23 +202,27 @@ char *CONTEXT::find_from(char *from) { + char *rc = token_inherit; string_map::iterator i = env_from.find(from); - if (i != env_from.end()) return (*i).second; // found user@domain.tld key + if (i != env_from.end()) rc = (*i).second; // found user@domain.tld key + else { char *x = strchr(from, '@'); if (x) { x++; i = env_from.find(x); - if (i != env_from.end()) return (*i).second; // found domain.tld key + if (i != env_from.end()) rc = (*i).second; // found domain.tld key + else { char y = *x; *x = '\0'; i = env_from.find(from); *x = y; - if (i != env_from.end()) return (*i).second; // found user@ key + if (i != env_from.end()) rc = (*i).second; // found user@ key + } + } } - if ((env_from_default == token_inherit) && parent) { - return parent->find_from(from); - } - return (env_from_default == token_inherit) ? token_unknown : env_from_default; + if (rc == token_inherit) rc = env_from_default; + if ((rc == token_inherit) && parent) return parent->find_from(from); + return (rc == token_inherit) ? 
token_unknown : rc; } @@ -227,6 +259,42 @@ } +char* CONTEXT::get_content_suffix() { + if (!content_suffix && parent) return parent->get_content_suffix(); + return content_suffix; +} + + +char* CONTEXT::get_content_message() { + if (!content_message && parent) return parent->get_content_message(); + return content_message; +} + + +string_set& CONTEXT::get_content_host_ignore() { + if (content_host_ignore.empty() && parent) return parent->get_content_host_ignore(); + return content_host_ignore; +} + + +string_set& CONTEXT::get_content_tlds() { + if (content_tlds.empty() && parent) return parent->get_content_tlds(); + return content_tlds; +} + + +string_set& CONTEXT::get_html_tags() { + if (html_tags.empty() && parent) return parent->get_html_tags(); + return html_tags; +} + + +dnsblp_list& CONTEXT::get_dnsbl_list() { + if (dnsbl_list.empty() && parent) return parent->get_dnsbl_list(); + return dnsbl_list; +} + + bool CONTEXT::acceptable_content(recorder &memory, char *&msg) { if (memory.excessive_bad_tags(tag_limit)) { msg = tag_limit_message; @@ -427,7 +495,6 @@ // bool parse_content(TOKEN &tok, CONFIG &dc, CONTEXT &me); bool parse_content(TOKEN &tok, CONFIG &dc, CONTEXT &me) { - bool topdefault = (!me.get_parent()) && (!dc.default_context); char *setting = tok.next(); if (setting == token_on) { me.set_content_filtering(true); @@ -446,25 +513,18 @@ if (have == token_filter) { char *suffix = tok.next(); char *messag = tok.next(); - if (topdefault) { me.set_content_suffix(suffix); me.set_content_message(messag); - } if (!tsa(tok, token_semi)) return false; - if (!topdefault) tok.token_error("content filters may only be speciried in the top default context"); } else if (have == token_ignore) { if (!tsa(tok, token_lbrace)) return false; while (true) { if (!have) break; char *have = tok.next(); - if (have == token_rbrace) { - break; // done - } - else { + if (have == token_rbrace) break; // done me.add_ignore(have); } - } if (!tsa(tok, token_semi)) return false; } else 
if (have == token_tld) { @@ -472,15 +532,10 @@ while (true) { char *have = tok.next(); if (!have) break; - if (have == token_rbrace) { - break; // done - } - else { - if (topdefault) me.add_tld(have); - } + if (have == token_rbrace) break; // done + me.add_tld(have); } if (!tsa(tok, token_semi)) return false; - if (!topdefault) tok.token_error("tld values may only be specified in the top default context"); } else if (have == token_html_limit) { have = tok.next(); @@ -507,11 +562,10 @@ break; // done } else { - if (topdefault) me.add_tag(have); + me.add_tag(have); } } if (!tsa(tok, token_semi)) return false; - if (!topdefault) tok.token_error("html tags may only be specified in the top default context"); } else if (have == token_host_limit) { have = tok.next();
--- a/src/context.h Wed Jul 13 23:04:14 2005 -0700 +++ b/src/context.h Sat Jul 16 13:47:19 2005 -0700 @@ -58,6 +58,7 @@ CONTEXT(CONTEXTP parent_, char *name_); ~CONTEXT(); CONTEXTP get_parent() {return parent;}; + bool is_parent(CONTEXTP p); // is p a parent of this? char* get_full_name(char *buf, int size); void add_context(CONTEXTP child) {children[child->name] = child;}; bool allow_env_to(char *to) {return (parent) ? parent->cover_env_to(to) : true;}; @@ -88,15 +89,15 @@ void add_dnsbl(DNSBLP dns) {dnsbl_list.push_back(dns);}; DNSBLP find_dnsbl(char *name); + bool get_content_filtering() {return content_filtering;}; int get_host_limit() {return host_limit;}; bool get_host_random() {return host_random;}; - char* get_content_suffix() {return content_suffix;}; - char* get_content_message() {return content_message;}; - string_set& get_content_host_ignore() {return content_host_ignore;}; - string_set& get_content_tlds() {return content_tlds;}; - string_set& get_html_tags() {return html_tags;}; - dnsblp_list& get_dnsbl_list() {return dnsbl_list;}; - bool get_content_filtering() {return content_filtering;}; + char* get_content_suffix(); + char* get_content_message(); + string_set& get_content_host_ignore(); + string_set& get_content_tlds(); + string_set& get_html_tags(); + dnsblp_list& get_dnsbl_list(); bool acceptable_content(recorder &memory, char *&msg); bool ignore_host(char *host); @@ -122,13 +123,6 @@ void add_context(CONTEXTP con); void add_to(char *to, CONTEXTP con); CONTEXTP find_context(char *to); - - char* get_content_suffix() {return default_context->get_content_suffix() ;}; - char* get_content_message() {return default_context->get_content_message() ;}; - string_set& get_content_host_ignore() {return default_context->get_content_host_ignore() ;}; - string_set& get_content_tlds() {return default_context->get_content_tlds() ;}; - string_set& get_html_tags() {return default_context->get_html_tags() ;}; - void dump(); };
--- a/src/dnsbl.cpp Wed Jul 13 23:04:14 2005 -0700 +++ b/src/dnsbl.cpp Sat Jul 16 13:47:19 2005 -0700 @@ -12,23 +12,15 @@ -t sec The timeout value. -c Check the config, and print a copy to stdout. Don't start the milter or do anything with the socket. +-s Stress test by loading and deleting the current config in a loop. -d Add debug syslog entries +-e f|t Print the results of looking up from address f and to address + t in the current config TODO: -1) Add config for max_recipients for each mail domain. Recipients in -excess of that limit will be rejected, and the entire data will be -rejected if it is sent. -2) Add config for poison addresses. If any recipient is poison, all -recipients are rejected even if they would be whitelisted, and the -data is rejected if sent. - -3) Add option to only allow one recipient if the return path is empty. - -4) Check if the envelope from domain name primary MX points 127.0.0.0/8 - -5) Add option for using smtp connections to verify addresses from backup +1) Add option for using smtp connections to verify addresses from backup mx machines. This allows the backup mx to learn the valid addresses on the primary machine. 
@@ -95,6 +87,7 @@ bool loader_run = true; // used to stop the config loader thread CONFIG * config = NULL; // protected by the config_mutex int generation = 0; // protected by the config_mutex +const int maxlen = 1000; // used for snprintf buffers pthread_mutex_t config_mutex; pthread_mutex_t syslog_mutex; @@ -163,6 +156,7 @@ char *x = (*i).first; free(x); } + cm.clear(); } @@ -241,8 +235,11 @@ authenticated = false; have_whites = false; only_whites = true; - memory = new recorder(this, pc->get_html_tags(), pc->get_content_tlds()); - scanner = new url_scanner(memory); + memory = NULL; + scanner = NULL; + content_suffix = NULL; + content_message = NULL; + content_host_ignore = NULL; } mlfiPriv::~mlfiPriv() { @@ -254,20 +251,25 @@ } void mlfiPriv::reset(bool final) { + if (debug_syslog) my_syslog(this, "mlfiPriv::reset"); if (mailaddr) free(mailaddr); if (queueid) free(queueid); discard(env_to); - delete memory; - delete scanner; + if (memory) delete memory; + if (scanner) delete scanner; if (!final) { mailaddr = NULL; queueid = NULL; authenticated = false; have_whites = false; only_whites = true; - memory = new recorder(this, pc->get_html_tags(), pc->get_content_tlds()); - scanner = new url_scanner(memory); + memory = NULL; + scanner = NULL; + content_suffix = NULL; + content_message = NULL; + content_host_ignore = NULL; } + if (debug_syslog) my_syslog("mlfiPriv::reset exit"); } void mlfiPriv::get_fd() { @@ -368,7 +370,18 @@ } void mlfiPriv::need_content_filter(char *rcpt, CONTEXT &con) { + if (debug_syslog) my_syslog(this, "need_content_filter"); register_string(env_to, rcpt, &con); + if (!memory) { + // first recipient that needs content filtering sets all + // the content filtering parameters + memory = new recorder(this, con.get_html_tags(), con.get_content_tlds()); + scanner = new url_scanner(memory); + content_suffix = con.get_content_suffix(); + content_message = con.get_content_message(); + content_host_ignore = &con.get_content_host_ignore(); + } + if 
(debug_syslog) my_syslog(this, "need_content_filter exit"); } #define MLFIPRIV ((struct mlfiPriv *) smfi_getpriv(ctx)) @@ -378,7 +391,7 @@ // syslog a message // void my_syslog(mlfiPriv *priv, char *text) { - char buf[1000]; + char buf[maxlen]; if (priv) { snprintf(buf, sizeof(buf), "%s: %s", priv->queueid, text); text = buf; @@ -390,6 +403,8 @@ syslog_opened = true; } syslog(LOG_NOTICE, "%s", text); + closelog(); + syslog_opened = false; pthread_mutex_unlock(&syslog_mutex); } else { @@ -644,8 +659,8 @@ bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int ip) { CONFIG &dc = *priv.pc; string_set &hosts = priv.memory->get_hosts(); - string_set &ignore = dc.get_content_host_ignore(); - char *suffix = dc.get_content_suffix(); + string_set &ignore = *priv.content_host_ignore; + char *suffix = priv.content_suffix; int count = 0; int cnt = hosts.size(); // number of hosts we could look at @@ -662,7 +677,7 @@ if ((cnt > limit) && (limit > 0) && random) { int r = rand() % cnt; if (r >= limit) { - char buf[1000]; + char buf[maxlen]; snprintf(buf, sizeof(buf), "host %s skipped", host); my_syslog(&priv, buf); continue; @@ -671,7 +686,7 @@ count++; ip = dns_interface(priv, host, true, &nameservers); if (debug_syslog) { - char buf[1000]; + char buf[maxlen]; if (ip) { char adr[sizeof "255.255.255.255"]; adr[0] = '\0'; @@ -704,7 +719,7 @@ ip = (*i).second; if (!ip) ip = dns_interface(priv, host, false, NULL); if (debug_syslog) { - char buf[200]; + char buf[maxlen]; if (ip) { char adr[sizeof "255.255.255.255"]; adr[0] = '\0'; @@ -724,7 +739,7 @@ string_map::iterator j = nameservers.ns_host.find(host); if (j != nameservers.ns_host.end()) { char *refer = (*j).second; - char buf[1000]; + char buf[maxlen]; snprintf(buf, sizeof(buf), "%s with nameserver %s", refer, host); host = register_string(hosts, buf); // put a copy into hosts, and return that reference } @@ -762,6 +777,7 @@ // sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr) { + if 
(debug_syslog) my_syslog("mlfi_connect"); // allocate some private memory mlfiPriv *priv = new mlfiPriv; if (hostaddr->sa_family == AF_INET) { @@ -778,6 +794,7 @@ sfsistat mlfi_envfrom(SMFICTX *ctx, char **from) { mlfiPriv &priv = *MLFIPRIV; + if (debug_syslog) my_syslog(&priv, "mlfi_envfrom"); priv.mailaddr = to_lower_string(from[0]); priv.authenticated = (smfi_getsymval(ctx, "{auth_authen}") != NULL); return SMFIS_CONTINUE; @@ -787,11 +804,20 @@ { DNSBLP rejectlist = NULL; // list that caused the reject mlfiPriv &priv = *MLFIPRIV; + if (debug_syslog) my_syslog(&priv, "mlfi_envrcpt"); CONFIG &dc = *priv.pc; if (!priv.queueid) priv.queueid = strdup(smfi_getsymval(ctx, "i")); char *rcptaddr = rcpt[0]; char *loto = to_lower_string(rcptaddr); - CONTEXT con = *(dc.find_context(loto)->find_context(priv.mailaddr)); + if (debug_syslog) my_syslog(&priv, "finding context"); + CONTEXT &con = *(dc.find_context(loto)->find_context(priv.mailaddr)); + if (debug_syslog) { + char buf[maxlen]; + char msg[maxlen]; + snprintf(msg, sizeof(msg), "from <%s> to <%s> using context %s", priv.mailaddr, loto, con.get_full_name(buf,maxlen)); + my_syslog(&priv, msg); + } + if (debug_syslog) my_syslog(&priv, "finding from value"); char *fromvalue = con.find_from(priv.mailaddr); free(loto); status st; @@ -803,6 +829,7 @@ } else { // check the dns based lists + if (debug_syslog) my_syslog(&priv, "checking dns lists"); st = (check_dnsbl(priv, con.get_dnsbl_list(), rejectlist)) ? 
black : oksofar; } if (st == reject) { @@ -810,7 +837,7 @@ char adr[sizeof "255.255.255.255"]; adr[0] = '\0'; inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr)); - char buf[2000]; + char buf[maxlen]; snprintf(buf, sizeof(buf), rejectlist->message, adr, adr); smfi_setreply(ctx, "550", "5.7.1", buf); return SMFIS_REJECT; @@ -822,6 +849,7 @@ } else { // accept the recipient + if (debug_syslog) my_syslog(&priv, "checking content filtering"); if (!con.get_content_filtering()) st = white; if (st == oksofar) { // but remember the non-whites @@ -838,6 +866,7 @@ sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len) { mlfiPriv &priv = *MLFIPRIV; + if (debug_syslog) my_syslog(&priv, "mlfi_body"); if (priv.authenticated) return SMFIS_CONTINUE; if (priv.only_whites) return SMFIS_CONTINUE; priv.scanner->scan(data, len); @@ -846,8 +875,10 @@ sfsistat mlfi_eom(SMFICTX *ctx) { + if (debug_syslog) my_syslog("mlfi_eom"); sfsistat rc; mlfiPriv &priv = *MLFIPRIV; + if (debug_syslog) my_syslog(&priv, "mlfi_eom"); CONFIG &dc = *priv.pc; char *host = NULL; int ip; @@ -855,6 +886,8 @@ // process end of message if (priv.authenticated || priv.only_whites) rc = SMFIS_CONTINUE; else { + // assert env_to not empty + char buf[maxlen]; char *msg = NULL; string_set alive; bool random = false; @@ -872,22 +905,21 @@ limit = max(limit, con.get_host_limit()); } } - bool rejecting = alive.empty(); + bool rejecting = alive.empty(); // if alive is empty, we must have set msg above in acceptable_content() if (!rejecting) { - rejecting = check_hosts(priv, random, limit, host, ip); - if (rejecting) { - static char buf[2000]; + if (check_hosts(priv, random, limit, host, ip)) { char adr[sizeof "255.255.255.255"]; adr[0] = '\0'; inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); - snprintf(buf, sizeof(buf), dc.get_content_message(), host, adr); + snprintf(buf, sizeof(buf), priv.content_message, host, adr); msg = buf; + rejecting = true; } } if (!rejecting) { rc = SMFIS_CONTINUE; } - 
else if (!priv.have_whites && alive.empty()) { + else if (!priv.have_whites) { // can reject the entire message smfi_setreply(ctx, "550", "5.7.1", msg); rc = SMFIS_REJECT; @@ -909,6 +941,7 @@ sfsistat mlfi_abort(SMFICTX *ctx) { mlfiPriv &priv = *MLFIPRIV; + if (debug_syslog) my_syslog(&priv, "mlfi_abort"); priv.reset(); return SMFIS_CONTINUE; } @@ -916,6 +949,7 @@ sfsistat mlfi_close(SMFICTX *ctx) { mlfiPriv *priv = MLFIPRIV; + if (debug_syslog) my_syslog(priv, "mlfi_close"); if (!priv) return SMFIS_CONTINUE; delete priv; smfi_setpriv(ctx, NULL); @@ -949,7 +983,7 @@ pthread_mutex_lock(&config_mutex); newc->generation = generation++; pthread_mutex_unlock(&config_mutex); - char buf[200]; + char buf[maxlen]; snprintf(buf, sizeof(buf), "loading configuration generation %d", newc->generation); my_syslog(buf); if (load_conf(*newc, "dnsbl.conf")) { @@ -996,7 +1030,7 @@ for (configp_set::iterator i=old_configs.begin(); i!=old_configs.end(); ) { CONFIG *old = *i; if (!old->reference_count) { - char buf[200]; + char buf[maxlen]; snprintf(buf, sizeof(buf), "freeing memory for old configuration generation %d", old->generation); my_syslog(buf); delete old; // destructor does all the work @@ -1133,6 +1167,7 @@ if (check) { use_syslog = false; + debug_syslog = true; CONFIG *conf = new_conf(); if (conf) { conf->dump(); @@ -1167,7 +1202,6 @@ CONFIG *conf = new_conf(); if (conf) { CONTEXTP con = conf->find_context(to); - const int maxlen = 1000; char buf[maxlen]; fprintf(stdout, "envelope to <%s> finds context %s\n", to, con->get_full_name(buf,maxlen)); CONTEXTP fc = con->find_context(from);
--- a/src/dnsbl.h Wed Jul 13 23:04:14 2005 -0700 +++ b/src/dnsbl.h Sat Jul 16 13:47:19 2005 -0700 @@ -28,6 +28,10 @@ context_map env_to; // map each non-whitelisted recipient to their filtering context recorder *memory; // memory for the content scanner url_scanner *scanner; // object to handle body scanning + char *content_suffix; // content filtering parameters + char *content_message; // "" + string_set *content_host_ignore; // "" + mlfiPriv(); ~mlfiPriv();
--- a/src/scanner.cpp Wed Jul 13 23:04:14 2005 -0700
+++ b/src/scanner.cpp Sat Jul 16 13:47:19 2005 -0700
@@ -1148,9 +1148,6 @@
 bad_html_tags = 0;
 binary_tags = 0;
 }
-recorder::~recorder() {
- empty();
-}
 void recorder::empty() {
 bad_html_tags = 0;
 binary_tags = 0;
--- a/src/scanner.h Wed Jul 13 23:04:14 2005 -0700 +++ b/src/scanner.h Sat Jul 16 13:47:19 2005 -0700 @@ -17,7 +17,7 @@ public: recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_); - ~recorder(); + ~recorder() { empty(); }; void empty(); void new_url(char *host); void new_tag(char *tag);
--- a/test.bash Wed Jul 13 23:04:14 2005 -0700 +++ b/test.bash Sat Jul 16 13:47:19 2005 -0700 @@ -18,6 +18,8 @@ ########################### # compile the milter # + +if [ "$1" == "build" ]; then rm -f dnsbl.o scanner.o context.o tokenizer.o g++ -c -pthread dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp if [ $? -ne 0 ]; then @@ -29,6 +31,7 @@ echo "linker errors" exit fi +fi if [ "$1" == "comp" ]; then exit
--- a/xml/dnsbl.in Wed Jul 13 23:04:14 2005 -0700 +++ b/xml/dnsbl.in Sat Jul 16 13:47:19 2005 -0700 @@ -80,68 +80,70 @@ <p>DNSBL-LIST - a named list of DNSBLs that will be used for specific recipients or recipient domains. -<p>The envelope to email address is used to find an initial filtering context. -That context then uses the envelope from email address to find the final -filtering context. The envelope from email address is checked in that context -to see if we should whitelist or blacklist the message -two names (a named DNSBL-LIST, and a named ENVELOPE-FROM-MAP). If the -recipient is not found in the configuration, the named DEFAULT -dnsbl-list and DEFAULT envelope-from-map will be used. When mail is -received for that recipient, +<hr> <center>Filtering Procedure</center> + +<p>If the client has authenticated with sendmail, the mail is accepted, +the dns lists are not checked, and the body content is not scanned. +Otherwise, we follow these steps for each recipient. <ol> -<li>If the client has authenticated with sendmail, the mail is accepted, -the dns lists are not checked, and the body content is not scanned. - <li>The envelope to email address is used to find an initial filtering -context. We first look for a context that specified the full email address -in the env_to statement. If that is not found, we look for a context that -specified the entire domain name of the envelope recipient in the env_to -statement. If that is not found, we look for a context that specified the -user@ part of the envelope recipient in the env_to statement. If that is not -found, we use the first top level context defined in the config file. +context. We first look for a context that specified the full email +address in the env_to statement. If that is not found, we look for a +context that specified the entire domain name of the envelope recipient +in the env_to statement. 
If that is not found, we look for a context +that specified the user@ part of the envelope recipient in the env_to +statement. If that is not found, we use the first top level context +defined in the config file. -<li>The initial filtering context may redirect to a child context based -on the values in the initial context's env_from statement. We look for -[1) the full envelope from email address, 2) the domain name part of the -envelope from address, 3) the user@ part of the envelope from address] -in that context's env_from statement, with values that point to a child -context. If such an entry is found, we switch to that filtering -context. +<br><br><li>The initial filtering context may redirect to a child +context based on the values in the initial context's env_from statement. +We look for [1) the full envelope from email address, 2) the domain name +part of the envelope from address, 3) the user@ part of the envelope +from address] in that context's env_from statement, with values that +point to a child context. If such an entry is found, we switch to that +child filtering context. -<li>We lookup [1) the full envelope from email address, 2) the domain -name part of the envelope from address, 3) the user@ part of the +<br><br><li>We lookup [1) the full envelope from email address, 2) the +domain name part of the envelope from address, 3) the user@ part of the envelope from address] in the filtering context env_from statement. That results in one of (white, black, unknown, inherit). -<li>If the answer is black, mail to this recipient is rejected with "no -such user", and the dns lists are not checked. - -<li>If the answer is white, mail to this recipient is accepted and the -dns lists are not checked. +<br><br><li>If the answer is black, mail to this recipient is rejected +with "no such user", and the dns lists are not checked. -<li>If the answer is unknown, we don't reject yet, but the dns lists -will be checked, and the content may be scanned. 
+<br><br><li>If the answer is white, mail to this recipient is accepted +and the dns lists are not checked. -<li>If the answer is inherit, we repeat the envelope from search in the -parent context. +<br><br><li>If the answer is unknown, we don't reject yet, but the dns +lists will be checked, and the content may be scanned. -<li>The dns lists specified in the filtering context are checked and the -mail is rejected if any list has an A record for the standard dns based -lookup scheme (reversed octets of the client followed by the dns -suffix). +<br><br><li>If the answer is inherit, we repeat the envelope from search +in the parent context. -<li>If the mail has not been accepted or rejected yet, the body content -is optionally scanned for HTTP URLs (after base64, mime and html entity -decoding), and the first <configurable> host names are checked for -their presence on the SBL. If any host name is on the SBL, and it is -not on the "ignore" list, the mail is rejected. If we are doing body -content scanning, we also scan for excessive bad html tags, and if a -<configurable> limit is exceeded, the mail is rejected. +<br><br><li>The dns lists specified in the filtering context are checked +and the mail is rejected if any list has an A record for the standard +dns based lookup scheme (reversed octets of the client followed by the +dns suffix). + +<br><br><li>If the mail has not been accepted or rejected yet, and the +filtering context enables content filtering, and this is the first such +recipient in this smtp transaction, we set the content filtering parameters +from this context, and enable content filtering for this body. </ol> +<p>If content filtering is enabled for this body, the mail text is +decoded (uuencode, base64, mime, html entity, url encodings), scanned +for HTTP and HTTPS URLs, and the first <configurable> host names +are checked for their presence on the single <configurable> DNSBL. +The only known list that is suitable for this purpose is the SBL. 
If +any of those host names are on that DNSBL (or have nameservers that are +on that list), and it is not on the <configurable> ignore list, +the mail is rejected. We also scan for excessive bad html tags, and if +a <configurable> limit is exceeded, the mail is rejected. + <hr> <center>Sendmail access vs. DNSBL</center> <p>With the standard sendmail.mc dnsbl FEATURE, the dnsbl checks may be suppressed by entries in the /etc/mail/access database. For example, @@ -241,6 +243,34 @@ processes, 400 milter threads, and 400 dns resolver processes. Of course that steady state is very unlikely to happen. +<hr> <center>Rejected Ideas</center> + +<p>The following ideas have been considered and rejected. + +<p>Add max_recipients for each mail domain to the configuration. +Recipients in excess of that limit will be rejected, and all the +recipients in that domain will be removed if there are some other +whitelisted recipients. Current spammers *very* rarely send more than +ten recipients in a single smtp transaction, so this won't stop +any significant amount of spam. + +<p>Add poison addresses to the configuration. If any recipient is +poison, all recipients are rejected even if they would be whitelisted, +and the data is rejected if sent. I have a collection of spam trap +addresses that would be suitable for such use. Based on my log files, +any mail to those spam trap addresses is rejected based on either dnsbl +lookups or the DCC. So this won't result in blocking any additional +spam. + +<p>Add an option to only allow one recipient if the return path is +empty. Based on my log files, there is no mail that violates this +check. + +<p>Reject the mail if the envelope from domain name contains any MX +records pointing to 127.0.0.0/8. I don't see any significant amount of spam +sent with such domain names. + + <pre> $Id$ </pre>