Mercurial > dnsbl
changeset 75:1142e46be550
start coding on new config syntax
author | carl |
---|---|
date | Wed, 13 Jul 2005 23:04:14 -0700 |
parents | b7449114ebb0 |
children | 81f1e400e8ab |
files | ChangeLog Makefile.test dnsbl.conf dnsbl.spec.in hosts-ignore.conf package.bash src/context.cpp src/context.h src/dnsbl.cpp src/new.cpp src/scanner.cpp src/scanner.h src/tokenizer.cpp src/tokenizer.h xml/dnsbl.in xml/sample.conf |
diffstat | 16 files changed, 573 insertions(+), 1810 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog Sun Jul 10 14:19:00 2005 -0700 +++ b/ChangeLog Wed Jul 13 23:04:14 2005 -0700 @@ -1,5 +1,8 @@ $Id$ +5.0 2005-07-30 + Major changes to the syntax of the config file. + 4.6 2005-04-02 Fix enum compilation error on Fedora Core 3. Discovered by Nigel Horne <njh@bandsman.co.uk>
--- a/Makefile.test Sun Jul 10 14:19:00 2005 -0700 +++ b/Makefile.test Wed Jul 13 23:04:14 2005 -0700 @@ -15,14 +15,6 @@ userdb.db : userdb @makemap btree $@ < $< -access : access.header access.common access.510sg access.davd - cat access.header access.510sg access.common >access - cat access.header access.davd access.common | ssh mail3.davdgrp.com 'cat >/etc/mail/access' - ssh mail3.davdgrp.com '(cd /usr/src; sh makesendmailtable.bat)' - rm -f /home/httpd/html/510sg/bogus.list - cp access /home/httpd/html/510sg/bogus.list - scp access ns1:/home/httpd/html/510sg/bogus.list - %.db : % @makemap hash $@ < $<
--- a/dnsbl.conf Sun Jul 10 14:19:00 2005 -0700 +++ b/dnsbl.conf Wed Jul 13 23:04:14 2005 -0700 @@ -1,37 +1,60 @@ -############################################## -# content scanning parameters -# -content sbl-xbl.spamhaus.org 'Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s' -#host_limit 20 'Mail containing too many host names rejected' -host_soft_limit 20 -#html_limit 20 'Mail containing excessive bad html tags rejected' -include hosts-ignore.conf -include html-tags.conf -include tld.conf +context main { + dnsbl local blackholes.five-ten-sg.com "Mail from %s rejected - local; see http://www.five-ten-sg.com/blackhole.php?%s"; + dnsbl sbl sbl-xbl.spamhaus.org "Mail from %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s"; + dnsbl xbl xbl.spamhaus.org "Mail from %s rejected - xbl; see http://www.spamhaus.org/query/bl?ip=%s"; + dnsbl_list local sbl; + content on { + filter sbl-xbl.spamhaus.org "Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s"; + ignore { include "hosts-ignore.conf"; }; + tld { include "tld.conf"; }; + html_tags { include "html-tags.conf"; }; + html_limit off; + host_limit soft 20; + }; -############################################## -# define the dnsbls to use -# -dnsbl LOCAL blackholes.five-ten-sg.com 'Mail from %s rejected - local; see http://www.five-ten-sg.com/blackhole.php?%s' -#dnsbl SPEWS blackholes.spews.org 'Mail from %s rejected - spews; see http://www.spews.org/ask.cgi?x=%s' -dnsbl SBL sbl-xbl.spamhaus.org 'Mail from %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s' + env_to { + example.com; # !! 
replace this with your domain name + # child contexts are not allowed to specify recipient addresses outside these domains + }; + + context whitelist { + content off {}; + env_to { + # dcc_to ok { include "/var/dcc/whitecommon"; }; + }; + env_from white {}; # white forces all unmatched from addresses (everyone in this case) to be whitelisted + # so all mail TO these env_to addresses is accepted + }; - -############################################## -# define the (default and other) lists of dnsbls to use -# -dnsbl_list DEFAULT LOCAL SBL - + context abuse { + dnsbl_list xbl; + content off {}; + env_to { + abuse@ # no content filtering on abuse reports + postmaster@ # "" + }; + env_from unknown {}; # ignore all parent white/black listing + }; -############################################## -# define the (default and other) env_from maps -# + context minimal { + dnsbl_list sbl; + content on {}; + env_to { + }; + }; -############################################## -# specify dnsbl_lists and env_from maps to use for specific recipients -# + context blacklist { + env_to { + # dcc_to many { include "/var/dcc/whitecommon"; }; + }; + env_from black {}; # black forces all unmatched from addresses (everyone in this case) to be blacklisted + # so all mail TO these env_to addresses is rejected + }; -############################################## -# specify dnsbl_lists and env_from maps to use for clients domains -# + env_from unknown { + abuse@ abuse; # replies to abuse reports use the abuse context + # dcc_from { include "/var/dcc/whitecommon"; }; + }; +}; +
--- a/dnsbl.spec.in Sun Jul 10 14:19:00 2005 -0700 +++ b/dnsbl.spec.in Wed Jul 13 23:04:14 2005 -0700 @@ -1,6 +1,6 @@ Summary: DNSBL Sendmail Milter Name: dnsbl -Version: 4.6 +Version: 5.0 Release: 2 Copyright: GPL Group: System Environment/Daemons
--- a/package.bash Sun Jul 10 14:19:00 2005 -0700 +++ b/package.bash Wed Jul 13 23:04:14 2005 -0700 @@ -1,6 +1,6 @@ #!/bin/bash -VER=dnsbl-4.6 +VER=dnsbl-5.0 mkdir $VER target1=/home/httpd/html/510sg/util/dnsbl.tar.gz target2=/home/httpd/html/510sg/dnsbl.conf
--- a/src/context.cpp Sun Jul 10 14:19:00 2005 -0700 +++ b/src/context.cpp Wed Jul 13 23:04:14 2005 -0700 @@ -28,19 +28,22 @@ char *token_include; char *token_inherit; char *token_lbrace; +char *token_mailhost; char *token_many; char *token_off; +char *token_ok2; char *token_ok; -char *token_ok2; char *token_on; char *token_rbrace; char *token_semi; char *token_soft; +char *token_substitute; char *token_tld; char *token_unknown; char *token_white; string_set all_strings; // owns all the strings, only modified by the config loader thread +const int maxlen = 1000; DNSBL::DNSBL(char *n, char *s, char *m) { name = n; @@ -74,23 +77,39 @@ } -CONTEXTP CONFIG::find_context(char *to, char *from) { - CONTEXTP con = NULL; - context_map::iterator i = env_to.find(from); +void CONFIG::add_to(char *to, CONTEXTP con) { + context_map::iterator i = env_to.find(to); if (i != env_to.end()) { - con = (*i).second; - return con->find_from_context(from); + CONTEXTP c = (*i).second; + if ((c != con) && (c != con->get_parent())) { + char oldname[maxlen]; + char newname[maxlen]; + char *oldn = c->get_full_name(oldname, maxlen); + char *newn = con->get_full_name(newname, maxlen); + char buf[maxlen*3]; + snprintf(buf, maxlen*3, "both %s and %s claim envelope to %s, the second one wins", oldn, newn, to); + my_syslog(buf); } + } + env_to[to] = con; +} + + +CONTEXTP CONFIG::find_context(char *to) { + context_map::iterator i = env_to.find(to); + if (i != env_to.end()) return (*i).second; // found user@domain.tld key char *x = strchr(to, '@'); if (x) { x++; i = env_to.find(x); - if (i != env_to.end()) { - con = (*i).second; - return con->find_from_context(from); + if (i != env_to.end()) return (*i).second; // found domain.tld key + char y = *x; + *x = '\0'; + i = env_to.find(to); + *x = y; + if (i != env_to.end()) return (*i).second; // found user@ key } - } - return default_context->find_from_context(from); + return default_context; } @@ -101,6 +120,12 @@ CONTEXTP p = c->get_parent(); if (!p && (c 
!= default_context)) c->dump(); } + char buf[maxlen]; + for (context_map::iterator i=env_to.begin(); i!=env_to.end(); i++) { + char *to = (*i).first; + CONTEXTP con = (*i).second; + printf("// envelope to %s \t-> context %s \n", to, con->get_full_name(buf,maxlen)); + } } @@ -111,10 +136,10 @@ content_filtering = (parent) ? parent->content_filtering : false; content_suffix = NULL; content_message = NULL; - host_limit = 0; + host_limit = (parent) ? parent->host_limit : 0; host_limit_message = NULL; - host_random = false; - tag_limit = 0; + host_random = (parent) ? parent->host_random : false; + tag_limit = (parent) ? parent->tag_limit : 0; tag_limit_message = NULL; } @@ -130,7 +155,6 @@ char *CONTEXT::get_full_name(char *buffer, int size) { if (!parent) return name; - const int maxlen = 1000; char buf[maxlen]; snprintf(buffer, size, "%s.%s", parent->get_full_name(buf, maxlen), name); return buffer; @@ -138,51 +162,51 @@ bool CONTEXT::cover_env_to(char *to) { - const int maxlen = 1000; char buffer[maxlen]; char *x = strchr(to, '@'); if (x) x++; else x = to; + if (*x == '\0') return true; // always allow covering addresses with no domain name, eg abuse@ string_set::iterator i = env_to.find(x); if (i != env_to.end()) return true; - return (parent) ? parent->cover_env_to(to) : false; + return false; } char *CONTEXT::find_from(char *from) { - // do we have a white/black/unknown for this full from value? string_map::iterator i = env_from.find(from); - if (i != env_from.end()) return (*i).second; - // do we have a white/black/unknown for the source domain name? 
+ if (i != env_from.end()) return (*i).second; // found user@domain.tld key char *x = strchr(from, '@'); if (x) { x++; i = env_from.find(x); - if (i != env_from.end()) return (*i).second; + if (i != env_from.end()) return (*i).second; // found domain.tld key + char y = *x; + *x = '\0'; + i = env_from.find(from); + *x = y; + if (i != env_from.end()) return (*i).second; // found user@ key } if ((env_from_default == token_inherit) && parent) { return parent->find_from(from); } - return env_from_default; + return (env_from_default == token_inherit) ? token_unknown : env_from_default; } -CONTEXTP CONTEXT::find_from_context(char *from) { - // do we have a special child context for this full from value? - context_map::iterator j = env_from_context.find(from); - if (j != env_from_context.end()) { - CONTEXTP con = (*j).second; - return con->find_from_context(from); - } +CONTEXTP CONTEXT::find_context(char *from) { + context_map::iterator i = env_from_context.find(from); + if (i != env_from_context.end()) return (*i).second; // found user@domain.tld key char *x = strchr(from, '@'); if (x) { x++; - // do we have a special context for the source domain name? 
- j = env_from_context.find(x); - if (j != env_from_context.end()) { - CONTEXTP con = (*j).second; - return con->find_from_context(from); - } + i = env_from_context.find(x); + if (i != env_from_context.end()) return (*i).second; // found domain.tld key + char y = *x; + *x = '\0'; + i = env_from_context.find(from); + *x = y; + if (i != env_from_context.end()) return (*i).second; // found user@ key } return this; } @@ -216,12 +240,13 @@ void CONTEXT::dump(int level) { - const int maxlen = 1000; char indent[maxlen]; int i = min(maxlen-1, level*4); memset(indent, ' ', i); indent[i] = '\0'; - printf("%s context %s { \n", indent, name); + char buf[maxlen]; + char *fullname = get_full_name(buf,maxlen); + printf("%s context %s { \t// %s\n", indent, name, fullname); for (dnsblp_map::iterator i=dnsbl_names.begin(); i!=dnsbl_names.end(); i++) { char *n = (*i).first; @@ -276,10 +301,10 @@ printf("%s host_limit off; \n", indent); } if (tag_limit_message) { - printf("%s tag_limit on %d \"%s\"; \n", indent, tag_limit, tag_limit_message); + printf("%s html_limit on %d \"%s\"; \n", indent, tag_limit, tag_limit_message); } else { - printf("%s tag_limit off; \n", indent); + printf("%s html_limit off; \n", indent); } printf("%s }; \n", indent); } @@ -287,7 +312,7 @@ printf("%s content off {}; \n", indent, env_from_default); } - printf("%s env_to { \n", indent); + printf("%s env_to { \t// %s\n", indent, fullname); for (string_set::iterator i=env_to.begin(); i!=env_to.end(); i++) { printf("%s %s; \n", indent, *i); } @@ -298,7 +323,7 @@ c->dump(level+1); } - printf("%s env_from %s { \n", indent, env_from_default); + printf("%s env_from %s { \t// %s\n", indent, env_from_default, fullname); if (!env_from.empty()) { printf("%s // white/black/unknown \n", indent); for (string_map::iterator i=env_from.begin(); i!=env_from.end(); i++) { @@ -402,6 +427,7 @@ // bool parse_content(TOKEN &tok, CONFIG &dc, CONTEXT &me); bool parse_content(TOKEN &tok, CONFIG &dc, CONTEXT &me) { + bool topdefault = 
(!me.get_parent()) && (!dc.default_context); char *setting = tok.next(); if (setting == token_on) { me.set_content_filtering(true); @@ -418,9 +444,14 @@ char *have = tok.next(); if (!have) break; if (have == token_filter) { - me.set_content_suffix(tok.next()); - me.set_content_message(tok.next()); + char *suffix = tok.next(); + char *messag = tok.next(); + if (topdefault) { + me.set_content_suffix(suffix); + me.set_content_message(messag); + } if (!tsa(tok, token_semi)) return false; + if (!topdefault) tok.token_error("content filters may only be speciried in the top default context"); } else if (have == token_ignore) { if (!tsa(tok, token_lbrace)) return false; @@ -445,10 +476,11 @@ break; // done } else { - me.add_tld(have); + if (topdefault) me.add_tld(have); } } if (!tsa(tok, token_semi)) return false; + if (!topdefault) tok.token_error("tld values may only be specified in the top default context"); } else if (have == token_html_limit) { have = tok.next(); @@ -475,10 +507,11 @@ break; // done } else { - me.add_tag(have); + if (topdefault) me.add_tag(have); } } if (!tsa(tok, token_semi)) return false; + if (!topdefault) tok.token_error("html tags may only be specified in the top default context"); } else if (have == token_host_limit) { have = tok.next(); @@ -552,6 +585,17 @@ } } } + else if (have == token_substitute) { + if (tok.next() == token_mailhost) { + have = tok.next(); + if (keeping) { + if (me.allow_env_to(have)) { + me.add_to(have); + dc.add_to(have, &me); + } + } + } + } tok.skipeol(); } } @@ -560,7 +604,7 @@ dc.add_to(have, &me); } else { - tok.token_error("valid env_to address or domain name", have); + tok.token_error("user@ or user@domain.tld or domain.tld where domain.tld allowed by parent context", have); return false; } } @@ -573,7 +617,7 @@ bool parse_envfrom(TOKEN &tok, CONFIG &dc, CONTEXT &me); bool parse_envfrom(TOKEN &tok, CONFIG &dc, CONTEXT &me) { char *st = tok.next(); - if ((st == token_black) || (st == token_white) || (st == 
token_unknown)) { + if ((st == token_black) || (st == token_white) || (st == token_unknown) || (st == token_inherit)) { me.set_from_default(st); } else { @@ -615,6 +659,12 @@ me.add_from(have, (many) ? token_black : token_white); } } + else if (have == token_substitute) { + if (tok.next() == token_mailhost) { + have = tok.next(); + me.add_from(have, (many) ? token_black : token_white); + } + } tok.skipeol(); } } @@ -730,6 +780,7 @@ token_include = register_string("include"); token_inherit = register_string("inherit"); token_lbrace = register_string("{"); + token_mailhost = register_string("mail_host"); token_many = register_string("many"); token_off = register_string("off"); token_ok = register_string("ok"); @@ -738,6 +789,7 @@ token_rbrace = register_string("}"); token_semi = register_string(";"); token_soft = register_string("soft"); + token_substitute = register_string("substitute"); token_tld = register_string("tld"); token_unknown = register_string("unknown"); token_white = register_string("white");
--- a/src/context.h Sun Jul 10 14:19:00 2005 -0700 +++ b/src/context.h Wed Jul 13 23:04:14 2005 -0700 @@ -8,9 +8,7 @@ enum status {oksofar, // not rejected yet white, // whitelisted black, // blacklisted - reject, // rejected by a dns list - reject_tag, // too many bad html tags - reject_host}; // too many hosts/urls in body + reject}; // rejected by a dns list class DNSBL; class CONTEXT; @@ -70,7 +68,7 @@ void add_from_context(char *from, CONTEXTP con) {env_from_context[from] = con;}; void set_from_default(char *status) {env_from_default = status;}; char* find_from(char *from); - CONTEXTP find_from_context(char *from); + CONTEXTP find_context(char *from); CONTEXTP find_from_context_name(char *name); void set_content_filtering(bool filter) {content_filtering = filter;}; @@ -122,8 +120,8 @@ CONFIG(); ~CONFIG(); void add_context(CONTEXTP con); - void add_to(char *to, CONTEXTP con) {env_to[to] = con;}; - CONTEXTP find_context(char *to, char *from); + void add_to(char *to, CONTEXTP con); + CONTEXTP find_context(char *to); char* get_content_suffix() {return default_context->get_content_suffix() ;}; char* get_content_message() {return default_context->get_content_message() ;}; @@ -152,6 +150,7 @@ extern char *token_include; extern char *token_inherit; extern char *token_lbrace; +extern char *token_mailhost; extern char *token_many; extern char *token_off; extern char *token_ok; @@ -160,6 +159,7 @@ extern char *token_rbrace; extern char *token_semi; extern char *token_soft; +extern char *token_substitute; extern char *token_tld; extern char *token_unknown; extern char *token_white;
--- a/src/dnsbl.cpp Sun Jul 10 14:19:00 2005 -0700 +++ b/src/dnsbl.cpp Wed Jul 13 23:04:14 2005 -0700 @@ -91,6 +91,7 @@ bool debug_syslog = false; bool syslog_opened = false; +bool use_syslog = true; // false to printf bool loader_run = true; // used to stop the config loader thread CONFIG * config = NULL; // protected by the config_mutex int generation = 0; // protected by the config_mutex @@ -382,6 +383,7 @@ snprintf(buf, sizeof(buf), "%s: %s", priv->queueid, text); text = buf; } + if (use_syslog) { pthread_mutex_lock(&syslog_mutex); if (!syslog_opened) { openlog("dnsbl", LOG_PID, LOG_MAIL); @@ -390,6 +392,10 @@ syslog(LOG_NOTICE, "%s", text); pthread_mutex_unlock(&syslog_mutex); } + else { + printf("%s \n", text); + } +} void my_syslog(char *text) { my_syslog(NULL, text); @@ -584,7 +590,7 @@ bool check_single(mlfiPriv &priv, int ip, char *suffix) { // make a dns question const u_char *src = (const u_char *)&ip; - if (src[0] == 127) return oksofar; // don't do dns lookups on localhost + if (src[0] == 127) return false; // don't do dns lookups on localhost #ifdef NS_MAXDNAME char question[NS_MAXDNAME]; #else @@ -610,7 +616,7 @@ // bool check_dnsbl(mlfiPriv &priv, dnsblp_list &dnsbll, DNSBLP &rejectlist); bool check_dnsbl(mlfiPriv &priv, dnsblp_list &dnsbll, DNSBLP &rejectlist) { - if (priv.authenticated) return oksofar; + if (priv.authenticated) return false; for (dnsblp_list::iterator i=dnsbll.begin(); i!=dnsbll.end(); i++) { DNSBLP dp = *i; // non null by construction bool st; @@ -639,6 +645,7 @@ CONFIG &dc = *priv.pc; string_set &hosts = priv.memory->get_hosts(); string_set &ignore = dc.get_content_host_ignore(); + char *suffix = dc.get_content_suffix(); int count = 0; int cnt = hosts.size(); // number of hosts we could look at @@ -680,7 +687,7 @@ int_set::iterator i = ips.find(ip); if (i == ips.end()) { ips.insert(ip); - if (check_single(priv, ip, dc.get_content_suffix())) { + if (check_single(priv, ip, suffix)) { return true; } } @@ -713,7 +720,7 @@ 
int_set::iterator i = ips.find(ip); if (i == ips.end()) { ips.insert(ip); - if (check_single(priv, ip, dc.get_content_suffix())) { + if (check_single(priv, ip, suffix)) { string_map::iterator j = nameservers.ns_host.find(host); if (j != nameservers.ns_host.end()) { char *refer = (*j).second; @@ -734,6 +741,23 @@ //////////////////////////////////////////////// +// this email address is passed in from sendmail, and will +// always be enclosed in <>. It may have mixed case, just +// as the mail client sent it. We dup the string and convert +// the duplicate to lower case. +// +char *to_lower_string(char *email); +char *to_lower_string(char *email) { + int n = strlen(email)-2; + if (n < 1) return strdup(email); + char *key = strdup(email+1); + key[n] = '\0'; + for (int i=0; i<n; i++) key[i] = tolower(key[i]); + return key; +} + + +//////////////////////////////////////////////// // start of sendmail milter interfaces // sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr) @@ -754,7 +778,7 @@ sfsistat mlfi_envfrom(SMFICTX *ctx, char **from) { mlfiPriv &priv = *MLFIPRIV; - priv.mailaddr = strdup(from[0]); + priv.mailaddr = to_lower_string(from[0]); priv.authenticated = (smfi_getsymval(ctx, "{auth_authen}") != NULL); return SMFIS_CONTINUE; } @@ -766,8 +790,10 @@ CONFIG &dc = *priv.pc; if (!priv.queueid) priv.queueid = strdup(smfi_getsymval(ctx, "i")); char *rcptaddr = rcpt[0]; - CONTEXT &con = *(dc.find_context(rcptaddr, priv.mailaddr)); + char *loto = to_lower_string(rcptaddr); + CONTEXT con = *(dc.find_context(loto)->find_context(priv.mailaddr)); char *fromvalue = con.find_from(priv.mailaddr); + free(loto); status st; if (fromvalue == token_black) { st = black; @@ -986,7 +1012,7 @@ void usage(char *prog); void usage(char *prog) { - fprintf(stderr, "Usage: %s [-d] [-c] -r port -p sm-sock-addr [-t timeout]\n", prog); + fprintf(stderr, "Usage: %s [-d] [-c] [-s] [-e from|to] -r port -p sm-sock-addr [-t timeout]\n", prog); fprintf(stderr, "where port is 
for the connection to our own dns resolver processes\n"); fprintf(stderr, " and should be local-domain-socket-file-name\n"); fprintf(stderr, "where sm-sock-addr is for the connection to sendmail\n"); @@ -994,7 +1020,12 @@ fprintf(stderr, " inet:port@ip-address\n"); fprintf(stderr, " local:local-domain-socket-file-name\n"); fprintf(stderr, "-c will load and dump the config to stdout\n"); + fprintf(stderr, "-s will stress test the config loading code by repeating the load/free cycle\n"); + fprintf(stderr, " in an infinte loop.\n"); fprintf(stderr, "-d will add some syslog debug messages\n"); + fprintf(stderr, "-e will print the results of looking up the from and to addresses in the\n"); + fprintf(stderr, " current config. The | character is used to separate the from and to\n"); + fprintf(stderr, " addresses in the argument to the -e switch\n"); } @@ -1030,10 +1061,12 @@ { token_init(); bool check = false; + bool stress = false; bool setconn = false; bool setreso = false; + char *email = NULL; int c; - const char *args = "r:p:t:hcd"; + const char *args = "r:p:t:e:cdhs"; extern char *optarg; // Process command line options @@ -1074,10 +1107,19 @@ } break; + case 'e': + if (email) free(email); + email = strdup(optarg); + break; + case 'c': check = true; break; + case 's': + stress = true; + break; + case 'd': debug_syslog = true; break; @@ -1090,6 +1132,7 @@ } if (check) { + use_syslog = false; CONFIG *conf = new_conf(); if (conf) { conf->dump(); @@ -1101,6 +1144,42 @@ } } + if (stress) { + fprintf(stdout, "stress testing\n"); + while (1) { + for (int i=0; i<10; i++) { + CONFIG *conf = new_conf(); + if (conf) delete conf; + } + fprintf(stdout, "."); + fflush(stdout); + sleep(1); + } + } + + if (email) { + char *x = strchr(email, '|'); + if (x) { + *x = '\0'; + char *from = strdup(email); + char *to = strdup(x+1); + use_syslog = false; + CONFIG *conf = new_conf(); + if (conf) { + CONTEXTP con = conf->find_context(to); + const int maxlen = 1000; + char buf[maxlen]; + 
fprintf(stdout, "envelope to <%s> finds context %s\n", to, con->get_full_name(buf,maxlen)); + CONTEXTP fc = con->find_context(from); + fprintf(stdout, "envelope from <%s> finds context %s\n", from, fc->get_full_name(buf,maxlen)); + char *st = fc->find_from(from); + fprintf(stdout, "envelope from <%s> finds status %s\n", from, st); + delete conf; + } + } + return 0; + } + if (!setconn) { fprintf(stderr, "%s: Missing required -p argument\n", argv[0]); usage(argv[0]);
--- a/src/new.cpp Sun Jul 10 14:19:00 2005 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1389 +0,0 @@ -/* - -Copyright (c) 2004, 2005 Carl Byington - 510 Software Group, released -under the GPL version 2 or any later version at your choice available at -http://www.fsf.org/licenses/gpl.txt - -Based on a sample milter Copyright (c) 2000-2003 Sendmail, Inc. and its -suppliers. Inspired by the DCC by Rhyolite Software - --r port The port used to talk to our internal dns resolver processes --p port The port through which the MTA will connect to this milter. --t sec The timeout value. --c Check the config, and print a copy to stdout. Don't start the - milter or do anything with the socket. --d Add debug syslog entries - - -TODO: -1) Add config for max_recipients for each mail domain. Recipients in -excess of that limit will be rejected, and the entire data will be -rejected if it is sent. - -2) Add config for poison addresses. If any recipient is poison, all -recipients are rejected even if they would be whitelisted, and the -data is rejected if sent. - -3) Add option to only allow one recipient if the return path is empty. - -4) Check if the envelope from domain name primary MX points 127.0.0.0/8 - -5) Add option for using smtp connections to verify addresses from backup -mx machines. This allows the backup mx to learn the valid addresses -on the primary machine. 
- -*/ - - -// from sendmail sample -#include <sys/types.h> -#include <sys/stat.h> -#include <errno.h> -#include <sysexits.h> -#include <unistd.h> - -// needed for socket io -#include <sys/ioctl.h> -#include <net/if.h> -#include <arpa/inet.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <netdb.h> -#include <sys/socket.h> -#include <sys/un.h> - -// needed for thread -#include <pthread.h> - -// needed for std c++ collections -#include <set> -#include <map> -#include <list> - -// for the dns resolver -#include <netinet/in.h> -#include <arpa/nameser.h> -#include <resolv.h> - -// misc stuff needed here -#include <ctype.h> -#include <syslog.h> -#include <pwd.h> -#include <sys/wait.h> /* header for waitpid() and various macros */ -#include <signal.h> /* header for signal functions */ - -#include "context.h" - -static char* dnsbl_version="$Id:"; - -extern "C" { - #include "libmilter/mfapi.h" - sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr); - sfsistat mlfi_envfrom(SMFICTX *ctx, char **argv); - sfsistat mlfi_envrcpt(SMFICTX *ctx, char **argv); - sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len); - sfsistat mlfi_eom(SMFICTX *ctx); - sfsistat mlfi_abort(SMFICTX *ctx); - sfsistat mlfi_close(SMFICTX *ctx); - void sig_chld(int signo); -} - -struct ns_map { - // all the strings are owned by the keys/values in the ns_host string map - string_map ns_host; // nameserver name -> host name that uses this name server - ns_mapper ns_ip; // nameserver name -> ip address of the name server -}; - -static bool debug_syslog = false; -static bool loader_run = true; // used to stop the config loader thread -static CONFIG * config = NULL; // protected by the config_mutex -static int generation = 0; // protected by the config_mutex - -static pthread_mutex_t config_mutex; -static pthread_mutex_t syslog_mutex; -static pthread_mutex_t resolve_mutex; -static pthread_mutex_t fd_pool_mutex; - -static std::set<int> fd_pool; -static int NULL_SOCKET = 
-1; -static char *resolver_port = NULL; // unix domain socket to talk to the dns resolver process -static int resolver_socket = NULL_SOCKET; // socket used to listen for resolver requests -static time_t ERROR_SOCKET_TIME = 60; // number of seconds between attempts to open the spam filter socket -static time_t last_error_time; -static int resolver_sock_count = 0; // protected with fd_pool_mutex -static int resolver_pool_size = 0; // protected with fd_pool_mutex - - -// packed structure to allow a single socket write to dump the -// length and the following answer. The packing attribute is gcc specific. -struct glommer { - int length; - #ifdef NS_PACKETSZ - u_char answer[NS_PACKETSZ]; // with a resolver, we return resolver answers - #else - int answer; // without a resolver, we return a single ip4 address, 0 == no answer - #endif -} __attribute__ ((packed)); - -struct mlfiPriv; - - -//////////////////////////////////////////////// -// helper to discard the strings and objects held by an ns_map -// -static void discard(ns_map &s); -static void discard(ns_map &s) { - for (string_map::iterator i=s.ns_host.begin(); i!=s.ns_host.end(); i++) { - char *x = (*i).first; - char *y = (*i).second; - free(x); - free(y); - } - s.ns_ip.clear(); - s.ns_host.clear(); -} - -//////////////////////////////////////////////// -// helper to register a string in an ns_map -// -static void register_string(ns_map &s, char *name, char *refer); -static void register_string(ns_map &s, char *name, char *refer) { - string_map::iterator i = s.ns_host.find(name); - if (i != s.ns_host.end()) return; - char *x = strdup(name); - char *y = strdup(refer); - s.ns_ip[x] = 0; - s.ns_host[x] = y; - -} - -//////////////////////////////////////////////// -// syslog a message -// -static void my_syslog(mlfiPriv *priv, char *text); - - -// include the content scanner -#include "scanner.cpp" - - -//////////////////////////////////////////////// -// disconnect the fd from the dns resolver process -// -void 
my_disconnect(int sock, bool decrement = true); -void my_disconnect(int sock, bool decrement) -{ - if (sock != NULL_SOCKET) { - if (decrement) { - pthread_mutex_lock(&fd_pool_mutex); - resolver_sock_count--; - pthread_mutex_unlock(&fd_pool_mutex); - } - shutdown(sock, SHUT_RDWR); - close(sock); - } -} - - -//////////////////////////////////////////////// -// return fd connected to the dns resolver process -// -int my_connect(); -int my_connect() -{ - // if we have had recent errors, don't even try to open the socket - time_t now = time(NULL); - if ((now - last_error_time) < ERROR_SOCKET_TIME) return NULL_SOCKET; - - // nothing recent, maybe this time it will work - int sock = NULL_SOCKET; - sockaddr_un server; - memset(&server, '\0', sizeof(server)); - server.sun_family = AF_UNIX; - strncpy(server.sun_path, resolver_port, sizeof(server.sun_path)-1); - sock = socket(AF_UNIX, SOCK_STREAM, 0); - if (sock != NULL_SOCKET) { - bool rc = (connect(sock, (sockaddr *)&server, sizeof(server)) == 0); - if (!rc) { - my_disconnect(sock, false); - sock = NULL_SOCKET; - last_error_time = now; - } - } - else last_error_time = now; - if (sock != NULL_SOCKET) { - pthread_mutex_lock(&fd_pool_mutex); - resolver_sock_count++; - pthread_mutex_unlock(&fd_pool_mutex); - } - return sock; -} - - -//////////////////////////////////////////////// -// mail filter private data, held for us by sendmail -// -struct mlfiPriv -{ - // connection specific data - CONFIG *pc; // global context with our maps - int fd; // to talk to dns resolvers process - bool err; // did we get any errors on the resolver socket? - int ip; // ip4 address of the smtp client - map<DNSBLP, status> checked; // status from those lists - // message specific data - char *mailaddr; // envelope from value - char *queueid; // sendmail queue id - bool authenticated; // client authenticated? if so, suppress all dnsbl checks - bool have_whites; // have at least one whitelisted recipient? 
need to accept content and remove all non-whitelisted recipients if it fails - bool only_whites; // every recipient is whitelisted? - string_set non_whites; // remember the non-whitelisted recipients so we can remove them if need be - recorder *memory; // memory for the content scanner - url_scanner *scanner; // object to handle body scanning - mlfiPriv(); - ~mlfiPriv(); - void reset(bool final = false); // for a new message - void get_fd(); - void return_fd(); - int my_read(char *buf, int len); - int my_write(char *buf, int len); -}; - -mlfiPriv::mlfiPriv() { - pthread_mutex_lock(&config_mutex); - pc = config; - pc->reference_count++; - pthread_mutex_unlock(&config_mutex); - get_fd(); - ip = 0; - mailaddr = NULL; - queueid = NULL; - authenticated = false; - have_whites = false; - only_whites = true; - memory = new recorder(this, &pc->html_tags, &pc->tlds); - scanner = new url_scanner(memory); -} - -mlfiPriv::~mlfiPriv() { - return_fd(); - pthread_mutex_lock(&config_mutex); - pc->reference_count--; - pthread_mutex_unlock(&config_mutex); - reset(true); -} - -void mlfiPriv::reset(bool final) { - if (mailaddr) free(mailaddr); - if (queueid) free(queueid); - discard(non_whites); - delete memory; - delete scanner; - if (!final) { - mailaddr = NULL; - queueid = NULL; - authenticated = false; - have_whites = false; - only_whites = true; - memory = new recorder(this, &pc->html_tags, &pc->tlds); - scanner = new url_scanner(memory); - } -} - -void mlfiPriv::get_fd() -{ - err = true; - fd = NULL_SOCKET; - int result = pthread_mutex_lock(&fd_pool_mutex); - if (!result) { - std::set<int>::iterator i; - i = fd_pool.begin(); - if (i != fd_pool.end()) { - // have at least one fd in the pool - err = false; - fd = *i; - fd_pool.erase(fd); - resolver_pool_size--; - pthread_mutex_unlock(&fd_pool_mutex); - } - else { - // pool is empty, get a new fd - pthread_mutex_unlock(&fd_pool_mutex); - fd = my_connect(); - err = (fd == NULL_SOCKET); - } - } - else { - // cannot lock the pool, just 
get a new fd - fd = my_connect(); - err = (fd == NULL_SOCKET); - } -} - -void mlfiPriv::return_fd() -{ - if (err) { - // this fd got a socket error, so close it, rather than returning it to the pool - my_disconnect(fd); - } - else { - int result = pthread_mutex_lock(&fd_pool_mutex); - if (!result) { - if ((resolver_sock_count > resolver_pool_size*5) || (resolver_pool_size < 5)) { - // return the fd to the pool - fd_pool.insert(fd); - resolver_pool_size++; - pthread_mutex_unlock(&fd_pool_mutex); - } - else { - // more than 20% of the open resolver sockets are in the pool, and the - // pool as at least 5 sockets. that is enough, so just close this one. - pthread_mutex_unlock(&fd_pool_mutex); - my_disconnect(fd); - } - } - else { - // could not lock the pool, so just close the fd - my_disconnect(fd); - } - } -} - -int mlfiPriv::my_write(char *buf, int len) -{ - if (err) return 0; - int rs = 0; - while (len) { - int ws = write(fd, buf, len); - if (ws > 0) { - rs += ws; - len -= ws; - buf += ws; - } - else { - // peer closed the socket! - rs = 0; - err = true; - break; - } - } - return rs; -} - -int mlfiPriv::my_read(char *buf, int len) -{ - if (err) return 0; - int rs = 0; - while (len > 1) { - int ws = read(fd, buf, len); - if (ws > 0) { - rs += ws; - len -= ws; - buf += ws; - } - else { - // peer closed the socket! 
- rs = 0; - err = true; - break; - } - } - return rs; -} - -#define MLFIPRIV ((struct mlfiPriv *) smfi_getpriv(ctx)) - - -//////////////////////////////////////////////// -// syslog a message -// -static void my_syslog(mlfiPriv *priv, char *text) { - char buf[1000]; - if (priv) { - snprintf(buf, sizeof(buf), "%s: %s", priv->queueid, text); - text = buf; - } - pthread_mutex_lock(&syslog_mutex); - openlog("dnsbl", LOG_PID, LOG_MAIL); - syslog(LOG_NOTICE, "%s", text); - closelog(); - pthread_mutex_unlock(&syslog_mutex); -} - -static void my_syslog(char *text); -static void my_syslog(char *text) { - my_syslog(NULL, text); -} - -//////////////////////////////////////////////// -// register a global string -// -static char* register_string(char *name); -static char* register_string(char *name) { - return register_string(all_strings, name); -} - - -static char* next_token(char *delim); -static char* next_token(char *delim) { - char *name = strtok(NULL, delim); - if (!name) return name; - return register_string(name); -} - - -//////////////////////////////////////////////// -// lookup an email address in the env_from or env_to maps -// -static char* lookup1(char *email, string_map map); -static char* lookup1(char *email, string_map map) { - string_map::iterator i = map.find(email); - if (i != map.end()) return (*i).second; - char *x = strchr(email, '@'); - if (!x) return DEFAULT; - x++; - i = map.find(x); - if (i != map.end()) return (*i).second; - return DEFAULT; -} - - -//////////////////////////////////////////////// -// lookup an email address in the env_from or env_to maps -// this email address is passed in from sendmail, and will -// always be enclosed in <>. It may have mixed case, just -// as the mail client sent it. 
-// -static char* lookup(char* email, string_map map); -static char* lookup(char* email, string_map map) { - int n = strlen(email)-2; - if (n < 1) return DEFAULT; // malformed - char *key = strdup(email+1); - key[n] = '\0'; - for (int i=0; i<n; i++) key[i] = tolower(key[i]); - char *rc = lookup1(key, map); - free(key); - return rc; -} - - -//////////////////////////////////////////////// -// find the dnsbl with a specific name -// -static DNSBLP find_dnsbl(CONFIG &dc, char *name); -static DNSBLP find_dnsbl(CONFIG &dc, char *name) { - dnsblp_map::iterator i = dc.dnsbls.find(name); - if (i == dc.dnsbls.end()) return NULL; - return (*i).second; -} - - -//////////////////////////////////////////////// -// find the dnsbll with a specific name -// -static DNSBLLP find_dnsbll(CONFIG &dc, char *name); -static DNSBLLP find_dnsbll(CONFIG &dc, char *name) { - dnsbllp_map::iterator i = dc.dnsblls.find(name); - if (i == dc.dnsblls.end()) return NULL; - return (*i).second; -} - - -//////////////////////////////////////////////// -// find the envfrom map with a specific name -// -static string_map* find_from_map(CONFIG &dc, char *name); -static string_map* find_from_map(CONFIG &dc, char *name) { - from_map::iterator i = dc.env_from.find(name); - if (i == dc.env_from.end()) return NULL; - return (*i).second; -} - - -static string_map& really_find_from_map(CONFIG &dc, char *name); -static string_map& really_find_from_map(CONFIG &dc, char *name) { - string_map *sm = find_from_map(dc, name); - if (!sm) { - sm = new string_map; - dc.env_from[name] = sm; - } - return *sm; -} - - - -//////////////////////////////////////////////// -// read a resolver request from the socket, process it, and -// write the result back to the socket. 
- -static void process_resolver_requests(int socket); -static void process_resolver_requests(int socket) { -#ifdef NS_MAXDNAME - char question[NS_MAXDNAME]; -#else - char question[1000]; -#endif - glommer glom; - - int maxq = sizeof(question); - while (true) { - // read a question - int rs = 0; - while (true) { - int ns = read(socket, question+rs, maxq-rs); - if (ns > 0) { - rs += ns; - if (question[rs-1] == '\0') { - // last byte read was the null terminator, we are done - break; - } - } - else { - // peer closed the socket - //my_syslog("!!child worker process, peer closed socket while reading question"); - shutdown(socket, SHUT_RDWR); - close(socket); - return; - } - } - - // find the answer -#ifdef NS_PACKETSZ - //char text[1000]; - //snprintf(text, sizeof(text), "!!child worker process has a question %s", question); - //my_syslog(text); - glom.length = res_search(question, ns_c_in, ns_t_a, glom.answer, sizeof(glom.answer)); - if (glom.length < 0) glom.length = 0; // represent all errors as zero length answers -#else - glom.length = sizeof(glom.answer); - glom.answer = 0; - struct hostent *host = gethostbyname(question); - if (host && (host->h_addrtype == AF_INET)) { - memcpy(&glom.answer, host->h_addr, sizeof(glom.answer)); - } -#endif - - // write the answer - char *buf = (char *)&glom; - int len = glom.length + sizeof(glom.length); - //snprintf(text, sizeof(text), "!!child worker process writing answer length %d for total %d", glom.length, len); - //my_syslog(text); - int ws = 0; - while (len > ws) { - int ns = write(socket, buf+ws, len-ws); - if (ns > 0) { - ws += ns; - } - else { - // peer closed the socket! - //my_syslog("!!child worker process, peer closed socket while writing answer"); - shutdown(socket, SHUT_RDWR); - close(socket); - return; - } - } - } -} - - -//////////////////////////////////////////////// -// ask a dns question and get an A record answer - we don't try -// very hard, just using the default resolver retry settings. 
-// If we cannot get an answer, we just accept the mail. -// -// -static int dns_interface(mlfiPriv &priv, char *question, bool maybe_ip, ns_map *nameservers); -static int dns_interface(mlfiPriv &priv, char *question, bool maybe_ip, ns_map *nameservers) { - // this part can be done without locking the resolver mutex. Each - // milter thread is talking over its own socket to a separate resolver - // process, which does the actual dns resolution. - if (priv.err) return 0; // cannot ask more questions on this socket. - priv.my_write(question, strlen(question)+1); // write the question including the null terminator - glommer glom; - char *buf = (char *)&glom; - priv.my_read(buf, sizeof(glom.length)); - buf += sizeof(glom.length); - ///char text[1000]; - ///snprintf(text, sizeof(text), "!!milter thread wrote question %s and has answer length %d", question, glom.length); - ///my_syslog(text); - if ((glom.length < 0) || (glom.length > sizeof(glom.answer))) { - priv.err = true; - return 0; // cannot process overlarge answers - } - priv.my_read(buf, glom.length); - -#ifdef NS_PACKETSZ - // now we need to lock the resolver mutex to keep the milter threads from - // stepping on each other while parsing the dns answer. 
- int ret_address = 0; - pthread_mutex_lock(&resolve_mutex); - if (glom.length > 0) { - // parse the answer - ns_msg handle; - ns_rr rr; - if (ns_initparse(glom.answer, glom.length, &handle) == 0) { - // look for ns names - if (nameservers) { - ns_map &ns = *nameservers; - int rrnum = 0; - while (ns_parserr(&handle, ns_s_ns, rrnum++, &rr) == 0) { - if (ns_rr_type(rr) == ns_t_ns) { - char nam[NS_MAXDNAME+1]; - char *n = nam; - const u_char *p = ns_rr_rdata(rr); - while (((n-nam) < NS_MAXDNAME) && ((p-glom.answer) < glom.length) && *p) { - size_t s = *(p++); - if (s > 191) { - // compression pointer - s = (s-192)*256 + *(p++); - if (s >= glom.length) break; // pointer outside bounds of answer - p = glom.answer + s; - s = *(p++); - } - if (s > 0) { - if ((n-nam) >= (NS_MAXDNAME-s)) break; // destination would overflow name buffer - if ((p-glom.answer) >= (glom.length-s)) break; // source outside bounds of answer - memcpy(n, p, s); - n += s; - p += s; - *(n++) = '.'; - } - } - if (n-nam) n--; // remove trailing . 
- *n = '\0'; // null terminate it - register_string(ns, nam, question); // ns host to lookup later - } - } - rrnum = 0; - while (ns_parserr(&handle, ns_s_ar, rrnum++, &rr) == 0) { - if (ns_rr_type(rr) == ns_t_a) { - char* nam = (char*)ns_rr_name(rr); - ns_mapper::iterator i = ns.ns_ip.find(nam); - if (i != ns.ns_ip.end()) { - // we want this ip address - int address; - memcpy(&address, ns_rr_rdata(rr), sizeof(address)); - ns.ns_ip[nam] = address; - } - } - } - } - int rrnum = 0; - while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) { - if (ns_rr_type(rr) == ns_t_a) { - int address; - memcpy(&address, ns_rr_rdata(rr), sizeof(address)); - ret_address = address; - } - } - } - } - if (maybe_ip && !ret_address) { - // might be a bare ip address - in_addr ip; - if (inet_aton(question, &ip)) { - ret_address = ip.s_addr; - } - } - pthread_mutex_unlock(&resolve_mutex); - return ret_address; -#else - return glom.answer; -#endif -} - - -//////////////////////////////////////////////// -// check a single dnsbl -// -static status check_single(mlfiPriv &priv, int ip, char *suffix); -static status check_single(mlfiPriv &priv, int ip, char *suffix) { - // make a dns question - const u_char *src = (const u_char *)&ip; - if (src[0] == 127) return oksofar; // don't do dns lookups on localhost -#ifdef NS_MAXDNAME - char question[NS_MAXDNAME]; -#else - char question[1000]; -#endif - snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", src[3], src[2], src[1], src[0], suffix); - // ask the question, if we get an A record it implies a blacklisted ip address - return (dns_interface(priv, question, false, NULL)) ? 
reject : oksofar; -} - - -//////////////////////////////////////////////// -// check a single dnsbl -// -static status check_single(mlfiPriv &priv, int ip, DNSBL &bl); -static status check_single(mlfiPriv &priv, int ip, DNSBL &bl) { - return check_single(priv, ip, bl.suffix); -} - - -//////////////////////////////////////////////// -// check the dnsbls specified for this recipient -// -static status check_dnsbl(mlfiPriv &priv, DNSBLLP dnsbllp, DNSBLP &rejectlist); -static status check_dnsbl(mlfiPriv &priv, DNSBLLP dnsbllp, DNSBLP &rejectlist) { - if (priv.authenticated) return oksofar; - if (!dnsbllp) return oksofar; - DNSBLL &dnsbll = *dnsbllp; - for (DNSBLL::iterator i=dnsbll.begin(); i!=dnsbll.end(); i++) { - DNSBLP dp = *i; // non null by construction - status st; - map<DNSBLP, status>::iterator f = priv.checked.find(dp); - if (f == priv.checked.end()) { - // have not checked this list yet - st = check_single(priv, priv.ip, *dp); - rejectlist = dp; - priv.checked[dp] = st; - } - else { - st = (*f).second; - rejectlist = (*f).first; - } - if (st == reject) return st; - } - return oksofar; -} - - -//////////////////////////////////////////////// -// check the hosts from the body against the content dnsbl -// -static status check_hosts(mlfiPriv &priv, char *&host, int &ip); -static status check_hosts(mlfiPriv &priv, char *&host, int &ip) { - CONFIG &dc = *priv.pc; - int count = 0; - ns_map nameservers; - bool ran = priv.pc->host_random; - int lim = priv.pc->host_limit; // we should not look at more than this many hosts - int cnt = priv.memory->hosts.size(); // number of hosts we could look at - int_set ips; // remove duplicate ip addresses - for (string_set::iterator i=priv.memory->hosts.begin(); i!=priv.memory->hosts.end(); i++) { - host = *i; // a reference into priv.memory->hosts, which will live until this smtp transaction is closed - - // don't bother looking up hosts on the ignore list - string_set::iterator j = priv.pc->content_host_ignore.find(host); - if 
(j != priv.pc->content_host_ignore.end()) continue; - - // try to only look at lim/cnt fraction of the available cnt host names in random mode - if ((cnt > lim) && (lim > 0) && ran) { - int r = rand() % cnt; - if (r >= lim) { - char buf[1000]; - snprintf(buf, sizeof(buf), "host %s skipped", host); - my_syslog(&priv, buf); - continue; - } - } - count++; - if ((count > lim) && (lim > 0) && (!ran)) { - discard(nameservers); - return reject_host; - } - ip = dns_interface(priv, host, true, &nameservers); - if (debug_syslog) { - char buf[1000]; - if (ip) { - char adr[sizeof "255.255.255.255"]; - adr[0] = '\0'; - inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); - snprintf(buf, sizeof(buf), "host %s found at %s", host, adr); - } - else { - snprintf(buf, sizeof(buf), "host %s not found", host); - } - my_syslog(&priv, buf); - } - if (ip) { - int_set::iterator i = ips.find(ip); - if (i == ips.end()) { - ips.insert(ip); - status st = check_single(priv, ip, dc.content_suffix); - if (st == reject) { - discard(nameservers); - return st; - } - } - } - } - lim *= 4; // allow average of 3 ns per host name - for (ns_mapper::iterator i=nameservers.ns_ip.begin(); i!=nameservers.ns_ip.end(); i++) { - count++; - if ((count > lim) && (lim > 0)) { - if (ran) continue; // don't complain - discard(nameservers); - return reject_host; - } - host = (*i).first; // a transient reference that needs to be replaced before we return it - ip = (*i).second; - if (!ip) ip = dns_interface(priv, host, false, NULL); - if (debug_syslog) { - char buf[200]; - if (ip) { - char adr[sizeof "255.255.255.255"]; - adr[0] = '\0'; - inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); - snprintf(buf, sizeof(buf), "ns %s found at %s", host, adr); - } - else { - snprintf(buf, sizeof(buf), "ns %s not found", host); - } - my_syslog(&priv, buf); - } - if (ip) { - int_set::iterator i = ips.find(ip); - if (i == ips.end()) { - ips.insert(ip); - status st = check_single(priv, ip, dc.content_suffix); - if (st 
== reject) { - string_map::iterator j = nameservers.ns_host.find(host); - if (j != nameservers.ns_host.end()) { - char *refer = (*j).second; - char buf[1000]; - snprintf(buf, sizeof(buf), "%s with nameserver %s", refer, host); - host = register_string(priv.memory->hosts, buf); // put a copy into priv.memory->hosts, and return that reference - } - else { - host = register_string(priv.memory->hosts, host); // put a copy into priv.memory->hosts, and return that reference - } - discard(nameservers); - return st; - } - } - } - } - discard(nameservers); - host = NULL; - int bin = priv.memory->binary_tags; - int bad = priv.memory->bad_html_tags; - lim = priv.pc->tag_limit; - if (3*bin > bad) return oksofar; // probably .zip or .tar.gz with random content - if ((bad > lim) && (lim > 0)) return reject_tag; - return oksofar; -} - - -//////////////////////////////////////////////// -// start of sendmail milter interfaces -// -sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr) -{ - // allocate some private memory - mlfiPriv *priv = new mlfiPriv; - if (hostaddr->sa_family == AF_INET) { - priv->ip = ((struct sockaddr_in *)hostaddr)->sin_addr.s_addr; - } - - // save the private data - smfi_setpriv(ctx, (void*)priv); - - // continue processing - return SMFIS_CONTINUE; -} - -sfsistat mlfi_envfrom(SMFICTX *ctx, char **from) -{ - mlfiPriv &priv = *MLFIPRIV; - priv.mailaddr = strdup(from[0]); - priv.authenticated = (smfi_getsymval(ctx, "{auth_authen}") != NULL); - return SMFIS_CONTINUE; -} - -sfsistat mlfi_envrcpt(SMFICTX *ctx, char **rcpt) -{ - DNSBLP rejectlist = NULL; // list that caused the reject - status st = oksofar; - mlfiPriv &priv = *MLFIPRIV; - CONFIG &dc = *priv.pc; - if (!priv.queueid) priv.queueid = strdup(smfi_getsymval(ctx, "i")); - char *rcptaddr = rcpt[0]; - char *dnsname = lookup(rcptaddr, dc.env_to_dnsbll); - char *fromname = lookup(rcptaddr, dc.env_to_chkfrom); - if ((strcmp(dnsname, BLACK) == 0) || - (strcmp(fromname, BLACK) == 0)) { - st = 
black; // two options to blacklist this recipient - } - else if (strcmp(fromname, WHITE) == 0) { - st = white; - } - else { - // check an env_from map - string_map *sm = find_from_map(dc, fromname); - if (sm != NULL) { - fromname = lookup(priv.mailaddr, *sm); // returns default if name not in map - if (strcmp(fromname, BLACK) == 0) { - st = black; // blacklist this envelope from value - } - if (strcmp(fromname, WHITE) == 0) { - st = white; // blacklist this envelope from value - } - } - } - if ((st == oksofar) && (strcmp(dnsname, WHITE) != 0)) { - // check dns lists - st = check_dnsbl(priv, find_dnsbll(dc, dnsname), rejectlist); - } - - if (st == reject) { - // reject the recipient based on some dnsbl - char adr[sizeof "255.255.255.255"]; - adr[0] = '\0'; - inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr)); - char buf[2000]; - snprintf(buf, sizeof(buf), rejectlist->message, adr, adr); - smfi_setreply(ctx, "550", "5.7.1", buf); - return SMFIS_REJECT; - } - else if (st == black) { - // reject the recipient based on blacklisting either from or to - smfi_setreply(ctx, "550", "5.7.1", "no such user"); - return SMFIS_REJECT; - } - else { - // accept the recipient - if (st == oksofar) { - // but remember the non-whites - register_string(priv.non_whites, rcptaddr); - priv.only_whites = false; - } - if (st == white) { - priv.have_whites = true; - } - return SMFIS_CONTINUE; - } -} - -sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len) -{ - mlfiPriv &priv = *MLFIPRIV; - if (priv.authenticated) return SMFIS_CONTINUE; - if (priv.only_whites) return SMFIS_CONTINUE; - if (!priv.pc->content_suffix) return SMFIS_CONTINUE; - priv.scanner->scan(data, len); - return SMFIS_CONTINUE; -} - -sfsistat mlfi_eom(SMFICTX *ctx) -{ - sfsistat rc; - mlfiPriv &priv = *MLFIPRIV; - char *host = NULL; - int ip; - status st; - // process end of message - if (priv.authenticated || - priv.only_whites || - (!priv.pc->content_suffix) || - ((st=check_hosts(priv, host, ip)) == oksofar)) 
rc = SMFIS_CONTINUE; - else { - if (!priv.have_whites) { - // can reject the entire message - char buf[2000]; - if (st == reject_tag) { - // rejected due to excessive bad html tags - snprintf(buf, sizeof(buf), priv.pc->tag_limit_message); - } - else if (st == reject_host) { - // rejected due to excessive unique host/urls - snprintf(buf, sizeof(buf), priv.pc->host_limit_message); - } - else { - char adr[sizeof "255.255.255.255"]; - adr[0] = '\0'; - inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); - snprintf(buf, sizeof(buf), priv.pc->content_message, host, adr); - } - smfi_setreply(ctx, "550", "5.7.1", buf); - rc = SMFIS_REJECT; - } - else { - // need to accept it but remove the recipients that don't want it - for (string_set::iterator i=priv.non_whites.begin(); i!=priv.non_whites.end(); i++) { - char *rcpt = *i; - smfi_delrcpt(ctx, rcpt); - } - rc = SMFIS_CONTINUE; - } - } - // reset for a new message on the same connection - mlfi_abort(ctx); - return rc; -} - -sfsistat mlfi_abort(SMFICTX *ctx) -{ - mlfiPriv &priv = *MLFIPRIV; - priv.reset(); - return SMFIS_CONTINUE; -} - -sfsistat mlfi_close(SMFICTX *ctx) -{ - mlfiPriv *priv = MLFIPRIV; - if (!priv) return SMFIS_CONTINUE; - delete priv; - smfi_setpriv(ctx, NULL); - return SMFIS_CONTINUE; -} - -struct smfiDesc smfilter = -{ - "DNSBL", // filter name - SMFI_VERSION, // version code -- do not change - SMFIF_DELRCPT, // flags - mlfi_connect, // connection info filter - NULL, // SMTP HELO command filter - mlfi_envfrom, // envelope sender filter - mlfi_envrcpt, // envelope recipient filter - NULL, // header filter - NULL, // end of header - mlfi_body, // body block filter - mlfi_eom, // end of message - mlfi_abort, // message aborted - mlfi_close, // connection cleanup -}; - - -//////////////////////////////////////////////// -// reload the config -// -static CONFIG* new_conf(); -static CONFIG* new_conf() { - CONFIG *newc = new CONFIG; - pthread_mutex_lock(&config_mutex); - newc->generation = generation++; - 
pthread_mutex_unlock(&config_mutex); - char buf[200]; - snprintf(buf, sizeof(buf), "loading configuration generation %d", newc->generation); - my_syslog(buf); - if (load_conf(*newc, "dnsbl.conf") { - newc->load_time = time(NULL); - return newc; - } - delete newc; - return NULL; -} - - -//////////////////////////////////////////////// -// thread to watch the old config files for changes -// and reload when needed. we also cleanup old -// configs whose reference count has gone to zero. -// -static void* config_loader(void *arg); -static void* config_loader(void *arg) { - typedef set<CONFIG *> configp_set; - configp_set old_configs; - while (loader_run) { - sleep(180); // look for modifications every 3 minutes - if (!loader_run) break; - CONFIG &dc = *config; - time_t then = dc.load_time; - struct stat st; - bool reload = false; - for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) { - char *fn = *i; - if (stat(fn, &st)) reload = true; // file disappeared - else if (st.st_mtime > then) reload = true; // file modified - if (reload) break; - } - if (reload) { - CONFIG *newc = new_conf(); - // replace the global config pointer - pthread_mutex_lock(&config_mutex); - CONFIG *old = config; - config = newc; - pthread_mutex_unlock(&config_mutex); - if (old) old_configs.insert(old); - } - // now look for old configs with zero ref counts - for (configp_set::iterator i=old_configs.begin(); i!=old_configs.end(); ) { - CONFIG *old = *i; - if (!old->reference_count) { - char buf[200]; - snprintf(buf, sizeof(buf), "freeing memory for old configuration generation %d", old->generation); - my_syslog(buf); - delete old; // destructor does all the work - old_configs.erase(i++); - } - else i++; - } - } - return NULL; -} - - -static void usage(char *prog); -static void usage(char *prog) -{ - fprintf(stderr, "Usage: %s [-d] [-c] -r port -p sm-sock-addr [-t timeout]\n", prog); - fprintf(stderr, "where port is for the connection to our own dns resolver 
processes\n"); - fprintf(stderr, " and should be local-domain-socket-file-name\n"); - fprintf(stderr, "where sm-sock-addr is for the connection to sendmail\n"); - fprintf(stderr, " and should be one of\n"); - fprintf(stderr, " inet:port@ip-address\n"); - fprintf(stderr, " local:local-domain-socket-file-name\n"); - fprintf(stderr, "-c will load and dump the config to stdout\n"); - fprintf(stderr, "-d will add some syslog debug messages\n"); -} - - - -static void setup_socket(char *sock); -static void setup_socket(char *sock) { - unlink(sock); - // sockaddr_un addr; - // memset(&addr, '\0', sizeof addr); - // addr.sun_family = AF_UNIX; - // strncpy(addr.sun_path, sock, sizeof(addr.sun_path)-1); - // int s = socket(AF_UNIX, SOCK_STREAM, 0); - // bind(s, (sockaddr*)&addr, sizeof(addr)); - // close(s); -} - - -/* - * The signal handler function -- only gets called when a SIGCHLD - * is received, ie when a child terminates - */ -void sig_chld(int signo) -{ - int status; - /* Wait for any child without blocking */ - while (waitpid(-1, &status, WNOHANG) > 0) { - // ignore child exit status, we only do this to cleanup zombies - } -} - - -int main(int argc, char**argv) -{ - token_init(); - bool check = false; - bool setconn = false; - bool setreso = false; - int c; - const char *args = "r:p:t:hcd"; - extern char *optarg; - - // Process command line options - while ((c = getopt(argc, argv, args)) != -1) { - switch (c) { - case 'r': - if (optarg == NULL || *optarg == '\0') { - fprintf(stderr, "Illegal resolver socket: %s\n", optarg); - exit(EX_USAGE); - } - resolver_port = strdup(optarg); - setup_socket(resolver_port); - setreso = true; - break; - - case 'p': - if (optarg == NULL || *optarg == '\0') { - fprintf(stderr, "Illegal sendmail socket: %s\n", optarg); - exit(EX_USAGE); - } - if (smfi_setconn(optarg) == MI_FAILURE) { - fprintf(stderr, "smfi_setconn failed\n"); - exit(EX_SOFTWARE); - } - if (strncasecmp(optarg, "unix:", 5) == 0) setup_socket(optarg + 5); - else if 
(strncasecmp(optarg, "local:", 6) == 0) setup_socket(optarg + 6); - setconn = true; - break; - - case 't': - if (optarg == NULL || *optarg == '\0') { - fprintf(stderr, "Illegal timeout: %s\n", optarg); - exit(EX_USAGE); - } - if (smfi_settimeout(atoi(optarg)) == MI_FAILURE) { - fprintf(stderr, "smfi_settimeout failed\n"); - exit(EX_SOFTWARE); - } - break; - - case 'c': - check = true; - break; - - case 'd': - debug_syslog = true; - break; - - case 'h': - default: - usage(argv[0]); - exit(EX_USAGE); - } - } - - if (check) { - CONFIG *conf = new_conf(); - if (conf) { - conf->dump(); - delete conf; - return 0; - } - else { - return 1; // config failed to load - } - } - - if (!setconn) { - fprintf(stderr, "%s: Missing required -p argument\n", argv[0]); - usage(argv[0]); - exit(EX_USAGE); - } - - if (!setreso) { - fprintf(stderr, "%s: Missing required -r argument\n", argv[0]); - usage(argv[0]); - exit(EX_USAGE); - } - - if (smfi_register(smfilter) == MI_FAILURE) { - fprintf(stderr, "smfi_register failed\n"); - exit(EX_UNAVAILABLE); - } - - // switch to background mode - if (daemon(1,0) < 0) { - fprintf(stderr, "daemon() call failed\n"); - exit(EX_UNAVAILABLE); - } - - // write the pid - const char *pidpath = "/var/run/dnsbl.pid"; - unlink(pidpath); - FILE *f = fopen(pidpath, "w"); - if (f) { -#ifdef linux - // from a comment in the DCC source code: - // Linux threads are broken. Signals given the - // original process are delivered to only the - // thread that happens to have that PID. The - // sendmail libmilter thread that needs to hear - // SIGINT and other signals does not, and that breaks - // scripts that need to stop milters. - // However, signaling the process group works. 
- fprintf(f, "-%d\n", (u_int)getpgrp()); -#else - fprintf(f, "%d\n", (u_int)getpid()); -#endif - fclose(f); - } - - // initialize the thread sync objects - pthread_mutex_init(&config_mutex, 0); - pthread_mutex_init(&syslog_mutex, 0); - pthread_mutex_init(&resolve_mutex, 0); - pthread_mutex_init(&fd_pool_mutex, 0); - - // drop root privs - struct passwd *pw = getpwnam("dnsbl"); - if (pw) { - if (setgid(pw->pw_gid) == -1) { - my_syslog("failed to switch to group dnsbl"); - } - if (setuid(pw->pw_uid) == -1) { - my_syslog("failed to switch to user dnsbl"); - } - } - - // fork off the resolver listener process - pid_t child = fork(); - if (child < 0) { - my_syslog("failed to create resolver listener process"); - exit(0); - } - if (child == 0) { - // we are the child - dns resolver listener process - resolver_socket = socket(AF_UNIX, SOCK_STREAM, 0); - if (resolver_socket < 0) { - my_syslog("child failed to create resolver socket"); - exit(0); // failed - } - sockaddr_un server; - memset(&server, '\0', sizeof(server)); - server.sun_family = AF_UNIX; - strncpy(server.sun_path, resolver_port, sizeof(server.sun_path)-1); - //try to bind the address to the socket. - if (bind(resolver_socket, (sockaddr *)&server, sizeof(server)) < 0) { - // bind failed - shutdown(resolver_socket, SHUT_RDWR); - close(resolver_socket); - my_syslog("child failed to bind resolver socket"); - exit(0); // failed - } - //listen on the socket. 
- if (listen(resolver_socket, 10) < 0) { - // listen failed - shutdown(resolver_socket, SHUT_RDWR); - close(resolver_socket); - my_syslog("child failed to listen to resolver socket"); - exit(0); // failed - } - // setup sigchld handler to prevent zombies - struct sigaction act; - act.sa_handler = sig_chld; // Assign sig_chld as our SIGCHLD handler - sigemptyset(&act.sa_mask); // We don't want to block any other signals in this example - act.sa_flags = SA_NOCLDSTOP; // only want children that have terminated - if (sigaction(SIGCHLD, &act, NULL) < 0) { - my_syslog("child failed to setup SIGCHLD handler"); - exit(0); // failed - } - while (true) { - sockaddr_un client; - socklen_t clientlen = sizeof(client); - int s = accept(resolver_socket, (sockaddr *)&client, &clientlen); - if (s > 0) { - // accept worked, it did not get cancelled before we could accept it - // fork off a process to handle this connection - int newchild = fork(); - if (newchild == 0) { - // this is the worker process - // child does not need the listening socket - close(resolver_socket); - //my_syslog("child forked a worker process"); - process_resolver_requests(s); - //my_syslog("child terminated a worker process"); - exit(0); - } - else { - // this is the parent - // parent does not need the accepted socket - close(s); - } - } - } - exit(0); // make sure we don't fall thru. 
- } - else { - sleep(2); // allow child to get started - } - - // load the initial config - config = new_conf(); - - // only create threads after the fork() in daemon - pthread_t tid; - if (pthread_create(&tid, 0, config_loader, 0)) - my_syslog("failed to create config loader thread"); - if (pthread_detach(tid)) - my_syslog("failed to detach config loader thread"); - - time_t starting = time(NULL); - int rc = smfi_main(); - if ((rc != MI_SUCCESS) && (time(NULL) > starting+5*60)) { - my_syslog("trying to restart after smfi_main()"); - loader_run = false; // eventually the config loader thread will terminate - execvp(argv[0], argv); - } - exit((rc == MI_SUCCESS) ? 0 : EX_UNAVAILABLE); -} -
--- a/src/scanner.cpp Sun Jul 10 14:19:00 2005 -0700 +++ b/src/scanner.cpp Wed Jul 13 23:04:14 2005 -0700 @@ -10,6 +10,92 @@ static char* scanner_version="$Id$"; +//////////////////////////////////////////////// +// finite state machine +// +enum state {// host name recognizer states + h_init, + h_host, + + // html tag discarder states + t_init, + t_tag1, // seen opening < + t_tag2, // not comment + t_com1, // seen ! + t_com2, // seen first - + t_com3, // seen second -, looking for --> + t_com4, // seen first - + t_com5, // seen second - + t_disc, // looking for closing > + + // url recognizer states + u_init, + u_http, + u_sla, + u_url, + + // url decoder states %xx + d_init, + d_pcnt, + d_1, + + // html entity decoder states &#nnn; + e_init, + e_amp, + e_num, + + // mime decoder states =xx + m_init, + m_eq, + m_1, + + // base64 decoder states + b_init, + b_lf, + b_lf2, + b_64, + + // uuencoding decoder states + uu_init, + uu_lf, + uu_lf2, + uu_64, + + // counter for number of columns in the table + end_state, + + // temporary states + h_end, + t_bin, + t_end, + u_reco, + d_2, + e_semi, + m_2, + m_cr, + m_nl, + b_cr, + uu_cr + }; + +#define PENDING_LIMIT 100 +class fsa { + u_char pending[PENDING_LIMIT]; + int count; + state st; + state init; + fsa *next1; + fsa *next2; + recorder *memory; + +public: + fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_); + void push(u_char *buf, int len); + void pusher(); + void error(char *err); +}; + + typedef state PARSE[end_state]; static PARSE parse_table[256] = {
--- a/src/scanner.h Sun Jul 10 14:19:00 2005 -0700 +++ b/src/scanner.h Wed Jul 13 23:04:14 2005 -0700 @@ -31,94 +31,9 @@ //////////////////////////////////////////////// -// finite state machine -// -enum state {// host name recognizer states - h_init, - h_host, - - // html tag discarder states - t_init, - t_tag1, // seen opening < - t_tag2, // not comment - t_com1, // seen ! - t_com2, // seen first - - t_com3, // seen second -, looking for --> - t_com4, // seen first - - t_com5, // seen second - - t_disc, // looking for closing > - - // url recognizer states - u_init, - u_http, - u_sla, - u_url, - - // url decoder states %xx - d_init, - d_pcnt, - d_1, - - // html entity decoder states &#nnn; - e_init, - e_amp, - e_num, - - // mime decoder states =xx - m_init, - m_eq, - m_1, - - // base64 decoder states - b_init, - b_lf, - b_lf2, - b_64, - - // uuencoding decoder states - uu_init, - uu_lf, - uu_lf2, - uu_64, - - // counter for number of columns in the table - end_state, - - // temporary states - h_end, - t_bin, - t_end, - u_reco, - d_2, - e_semi, - m_2, - m_cr, - m_nl, - b_cr, - uu_cr - }; - -#define PENDING_LIMIT 100 -class fsa { - u_char pending[PENDING_LIMIT]; - int count; - state st; - state init; - fsa *next1; - fsa *next2; - recorder *memory; - -public: - fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_); - void push(u_char *buf, int len); - void pusher(); - void error(char *err); -}; - - -//////////////////////////////////////////////// // the content scanner // +class fsa; class url_scanner { fsa *host_parser; fsa *tags_parser;
--- a/src/tokenizer.cpp Sun Jul 10 14:19:00 2005 -0700 +++ b/src/tokenizer.cpp Wed Jul 13 23:04:14 2005 -0700 @@ -71,11 +71,11 @@ { s_single, s_term, s_string, s_single, s_eol, }, // 0x28 ( { s_single, s_term, s_string, s_single, s_eol, }, // 0x29 ) { s_single, s_term, s_string, s_single, s_eol, }, // 0x2A * - { s_single, s_term, s_string, s_single, s_eol, }, // 0x2B + + { s_single, s_token, s_string, s_single, s_eol, }, // 0x2B + { s_single, s_term, s_string, s_single, s_eol, }, // 0x2C , { s_single, s_token, s_string, s_single, s_eol, }, // 0x2D - { s_single, s_token, s_string, s_single, s_eol, }, // 0x2E . - { s_slash, s_term, s_string, s_slash, s_eol, }, // 0x2F / + { s_slash, s_token, s_string, s_slash, s_eol, }, // 0x2F / { s_token, s_token, s_string, s_token, s_eol, }, // 0x30 0 { s_token, s_token, s_string, s_token, s_eol, }, // 0x31 1 { s_token, s_token, s_string, s_token, s_eol, }, // 0x32 2 @@ -89,7 +89,7 @@ { s_single, s_term, s_string, s_single, s_eol, }, // 0x3A : { s_single, s_term, s_string, s_single, s_eol, }, // 0x3B ; { s_single, s_term, s_string, s_single, s_eol, }, // 0x3C < - { s_single, s_term, s_string, s_single, s_eol, }, // 0x3D = + { s_single, s_token, s_string, s_single, s_eol, }, // 0x3D = { s_single, s_term, s_string, s_single, s_eol, }, // 0x3E > { s_single, s_term, s_string, s_single, s_eol, }, // 0x3F ? 
{ s_single, s_token, s_string, s_single, s_eol, }, // 0x40 @ @@ -319,7 +319,7 @@ bool TOKEN::next_char(u_char &uc) { if (pushed) { - uc = pushed_char; + uc = (u_char)tolower((char)pushed_char); pushed = false; return true; } @@ -334,6 +334,7 @@ int &line = linenumbers.front(); line++; } + uc = (u_char)tolower((char)uc); return true; } @@ -500,13 +501,22 @@ } -void TOKEN::token_error(const char *token, const char *have) { +void TOKEN::token_error(const char *want, const char *have) { token_error(); - token_error("expecting %s, found %s \n", token, have); + token_error("expecting %s, found %s \n", want, have); } void TOKEN::token_error() { token_error("syntax error at line %d in file %s -- ", cur_line(), cur_fn()); + line_list::iterator j = linenumbers.begin(); + string_list::iterator i = filenames.begin(); + for (; i!=filenames.end(); i++,j++) { + if (i != filenames.begin()) { + char *fn = (*i); + int li = (*j); + token_error("\n included from line %d in file %s -- ", li, fn); + } + } }
--- a/src/tokenizer.h Sun Jul 10 14:19:00 2005 -0700 +++ b/src/tokenizer.h Wed Jul 13 23:04:14 2005 -0700 @@ -46,7 +46,7 @@ void token_error(const char *err); void token_error(const char *fmt, int d, const char *s); void token_error(const char *fmt, const char *t, const char *h); - void token_error(const char *token, const char *have); + void token_error(const char *want, const char *have); void token_error(); };
--- a/xml/dnsbl.in Sun Jul 10 14:19:00 2005 -0700 +++ b/xml/dnsbl.in Wed Jul 13 23:04:14 2005 -0700 @@ -2,7 +2,7 @@ <head> <meta http-equiv="Content-Type" content="text/html; charset=windows-1252"> -<title>DNSBL Sendmail milter - Version 4.6</title> +<title>DNSBL Sendmail milter - Version 5.0</title> </head> <center>Introduction</center> @@ -49,13 +49,11 @@ feature that the mail is rejected earlier (at RCPT TO time), and the sending machine just gets a generic "550 5.7.1 no such user" message. -<p>There is an option to reference the DCC whiteclnt file (via an -include_dcc line) in the DNSBL milter config. This will import the -(env_to, env_from, and substitute mail_host) entries from the DCC config -into the DNSBL config. This allows using the DCC config as the single -point for white/blacklisting. When used in this manner, the whitelist -env_to entries from the DCC config become global whitelist entries in -the DNSBL config. +<p>The DCC whiteclnt file can be included in the DNSBL milter config by +the dcc_to and dcc_from statements. This will import the (env_to, +env_from, and substitute mail_host) entries from the DCC config into the +DNSBL config. This allows using the DCC config as the single point for +white/blacklisting. <p>Consider the case where you have multiple clients, each with their own mail servers, and each running their own DCC milters. Each client @@ -63,12 +61,17 @@ Presumably you can use rsync or scp to fetch copies of your clients DCC whiteclnt files on a regular basis. Your mail server, acting as a backup MX for your clients, can use the DNSBL milter, and include those -client DCC config files. The envelope to white/blacklisting will be -global for your system, but the envelope from white/blacklisting will be -appropriately tagged and used only for the domains controlled by each of -those clients. +client DCC config files. 
The envelope from/to white/blacklisting will +be appropriately tagged and used only for the domains controlled by each +of those clients. <hr> <center>Definitions</center> + +<p>CONTEXT - a collection of parameters that defines the filtering +context to be used for a collection of envelope recipient addresses. +The context includes such things as the list of DNSBLs to be used, and +the various content filtering parameters. + <p>DNSBL - a named DNS based blocking list is defined by a dns suffix (e.g. sbl-xbl.spamhaus.org) and a message string that is used to generate the "550 5.7.1" smtp error return code. The names of these @@ -77,12 +80,10 @@ <p>DNSBL-LIST - a named list of DNSBLs that will be used for specific recipients or recipient domains. -<p>ENVELOPE-FROM-MAP - a named collection of mappings (key->value pairs) -from envelope-from values to the WHITE, BLACK, or DEFAULT keywords. The -names of these maps will be used for specific recipients or recipient -domains. - -<p>The configuration file maps each recipient (or recipient domain) to +<p>The envelope to email address is used to find an initial filtering context. +That context then uses the envelope from email address to find the final +filtering context. The envelope from email address is checked in that context +to see if we should whitelist or blacklist the message two names (a named DNSBL-LIST, and a named ENVELOPE-FROM-MAP). If the recipient is not found in the configuration, the named DEFAULT dnsbl-list and DEFAULT envelope-from-map will be used. When mail is @@ -90,22 +91,44 @@ <ol> -<li>If the client has authenticated with sendmail, the mail is accepted -and the dns lists are not checked. +<li>If the client has authenticated with sendmail, the mail is accepted, +the dns lists are not checked, and the body content is not scanned. + +<li>The envelope to email address is used to find an initial filtering +context. We first look for a context that specified the full email address +in the env_to statement. 
If that is not found, we look for a context that +specified the entire domain name of the envelope recipient in the env_to +statement. If that is not found, we look for a context that specified the +user@ part of the envelope recipient in the env_to statement. If that is not +found, we use the first top level context defined in the config file. -<li>If either one is BLACK, mail to this recipient is rejected with "no +<li>The initial filtering context may redirect to a child context based +on the values in the initial context's env_from statement. We look for +[1) the full envelope from email address, 2) the domain name part of the +envelope from address, 3) the user@ part of the envelope from address] +in that context's env_from statement, with values that point to a child +context. If such an entry is found, we switch to that filtering +context. + +<li>We lookup [1) the full envelope from email address, 2) the domain +name part of the envelope from address, 3) the user@ part of the +envelope from address] in the filtering context env_from statement. +That results in one of (white, black, unknown, inherit). + +<li>If the answer is black, mail to this recipient is rejected with "no such user", and the dns lists are not checked. -<li>If the envelope-from-map name is WHITE, mail to this recipient is -accepted and the dns lists are not checked. +<li>If the answer is white, mail to this recipient is accepted and the +dns lists are not checked. + +<li>If the answer is unknown, we don't reject yet, but the dns lists +will be checked, and the content may be scanned. -<li>If the envelope-from-map exists, the map is checked for the presence -of the sender. A WHITE or BLACK answer is definitive and the dns lists -are not checked. +<li>If the answer is inherit, we repeat the envelope from search in the +parent context. -<li>If the dnsbl-list name is WHITE, the dns lists are not checked and -the mail is accepted. 
Otherwise, the dns lists are checked and the mail -is rejected if any list has an A record for the standard dns based +<li>The dns lists specified in the filtering context are checked and the +mail is rejected if any list has an A record for the standard dns based lookup scheme (reversed octets of the client followed by the dns suffix). @@ -145,7 +168,7 @@ OK entries in the sendmail access database will override all the dnsbl checks. With this DNSBL milter, you will need to have the local users authenticate with smtp-auth to get the same effect. You might find <a -href="http://www.ists.dartmouth.edu/IRIA/knowledge_base/linuxinfo/sendmail-ssh-how-to.htm"> +href="http://www.lists.dartmouth.edu/IRIA/knowledge_base/linuxinfo/sendmail-ssl-how-to.htm"> these directions</a> helpful for setting up smtp-auth if you are on RH Linux. @@ -185,6 +208,15 @@ /usr/sbin/dnsbl -c </pre> +You can check a specific envelope from/to pair with + +<pre> +cd /etc/dnsbl +from="$1" # or your from address +to="$2" # or your to address +/usr/sbin/dnsbl -e "$from"'|'"$to" +</pre> + <hr> <center>Performance issues</center> <p>Consider a high volume high performance machine running sendmail.
--- a/xml/sample.conf Sun Jul 10 14:19:00 2005 -0700 +++ b/xml/sample.conf Wed Jul 13 23:04:14 2005 -0700 @@ -1,192 +1,153 @@ # $Id$ # -# lines start with a command token, following by argument tokens -# tokens are separated by spaces or tabs -# -# -# tld: -# second token is the tld suffix - com, net, org, etc -# -# -# content: -# second token is the dns suffix used for the actual lookups -# third token? is a string enclosed in single quotes, so it -# is not really a token. This is the error message, with -# up to two %s parameters for the offending host name and -# client ip address respectively. -# -# If this command is not present, there is no body scanning -# for host names or bad html tags. -# -# -# ignore: -# second token is a host name that is allowed in the body even -# if it would otherwise be rejected by the content scanning -# above. -# -# -# host_limit: -# second token is the integer count of the number of host names -# or urls that are allowed in any one mail body. Zero is -# unlimited. If the actual number of host names in the message -# is larger than this limit, the message is rejected. -# third token? is a string enclosed in single quotes, so it -# is not really a token. This is the error message supplied -# to the smtp client. -# -# -# host_soft_limit: -# second token is the integer count of the number of host names -# or urls that are checked in any one mail body. Zero is -# unlimited. If the actual number of host names in the message -# is larger than this limit, only a random selection of them -# are checked against the dnsbl. -# -# -# html_limit: -# second token is the integer count of the number of bad html tags -# that are allowed in any one mail body. Zero is unlimited. -# third token? is a string enclosed in single quotes, so it -# is not really a token. This is the error message supplied -# to the smtp client. -# -# -# html_tag: -# second token is a valid html tag, that is added to the list -# of valid tags. 
Any html tag seen in the mail bodies that -# that is not in this list is presumed to be invalid. -# -# -# dnsbl: -# second token is the name of this dnsbl -# third token is the dns suffix used for the actual lookups -# fourth token? is a string enclosed in single quotes, so it -# is not really a token. This is the error message, with -# up to two %s parameters for the client ip address. -# + +# partial bnf description of this configuration language # -# dnsbl_list: -# second token is the name of this list of dnsbls -# subsequent tokes are the names of the previously defined dnsbls -# -# -# env_from: -# second token is the name of this envelope-from-map. There will -# generally be multiple lines with the same name. -# third token is the envelope from value from the smtp conversation, -# or just the domain part that follows the @ symbol. -# fourth token is BLACK, WHITE, or the name of a previously defined -# envelope-from-map. BLACK causes mail from this sender to be -# rejected with "no such user". WHITE causes mail to be accepted -# and the dns based lists are ignored. DEFAULT may be used to override -# the contents of other maps that are copied into this map, and -# set that sender back to the default (not white or black listed, -# and subject to dnsbl lookups). -# -# -# env_to: -# second token is the envelope recipient value from the smtp conversation, -# or just the domain part that follows the @ symbol. -# third token is the name of a dnsbl-list, or WHITE or BLACK. -# fourth token is the name of an envelope-from-map, or WHITE or BLACK. -# -# If either one is BLACK, mail to this recipient is rejected with -# "no such user", and the dns lists are not checked. -# -# If the envelope-from-map name is WHITE, mail to this recipient is accepted -# and the dns lists are not checked. -# -# If the envelope-from-map exists, the map is checked for the presence -# of the sender. A WHITE or BLACK answer is definitive and the dns lists -# are not checked. 
-# -# If the dnsbl-list name is WHITE, the dns lists are not checked and the -# mail is accepted. Otherwise, the dns lists are checked and the mail -# is rejected if any list has an A record for the standard dns based -# lookup scheme (reversed octets of the client followed by the dns suffix). -# -# -# include: -# second token is the path name of the dnsbl milter config file to be -# included. -# -# -# include_dcc: -# second token is the name of an envelope-from-map (EMAP below). -# third token is the path name of the dcc whiteclnt config file to be -# included. Entries from the dcc config are mapped as: -# ok -> WHITE -# many -> BLACK -# env_from -> env_from EMAP xxx -# env_to -> env_to -# substitute mail_host -> env_from EMAP xxx -# -# -# -############################################## -# content scanning parameters -# -content sbl-xbl.spamhaus.org 'Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s' -host_limit 20 'Mail containing too many host names rejected' -host_soft_limit 20 -html_limit 20 'Mail containing excessive bad html tags rejected' -include hosts-ignore.conf -include html-tags.conf -include tld.conf +# CONFIG = {CONTEXT ";"}+ +# CONTEXT = "context" NAME "{" {STATEMENT}+ "}" +# STATEMENT = (DNSBL | DNSBLLIST | CONTENT | ENV-TO | CONTEXT | ENV-FROM) ";" + +# DNSBL = "dnsbl" NAME DNSPREFIX ERROR-MSG + +# DNSBLLIST = "dnsbl_list" {NAME}+ + +# CONTENT = "content" ("on" | "off") "{" {CONTENT-STATEMENT}+ "}" +# CONTENT-STATEMENT = (FILTER | IGNORE | TLD | HTML-TAGS | HTML-LIMIT | HOST-LIMIT) ";" +# FILTER = "filter" DNSPREFIX ERROR-MSG +# IGNORE = "ignore" "{" {HOSTNAME [";"]}+ "}" +# TLD = "tld" "{" {TLD [";"]}+ "}" +# HTML-TAGS = "html_tags" "{" {HTMLTAG [";"]}+ "}" +# ERROR-MSG = string containing exactly two %s replacement tokens for the client ip address + +# HTML-LIMIT = "html_limit" ("on" INTEGER ERROR-MSG | "off") + +# HOST-LIMIT = "host_limit" ("on" INTEGER ERROR-MSG | "off" | "soft" INTEGER) + +# ENV-TO = "env_to" "{" 
{(TO-ADDR | DCC-TO)}+ "}" +# TO-ADDR = ADDRESS [";"] +# DCC-TO = "dcc_to" ("ok" | "many") "{" DCCINCLUDEFILE "}" ";" + +# ENV-FROM = "env_from" DEFAULT "{" {(FROM-ADDR | DCC-FROM)}+ "}" +# FROM-ADDR = ADDRESS VALUE [";"] +# DCC-FROM = "dcc_from" "{" DCCINCLUDEFILE "}" ";" +# DEFAULT = ("white" | "black" | "unknown" | "inherit" | "") +# ADDRESS = (USER@ | DOMAIN | USER@DOMAIN) +# VALUE = ("white" | "black" | "unknown" | "inherit" | CHILD-CONTEXT-NAME) -############################################## -# define the dnsbls to use -# -dnsbl LOCAL blackholes.five-ten-sg.com 'Mail from %s rejected - local; see http://www.five-ten-sg.com/blackhole.php?%s' -dnsbl SPEWS blackholes.spews.org 'Mail from %s rejected - spews; see http://www.spews.org/ask.cgi?x=%s' -dnsbl SBL sbl-xbl.spamhaus.org 'Mail from %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s' +context sample { + dnsbl local blackholes.five-ten-sg.com "Mail from %s rejected - local; see http://www.five-ten-sg.com/blackhole.php?%s"; + dnsbl spews blackholes.spews.org "Mail from %s rejected - spews; see http://www.spews.org/ask.cgi?x=%s"; + dnsbl sbl sbl-xbl.spamhaus.org "Mail from %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s"; + dnsbl xbl xbl.spamhaus.org "Mail from %s rejected - xbl; see http://www.spamhaus.org/query/bl?ip=%s"; + dnsbl_list local sbl; + content on { + filter sbl-xbl.spamhaus.org "Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s"; + ignore { include "hosts-ignore.conf"; }; + tld { include "tld.conf"; }; + html_tags { include "html-tags.conf"; }; + html_limit off; + host_limit on 20 "Mail containing too many host names rejected"; + host_limit soft 20; + }; + + env_to { + mydomain.com; # child contexts are not allowed to specify recipient addresses outside these domains + customer1.com; + customer1a.com; + customer1b.com; + customer2.com; + customer2a.com; + customer2b.com; + }; -############################################## -# define the (default and
other) lists of dnsbls to use -# -dnsbl_list DEFAULT LOCAL SPEWS SBL -dnsbl_list SIMPLE SBL -dnsbl_list CUST1 SBL -dnsbl_list CUST2 SPEWS SBL + context whitelist { + content off {}; + env_to { + # dcc_to ok { include "/var/dcc/whitecommon"; }; + }; + env_from white {}; # white forces all unmatched from addresses (everyone in this case) to be whitelisted + # so all mail TO these env_to addresses is accepted + }; + context abuse { + dnsbl_list xbl; + content off {}; + env_to { + abuse@; # no content filtering on abuse reports + postmaster@; # "" + }; + env_from unknown {}; # ignore all parent white/black listing + }; -############################################## -# define the (default and other) env_from maps -# -env_from DEFAULT spammer@example.com BLACK -env_from DEFAULT yahoo.com BLACK + context minimal { + dnsbl_list sbl; + content on {}; + env_to { + sales@mydomain.com; + }; + }; -# special list for the vp -env_from TEST dummy-token DEFAULT # inherit the currently defined DEFAULT env_from mapping -env_from TEST nai.com BLACK # the vp does not like nai -env_from TEST yahoo.com DEFAULT # -env_from TEST mother@spammyisp.com WHITE # suppresses dnsbl checking - + context blacklist { + env_to { + dcc_to many { include "/var/dcc/whitecommon"; }; + old-employee@mydomain.com; + }; + env_from black {}; # black forces all unmatched from addresses (everyone in this case) to be blacklisted + # so all mail TO these env_to addresses is rejected + }; -############################################## -# specify dnsbl_lists and env_from maps to use for specific recipients -# -env_to abuse@mydomain.com WHITE WHITE # no dnsbl, no env_from map -env_to sales@mydomain.com SIMPLE NULL # sbl only, no env_from map -env_to vp@mydomain.com DEFAULT TEST # allow mail from mom -env_to old-emp@mydomain.com BLACK BLACK # return no such user even from backup mx machines + context vp { # special context for the vp + env_to { + vp@mydomain.com; + }; + env_from inherit { + nai.com black; # the vp 
does not like nai + yahoo.com unknown; # override parent context blacklisting + mother@spammyisp.com white; # suppress dnsbl checking + }; + }; + + context customer1 { + dnsbl_list sbl; + env_to { + customer1.com; + customer1a.com; + customer1b.com; + }; -############################################## -# specify dnsbl_lists and env_from maps to use for clients domains -# -env_to mydomain.com DEFAULT DEFAULT -env_to customer1.com CUST1 DEFAULT # all customer 1 domains use just sbl -env_to customer1a.com CUST1 DEFAULT -env_to customer1b.com CUST1 DEFAULT -env_to customer2.com CUST2 DEFAULT # all customer 2 domains use spews and sbl -env_to customer2a.com CUST2 DEFAULT + context customer1a { + env_to { + customer1a.com; + }; + env_from black { # blacklist everything + first@acceptable.com unknown; # except these specific envelope senders + second@another.com unknown; + yahoo.com inherit; # delegate to the parent + }; + }; + + env_from { + yahoo.com black; # no mail from yahoo + first@yahoo.com unknown; # except this one + }; + }; + context customer2 { + dnsbl_list sbl spews; + env_to { + customer2.com; + customer2a.com; + customer2b.com; + }; + }; -############################################## -# you can also include nested config files -# file names are single tokens, no embedded blanks -# -include dnsbl.conf # this will generate a recursive include file syslog error message -include_dcc DEFAULT /var/dcc/whitecommon # this includes the default dcc whitelist file + env_from unknown { + dcc_from { include "/var/dcc/whitecommon"; }; # use the dcc whitecommon list ok/many values to white/black list envelope from values here + abuse@ abuse; # replies to abuse reports use the abuse context + yahoo.com black; # don't take mail from yahoo + spammer@example.com black; + }; +}; +