# HG changeset patch # User carl # Date 1085290245 25200 # Node ID 43a4f6b3e668cc1c5768f1de1ad938bdfe0a6501 # Parent fdae7ab30cfcf9a1eefef08d4cce7875d8e76873 add configurable host name limit and bad html tag limits. diff -r fdae7ab30cfc -r 43a4f6b3e668 sendmail.st Binary file sendmail.st has changed diff -r fdae7ab30cfc -r 43a4f6b3e668 src/dnsbl.cpp --- a/src/dnsbl.cpp Fri May 21 21:55:38 2004 -0700 +++ b/src/dnsbl.cpp Sat May 22 22:30:45 2004 -0700 @@ -71,7 +71,9 @@ enum status {oksofar, // not rejected yet white, // whitelisted by envelope from black, // blacklisted by envelope from or to - reject}; // rejected by a dns list + reject, // rejected by a dns list + reject_tag, // too many bad html tags + reject_host}; // too many hosts/urls in body using namespace std; @@ -125,8 +127,10 @@ string_map env_to_chkfrom; // map recipient to a named from map char * content_suffix; // for sbl url body filtering char * content_message; // "" - char * limit_message; // error message for excessive bad html tags - int bad_tag_limit; // limit on bad html tags + char * host_limit_message; // error message for excessive host names + int host_limit; // limit on host names + char * tag_limit_message; // error message for excessive bad html tags + int tag_limit; // limit on bad html tags string_set html_tags; // set of valid html tags CONFIG(); ~CONFIG(); @@ -136,8 +140,10 @@ load_time = 0; content_suffix = NULL; content_message = NULL; - limit_message = NULL; - bad_tag_limit = 0; + host_limit_message = NULL; + host_limit = 0; + tag_limit_message = NULL; + tag_limit = 0; } CONFIG::~CONFIG() { for (dnsblp_map::iterator i=dnsbls.begin(); i!=dnsbls.end(); i++) { @@ -483,7 +489,8 @@ int count = 0; for (string_set::iterator i=priv.memory->hosts.begin(); i!=priv.memory->hosts.end(); i++) { count++; - if (count > 20) return oksofar; // silly to check too many hosts + int lim = priv.pc->host_limit; + if ((count > lim) && (lim > 0)) return reject_host; host = *i; if (debug_syslog) { char buf[200]; @@ -492,14 +499,14 @@ } ip = protected_dns_interface(host, true); if (ip) { - if (debug_syslog) { - char adr[sizeof "255.255.255.255"]; - adr[0] = '\0'; - inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); - char buf[200]; - snprintf(buf, sizeof(buf), "found host %s at %s", host, adr); - my_syslog(buf); - } + // if (debug_syslog) { + // char adr[sizeof "255.255.255.255"]; + // adr[0] = '\0'; + // inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); + // char buf[200]; + // snprintf(buf, sizeof(buf), "found host %s at %s", host, adr); + // my_syslog(buf); + // } status st = check_single(ip, dc.content_suffix); if (st == reject) return st; } @@ -507,9 +514,9 @@ host = NULL; int bin = priv.memory->binary_tags; int bad = priv.memory->bad_html_tags; - int lim = priv.pc->bad_tag_limit; + int lim = priv.pc->tag_limit; if (bin > bad) return oksofar; // probably .zip or .tar.gz with random content - if ((bad > lim) && (lim > 0)) return reject; + if ((bad > lim) && (lim > 0)) return reject_tag; return oksofar; } @@ -618,17 +625,22 @@ mlfiPriv &priv = *MLFIPRIV; char *host = NULL; int ip; + status st; // process end of message if (priv.authenticated || priv.only_whites || - (check_hosts(priv, host, ip) == oksofar)) rc = SMFIS_CONTINUE; + ((st=check_hosts(priv, host, ip)) == oksofar)) rc = SMFIS_CONTINUE; else { if (!priv.have_whites) { // can reject the entire message char buf[2000]; - if (!host) { - // must be rejected due to excessive bad html tags - snprintf(buf, sizeof(buf), priv.pc->limit_message); + if (st == reject_tag) { + // rejected due to excessive bad html tags + snprintf(buf, sizeof(buf), priv.pc->tag_limit_message); + } + else if (st == reject_host) { + // rejected due to excessive unique host/urls + snprintf(buf, sizeof(buf), priv.pc->host_limit_message); } else { char adr[sizeof "255.255.255.255"]; @@ -730,8 +742,11 @@ if (dc.content_suffix) { fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message); } - if (dc.bad_tag_limit) { - fprintf(stdout, "\ncontent filtering for excessive html tags enabled with limit %d %s\n", dc.bad_tag_limit, dc.limit_message); + if (dc.host_limit) { + fprintf(stdout, "\ncontent filtering for host names enabled with limit %d %s\n", dc.host_limit, dc.host_limit_message); + } + if (dc.tag_limit) { + fprintf(stdout, "\ncontent filtering for excessive html tags enabled with limit %d %s\n", dc.tag_limit, dc.tag_limit_message); } fprintf(stdout, "\nfiles\n"); for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) { @@ -830,8 +845,9 @@ static void load_conf(CONFIG &dc, char *fn) { dc.config_files.push_back(fn); map commands; - enum {dummy, content, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc}; + enum {dummy, content, hostlimit, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc}; commands["content" ] = content; + commands["host_limit" ] = hostlimit; commands["html_limit" ] = htmllimit; commands["html_tag" ] = htmltag; commands["dnsbl" ] = dnsbl; @@ -875,6 +891,23 @@ processed = true; } break; + case hostlimit: { + char *limit = strtok(NULL, delim); + if (!limit) break; // no integer limit + char *msg = limit + strlen(limit); + if ((msg - line) >= strlen(orig)) break; // line ended with the limit + msg = strchr(msg+1, '\''); + if (!msg) break; // no reply message template + msg++; // move over the leading ' + if ((msg - line) >= strlen(orig)) break; // line ended with the leading quote + char *last = strchr(msg, '\''); + if (!last) break; // no trailing quote + *last = '\0'; // make it a null terminator + dc.host_limit = atoi(limit); + dc.host_limit_message = register_string(msg); + processed = true; + } break; + case htmllimit: { char *limit = strtok(NULL, delim); if (!limit) break; // no integer limit @@ -887,18 +920,20 @@ char *last = strchr(msg, '\''); if (!last) break; // no trailing quote *last = '\0'; // make it a null terminator - dc.bad_tag_limit = atoi(limit); - dc.limit_message = register_string(msg); + dc.tag_limit = atoi(limit); + dc.tag_limit_message = register_string(msg); processed = true; } break; case htmltag: { char *tag = next_token(delim); if (!tag) break; // no html tag value + dc.html_tags.insert(tag); // base version char buf[200]; snprintf(buf, sizeof(buf), "/%s", tag); - dc.html_tags.insert(tag); - dc.html_tags.insert(register_string(buf)); + dc.html_tags.insert(register_string(buf)); // leading / + snprintf(buf, sizeof(buf), "%s/", tag); + dc.html_tags.insert(register_string(buf)); // trailing / processed = true; } break; diff -r fdae7ab30cfc -r 43a4f6b3e668 src/scanner.cpp --- a/src/scanner.cpp Fri May 21 21:55:38 2004 -0700 +++ b/src/scanner.cpp Sat May 22 22:30:45 2004 -0700 @@ -179,7 +179,7 @@ {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_eq, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x3D = {h_init, h_end, t_init, t_end, t_end, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3E > {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3F ? - {h_init, h_end, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x40 @ + {h_init, h_host, t_init, t_disc, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x40 @ {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x41 A {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x42 B {h_host, h_host, t_init, t_tag, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x43 C @@ -379,6 +379,7 @@ ".net", ".org", ".biz", + ".info", NULL }; @@ -950,6 +951,8 @@ // host name recognizer case h_end: { pending[--count] = '\0'; // null terminate host name by overwriting the terminator + if (!strchr((const char *)pending, '@')) { + // not an email address or message id char *tld; for (int i=0; (tld = tlds[i]); i++) { int n = strlen(tld); @@ -960,6 +963,7 @@ } } } + } st = h_init; } // fall thru diff -r fdae7ab30cfc -r 43a4f6b3e668 xml/dnsbl.in --- a/xml/dnsbl.in Fri May 21 21:55:38 2004 -0700 +++ b/xml/dnsbl.in Sat May 22 22:30:45 2004 -0700 @@ -19,12 +19,13 @@ per-recipient basis, so that fred@example.com could use SPEWS and the SBL, where all other users @example.com use only the SBL. -

This milter will also decode (base64, mime, html entity) and scan for -HTTP and HTTPS URLs and bare hostnames in the body of the mail. If any -of those host names have A records on the SBL (or a single configurable -list), the mail will be rejected unless previously whitelisted. This -milter also counts the number of invalid HTML tags, and can reject mail -if that count exceeds your specified limit. +

This milter will also decode (base64, mime, html entity, url +encodings) and scan for HTTP and HTTPS URLs and bare hostnames in the +body of the mail. If any of those host names have A records on the SBL +(or a single configurable DNSBL), the mail will be rejected unless +previously whitelisted. This milter also counts the number of invalid +HTML tags, and can reject mail if that count exceeds your specified +limit.

The DNSBL milter reads a text configuration file (dnsbl.conf) on startup, and whenever the config file (or any of the referenced include diff -r fdae7ab30cfc -r 43a4f6b3e668 xml/sample.conf --- a/xml/sample.conf Fri May 21 21:55:38 2004 -0700 +++ b/xml/sample.conf Sat May 22 22:30:45 2004 -0700 @@ -11,9 +11,17 @@ # up to two %s parameters for the offending host name and # client ip address respectively. # +# host_limit: +# second token is the integer count of the number of host names +# or urls that are allowed in any one mail body. Zero is +# unlimited. +# third token? is a string enclosed in single quotes, so it +# is not really a token. This is the error message supplied +# to the smtp client. +# # html_limit: -# second token is the integer count of the number of bad html -# tags that are allowed in any one mail body. +# second token is the integer count of the number of bad html tags +# that are allowed in any one mail body. Zero is unlimited. # third token? is a string enclosed in single quotes, so it # is not really a token. This is the error message supplied # to the smtp client. @@ -91,7 +99,8 @@ # content scanning parameters # content sbl-xbl.spamhaus.org 'Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s' -html_limit 20 +host_limit 20 'Mail containing too many host names rejected' +html_limit 20 'Mail containing excessive bad html tags rejected' include html-tags.conf