Mercurial > dnsbl
diff src/dnsbl.cpp @ 24:2e23b7184d2b
start coding for bad html tag detection
author | carl |
---|---|
date | Wed, 19 May 2004 21:40:50 -0700 |
parents | 06de5ab6a232 |
children | fdae7ab30cfc |
line wrap: on
line diff
--- a/src/dnsbl.cpp Wed May 12 13:23:22 2004 -0700 +++ b/src/dnsbl.cpp Wed May 19 21:40:50 2004 -0700 @@ -125,6 +125,9 @@ string_map env_to_chkfrom; // map recipient to a named from map char * content_suffix; // for sbl url body filtering char * content_message; // "" + char * limit_message; // error message for excessive bad html tags + int bad_tag_limit; // limit on bad html tags + string_set html_tags; // set of valid html tags CONFIG(); ~CONFIG(); }; @@ -133,14 +136,19 @@ load_time = 0; content_suffix = NULL; content_message = NULL; + limit_message = NULL; + bad_tag_limit = 0; } CONFIG::~CONFIG() { for (dnsblp_map::iterator i=dnsbls.begin(); i!=dnsbls.end(); i++) { DNSBLP d = (*i).second; + // delete the underlying DNSBL objects. delete d; } for (dnsbllp_map::iterator i=dnsblls.begin(); i!=dnsblls.end(); i++) { DNSBLLP d = (*i).second; + // *d is a list of pointers to DNSBL objects, but + // the underlying objects have already been deleted above. delete d; } for (from_map::iterator i=env_from.begin(); i!=env_from.end(); i++) { @@ -213,9 +221,9 @@ bool authenticated; // client authenticated? if so, suppress all dnsbl checks bool have_whites; // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails bool only_whites; // every recipient is whitelisted? + string_set non_whites; // remember the non-whitelisted recipients so we can remove them if need be + recorder *memory; // memory for the content scanner url_scanner *scanner; // object to handle body scanning - string_set non_whites; // remember the non-whitelisted recipients so we can remove them if need be - string_set hosts; // remember the hosts that we have checked mlfiPriv(); ~mlfiPriv(); void reset(bool final = false); // for a new message @@ -230,7 +238,8 @@ authenticated = false; have_whites = false; only_whites = true; - scanner = new url_scanner(&hosts); + memory = new recorder(&pc->html_tags); + scanner = new url_scanner(memory); } mlfiPriv::~mlfiPriv() { pthread_mutex_lock(&config_mutex); @@ -240,15 +249,16 @@ } void mlfiPriv::reset(bool final) { if (mailaddr) free(mailaddr); + discard(non_whites); + delete memory; delete scanner; - discard(non_whites); - discard(hosts); if (!final) { mailaddr = NULL; authenticated = false; have_whites = false; only_whites = true; - scanner = new url_scanner(&hosts); + memory = new recorder(&pc->html_tags); + scanner = new url_scanner(memory); } } @@ -471,7 +481,7 @@ CONFIG &dc = *priv.pc; if (!dc.content_suffix) return oksofar; int count = 0; - for (string_set::iterator i=priv.hosts.begin(); i!=priv.hosts.end(); i++) { + for (string_set::iterator i=priv.memory->hosts.begin(); i!=priv.memory->hosts.end(); i++) { count++; if (count > 20) return oksofar; // silly to check too many hosts host = *i; @@ -494,6 +504,10 @@ if (st == reject) return st; } } + host = NULL; + int bad = priv.memory->bad_html_tags; + int lim = priv.pc->bad_tag_limit; + if ((bad > lim) && (lim > 0)) return reject; return oksofar; } @@ -609,11 +623,17 @@ else { if (!priv.have_whites) { // can reject the entire message + char buf[2000]; + if (!host) { + // must be rejected due to excessive bad html tags + snprintf(buf, sizeof(buf), priv.pc->limit_message); + } + else { char adr[sizeof "255.255.255.255"]; adr[0] = '\0'; inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); - char buf[2000]; snprintf(buf, sizeof(buf), priv.pc->content_message, host, adr); + } smfi_setreply(ctx, "550", "5.7.1", buf); rc = SMFIS_REJECT; } @@ -708,6 +728,9 @@ if (dc.content_suffix) { fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message); } + if (dc.bad_tag_limit) { + fprintf(stdout, "\ncontent filtering for excessive html tags enabled with limit %d %s\n", dc.bad_tag_limit, dc.limit_message); + } fprintf(stdout, "\nfiles\n"); for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) { char *f = *i; @@ -805,8 +828,10 @@ static void load_conf(CONFIG &dc, char *fn) { dc.config_files.push_back(fn); map<char*, int, ltstr> commands; - enum {dummy, content, dnsbl, dnsbll, envfrom, envto, include, includedcc}; + enum {dummy, content, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc}; commands["content" ] = content; + commands["html_limit" ] = htmllimit; + commands["html_tag" ] = htmltag; commands["dnsbl" ] = dnsbl; commands["dnsbl_list" ] = dnsbll; commands["env_from" ] = envfrom; @@ -833,7 +858,7 @@ switch (commands[cmd]) { case content: { char *suff = strtok(NULL, delim); - if (!suff) break; // no dns suffic + if (!suff) break; // no dns suffix char *msg = suff + strlen(suff); if ((msg - line) >= strlen(orig)) break; // line ended with the dns suffix msg = strchr(msg+1, '\''); @@ -848,6 +873,30 @@ processed = true; } break; + case htmllimit: { + char *limit = strtok(NULL, delim); + if (!limit) break; // no integer limit + char *msg = limit + strlen(limit); + if ((msg - line) >= strlen(orig)) break; // line ended with the limit + msg = strchr(msg+1, '\''); + if (!msg) break; // no reply message template + msg++; // move over the leading ' + if ((msg - line) >= strlen(orig)) break; // line ended with the leading quote + char *last = strchr(msg, '\''); + if (!last) break; // no trailing quote + *last = '\0'; // make it a null terminator + dc.bad_tag_limit = atoi(limit); + dc.limit_message = register_string(msg); + processed = true; + } break; + + case htmltag: { + char *tag = next_token(delim); + if (!tag) break; // no html tag value + dc.html_tags.insert(tag); + processed = true; + } break; + case dnsbl: { // have a new dnsbl to use char *name = next_token(delim);