comparison src/dnsbl.cpp @ 24:2e23b7184d2b

start coding for bad html tag detection
author carl
date Wed, 19 May 2004 21:40:50 -0700
parents 06de5ab6a232
children fdae7ab30cfc
comparison
equal deleted inserted replaced
23:06de5ab6a232 24:2e23b7184d2b
123 from_map env_from; 123 from_map env_from;
124 string_map env_to_dnsbll; // map recipient to a named dnsbll 124 string_map env_to_dnsbll; // map recipient to a named dnsbll
125 string_map env_to_chkfrom; // map recipient to a named from map 125 string_map env_to_chkfrom; // map recipient to a named from map
126 char * content_suffix; // for sbl url body filtering 126 char * content_suffix; // for sbl url body filtering
127 char * content_message; // "" 127 char * content_message; // ""
128 char * limit_message; // error message for excessive bad html tags
129 int bad_tag_limit; // limit on bad html tags
130 string_set html_tags; // set of valid html tags
128 CONFIG(); 131 CONFIG();
129 ~CONFIG(); 132 ~CONFIG();
130 }; 133 };
131 CONFIG::CONFIG() { 134 CONFIG::CONFIG() {
132 reference_count = 0; 135 reference_count = 0;
133 load_time = 0; 136 load_time = 0;
134 content_suffix = NULL; 137 content_suffix = NULL;
135 content_message = NULL; 138 content_message = NULL;
139 limit_message = NULL;
140 bad_tag_limit = 0;
136 } 141 }
137 CONFIG::~CONFIG() { 142 CONFIG::~CONFIG() {
138 for (dnsblp_map::iterator i=dnsbls.begin(); i!=dnsbls.end(); i++) { 143 for (dnsblp_map::iterator i=dnsbls.begin(); i!=dnsbls.end(); i++) {
139 DNSBLP d = (*i).second; 144 DNSBLP d = (*i).second;
145 // delete the underlying DNSBL objects.
140 delete d; 146 delete d;
141 } 147 }
142 for (dnsbllp_map::iterator i=dnsblls.begin(); i!=dnsblls.end(); i++) { 148 for (dnsbllp_map::iterator i=dnsblls.begin(); i!=dnsblls.end(); i++) {
143 DNSBLLP d = (*i).second; 149 DNSBLLP d = (*i).second;
150 // *d is a list of pointers to DNSBL objects, but
151 // the underlying objects have already been deleted above.
144 delete d; 152 delete d;
145 } 153 }
146 for (from_map::iterator i=env_from.begin(); i!=env_from.end(); i++) { 154 for (from_map::iterator i=env_from.begin(); i!=env_from.end(); i++) {
147 string_map *d = (*i).second; 155 string_map *d = (*i).second;
148 delete d; 156 delete d;
211 // message specific data 219 // message specific data
212 char *mailaddr; // envelope from value 220 char *mailaddr; // envelope from value
213 bool authenticated; // client authenticated? if so, suppress all dnsbl checks 221 bool authenticated; // client authenticated? if so, suppress all dnsbl checks
214 bool have_whites; // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails 222 bool have_whites; // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails
215 bool only_whites; // every recipient is whitelisted? 223 bool only_whites; // every recipient is whitelisted?
224 string_set non_whites; // remember the non-whitelisted recipients so we can remove them if need be
225 recorder *memory; // memory for the content scanner
216 url_scanner *scanner; // object to handle body scanning 226 url_scanner *scanner; // object to handle body scanning
217 string_set non_whites; // remember the non-whitelisted recipients so we can remove them if need be
218 string_set hosts; // remember the hosts that we have checked
219 mlfiPriv(); 227 mlfiPriv();
220 ~mlfiPriv(); 228 ~mlfiPriv();
221 void reset(bool final = false); // for a new message 229 void reset(bool final = false); // for a new message
222 }; 230 };
223 mlfiPriv::mlfiPriv() { 231 mlfiPriv::mlfiPriv() {
228 ip = 0; 236 ip = 0;
229 mailaddr = NULL; 237 mailaddr = NULL;
230 authenticated = false; 238 authenticated = false;
231 have_whites = false; 239 have_whites = false;
232 only_whites = true; 240 only_whites = true;
233 scanner = new url_scanner(&hosts); 241 memory = new recorder(&pc->html_tags);
242 scanner = new url_scanner(memory);
234 } 243 }
235 mlfiPriv::~mlfiPriv() { 244 mlfiPriv::~mlfiPriv() {
236 pthread_mutex_lock(&config_mutex); 245 pthread_mutex_lock(&config_mutex);
237 pc->reference_count--; 246 pc->reference_count--;
238 pthread_mutex_unlock(&config_mutex); 247 pthread_mutex_unlock(&config_mutex);
239 reset(true); 248 reset(true);
240 } 249 }
241 void mlfiPriv::reset(bool final) { 250 void mlfiPriv::reset(bool final) {
242 if (mailaddr) free(mailaddr); 251 if (mailaddr) free(mailaddr);
252 discard(non_whites);
253 delete memory;
243 delete scanner; 254 delete scanner;
244 discard(non_whites);
245 discard(hosts);
246 if (!final) { 255 if (!final) {
247 mailaddr = NULL; 256 mailaddr = NULL;
248 authenticated = false; 257 authenticated = false;
249 have_whites = false; 258 have_whites = false;
250 only_whites = true; 259 only_whites = true;
251 scanner = new url_scanner(&hosts); 260 memory = new recorder(&pc->html_tags);
261 scanner = new url_scanner(memory);
252 } 262 }
253 } 263 }
254 264
255 #define MLFIPRIV ((struct mlfiPriv *) smfi_getpriv(ctx)) 265 #define MLFIPRIV ((struct mlfiPriv *) smfi_getpriv(ctx))
256 266
469 static status check_hosts(mlfiPriv &priv, char *&host, int &ip); 479 static status check_hosts(mlfiPriv &priv, char *&host, int &ip);
470 static status check_hosts(mlfiPriv &priv, char *&host, int &ip) { 480 static status check_hosts(mlfiPriv &priv, char *&host, int &ip) {
471 CONFIG &dc = *priv.pc; 481 CONFIG &dc = *priv.pc;
472 if (!dc.content_suffix) return oksofar; 482 if (!dc.content_suffix) return oksofar;
473 int count = 0; 483 int count = 0;
474 for (string_set::iterator i=priv.hosts.begin(); i!=priv.hosts.end(); i++) { 484 for (string_set::iterator i=priv.memory->hosts.begin(); i!=priv.memory->hosts.end(); i++) {
475 count++; 485 count++;
476 if (count > 20) return oksofar; // silly to check too many hosts 486 if (count > 20) return oksofar; // silly to check too many hosts
477 host = *i; 487 host = *i;
478 if (debug_syslog) { 488 if (debug_syslog) {
479 char buf[200]; 489 char buf[200];
492 } 502 }
493 status st = check_single(ip, dc.content_suffix); 503 status st = check_single(ip, dc.content_suffix);
494 if (st == reject) return st; 504 if (st == reject) return st;
495 } 505 }
496 } 506 }
507 host = NULL;
508 int bad = priv.memory->bad_html_tags;
509 int lim = priv.pc->bad_tag_limit;
510 if ((bad > lim) && (lim > 0)) return reject;
497 return oksofar; 511 return oksofar;
498 } 512 }
499 513
500 514
501 //////////////////////////////////////////////// 515 ////////////////////////////////////////////////
607 priv.only_whites || 621 priv.only_whites ||
608 (check_hosts(priv, host, ip) == oksofar)) rc = SMFIS_CONTINUE; 622 (check_hosts(priv, host, ip) == oksofar)) rc = SMFIS_CONTINUE;
609 else { 623 else {
610 if (!priv.have_whites) { 624 if (!priv.have_whites) {
611 // can reject the entire message 625 // can reject the entire message
612 char adr[sizeof "255.255.255.255"];
613 adr[0] = '\0';
614 inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
615 char buf[2000]; 626 char buf[2000];
616 snprintf(buf, sizeof(buf), priv.pc->content_message, host, adr); 627 if (!host) {
628 // must be rejected due to excessive bad html tags
629 snprintf(buf, sizeof(buf), priv.pc->limit_message);
630 }
631 else {
632 char adr[sizeof "255.255.255.255"];
633 adr[0] = '\0';
634 inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr));
635 snprintf(buf, sizeof(buf), priv.pc->content_message, host, adr);
636 }
617 smfi_setreply(ctx, "550", "5.7.1", buf); 637 smfi_setreply(ctx, "550", "5.7.1", buf);
618 rc = SMFIS_REJECT; 638 rc = SMFIS_REJECT;
619 } 639 }
620 else { 640 else {
621 // need to accept it but remove the recipients that don't want it 641 // need to accept it but remove the recipients that don't want it
705 } 725 }
706 fprintf(stdout, "\n"); 726 fprintf(stdout, "\n");
707 } 727 }
708 if (dc.content_suffix) { 728 if (dc.content_suffix) {
709 fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message); 729 fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message);
730 }
731 if (dc.bad_tag_limit) {
732 fprintf(stdout, "\ncontent filtering for excessive html tags enabled with limit %d %s\n", dc.bad_tag_limit, dc.limit_message);
710 } 733 }
711 fprintf(stdout, "\nfiles\n"); 734 fprintf(stdout, "\nfiles\n");
712 for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) { 735 for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) {
713 char *f = *i; 736 char *f = *i;
714 fprintf(stdout, "config includes %s\n", f); 737 fprintf(stdout, "config includes %s\n", f);
803 826
804 static void load_conf(CONFIG &dc, char *fn); 827 static void load_conf(CONFIG &dc, char *fn);
805 static void load_conf(CONFIG &dc, char *fn) { 828 static void load_conf(CONFIG &dc, char *fn) {
806 dc.config_files.push_back(fn); 829 dc.config_files.push_back(fn);
807 map<char*, int, ltstr> commands; 830 map<char*, int, ltstr> commands;
808 enum {dummy, content, dnsbl, dnsbll, envfrom, envto, include, includedcc}; 831 enum {dummy, content, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc};
809 commands["content" ] = content; 832 commands["content" ] = content;
833 commands["html_limit" ] = htmllimit;
834 commands["html_tag" ] = htmltag;
810 commands["dnsbl" ] = dnsbl; 835 commands["dnsbl" ] = dnsbl;
811 commands["dnsbl_list" ] = dnsbll; 836 commands["dnsbl_list" ] = dnsbll;
812 commands["env_from" ] = envfrom; 837 commands["env_from" ] = envfrom;
813 commands["env_to" ] = envto; 838 commands["env_to" ] = envto;
814 commands["include" ] = include; 839 commands["include" ] = include;
831 // have a decent command 856 // have a decent command
832 bool processed = false; 857 bool processed = false;
833 switch (commands[cmd]) { 858 switch (commands[cmd]) {
834 case content: { 859 case content: {
835 char *suff = strtok(NULL, delim); 860 char *suff = strtok(NULL, delim);
836 if (!suff) break; // no dns suffic 861 if (!suff) break; // no dns suffix
837 char *msg = suff + strlen(suff); 862 char *msg = suff + strlen(suff);
838 if ((msg - line) >= strlen(orig)) break; // line ended with the dns suffix 863 if ((msg - line) >= strlen(orig)) break; // line ended with the dns suffix
839 msg = strchr(msg+1, '\''); 864 msg = strchr(msg+1, '\'');
840 if (!msg) break; // no reply message template 865 if (!msg) break; // no reply message template
841 msg++; // move over the leading ' 866 msg++; // move over the leading '
843 char *last = strchr(msg, '\''); 868 char *last = strchr(msg, '\'');
844 if (!last) break; // no trailing quote 869 if (!last) break; // no trailing quote
845 *last = '\0'; // make it a null terminator 870 *last = '\0'; // make it a null terminator
846 dc.content_suffix = register_string(suff); 871 dc.content_suffix = register_string(suff);
847 dc.content_message = register_string(msg); 872 dc.content_message = register_string(msg);
873 processed = true;
874 } break;
875
876 case htmllimit: {
877 char *limit = strtok(NULL, delim);
878 if (!limit) break; // no integer limit
879 char *msg = limit + strlen(limit);
880 if ((msg - line) >= strlen(orig)) break; // line ended with the limit
881 msg = strchr(msg+1, '\'');
882 if (!msg) break; // no reply message template
883 msg++; // move over the leading '
884 if ((msg - line) >= strlen(orig)) break; // line ended with the leading quote
885 char *last = strchr(msg, '\'');
886 if (!last) break; // no trailing quote
887 *last = '\0'; // make it a null terminator
888 dc.bad_tag_limit = atoi(limit);
889 dc.limit_message = register_string(msg);
890 processed = true;
891 } break;
892
893 case htmltag: {
894 char *tag = next_token(delim);
895 if (!tag) break; // no html tag value
896 dc.html_tags.insert(tag);
848 processed = true; 897 processed = true;
849 } break; 898 } break;
850 899
851 case dnsbl: { 900 case dnsbl: {
852 // have a new dnsbl to use 901 // have a new dnsbl to use