Mercurial > dnsbl
comparison src/dnsbl.cpp @ 24:2e23b7184d2b
start coding for bad html tag detection
author | carl |
---|---|
date | Wed, 19 May 2004 21:40:50 -0700 |
parents | 06de5ab6a232 |
children | fdae7ab30cfc |
comparison
equal
deleted
inserted
replaced
23:06de5ab6a232 | 24:2e23b7184d2b |
---|---|
123 from_map env_from; | 123 from_map env_from; |
124 string_map env_to_dnsbll; // map recipient to a named dnsbll | 124 string_map env_to_dnsbll; // map recipient to a named dnsbll |
125 string_map env_to_chkfrom; // map recipient to a named from map | 125 string_map env_to_chkfrom; // map recipient to a named from map |
126 char * content_suffix; // for sbl url body filtering | 126 char * content_suffix; // for sbl url body filtering |
127 char * content_message; // "" | 127 char * content_message; // "" |
128 char * limit_message; // error message for excessive bad html tags | |
129 int bad_tag_limit; // limit on bad html tags | |
130 string_set html_tags; // set of valid html tags | |
128 CONFIG(); | 131 CONFIG(); |
129 ~CONFIG(); | 132 ~CONFIG(); |
130 }; | 133 }; |
131 CONFIG::CONFIG() { | 134 CONFIG::CONFIG() { |
132 reference_count = 0; | 135 reference_count = 0; |
133 load_time = 0; | 136 load_time = 0; |
134 content_suffix = NULL; | 137 content_suffix = NULL; |
135 content_message = NULL; | 138 content_message = NULL; |
139 limit_message = NULL; | |
140 bad_tag_limit = 0; | |
136 } | 141 } |
137 CONFIG::~CONFIG() { | 142 CONFIG::~CONFIG() { |
138 for (dnsblp_map::iterator i=dnsbls.begin(); i!=dnsbls.end(); i++) { | 143 for (dnsblp_map::iterator i=dnsbls.begin(); i!=dnsbls.end(); i++) { |
139 DNSBLP d = (*i).second; | 144 DNSBLP d = (*i).second; |
145 // delete the underlying DNSBL objects. | |
140 delete d; | 146 delete d; |
141 } | 147 } |
142 for (dnsbllp_map::iterator i=dnsblls.begin(); i!=dnsblls.end(); i++) { | 148 for (dnsbllp_map::iterator i=dnsblls.begin(); i!=dnsblls.end(); i++) { |
143 DNSBLLP d = (*i).second; | 149 DNSBLLP d = (*i).second; |
150 // *d is a list of pointers to DNSBL objects, but | |
151 // the underlying objects have already been deleted above. | |
144 delete d; | 152 delete d; |
145 } | 153 } |
146 for (from_map::iterator i=env_from.begin(); i!=env_from.end(); i++) { | 154 for (from_map::iterator i=env_from.begin(); i!=env_from.end(); i++) { |
147 string_map *d = (*i).second; | 155 string_map *d = (*i).second; |
148 delete d; | 156 delete d; |
211 // message specific data | 219 // message specific data |
212 char *mailaddr; // envelope from value | 220 char *mailaddr; // envelope from value |
213 bool authenticated; // client authenticated? if so, suppress all dnsbl checks | 221 bool authenticated; // client authenticated? if so, suppress all dnsbl checks |
214 bool have_whites; // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails | 222 bool have_whites; // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails |
215 bool only_whites; // every recipient is whitelisted? | 223 bool only_whites; // every recipient is whitelisted? |
224 string_set non_whites; // remember the non-whitelisted recipients so we can remove them if need be | |
225 recorder *memory; // memory for the content scanner | |
216 url_scanner *scanner; // object to handle body scanning | 226 url_scanner *scanner; // object to handle body scanning |
217 string_set non_whites; // remember the non-whitelisted recipients so we can remove them if need be | |
218 string_set hosts; // remember the hosts that we have checked | |
219 mlfiPriv(); | 227 mlfiPriv(); |
220 ~mlfiPriv(); | 228 ~mlfiPriv(); |
221 void reset(bool final = false); // for a new message | 229 void reset(bool final = false); // for a new message |
222 }; | 230 }; |
223 mlfiPriv::mlfiPriv() { | 231 mlfiPriv::mlfiPriv() { |
228 ip = 0; | 236 ip = 0; |
229 mailaddr = NULL; | 237 mailaddr = NULL; |
230 authenticated = false; | 238 authenticated = false; |
231 have_whites = false; | 239 have_whites = false; |
232 only_whites = true; | 240 only_whites = true; |
233 scanner = new url_scanner(&hosts); | 241 memory = new recorder(&pc->html_tags); |
242 scanner = new url_scanner(memory); | |
234 } | 243 } |
235 mlfiPriv::~mlfiPriv() { | 244 mlfiPriv::~mlfiPriv() { |
236 pthread_mutex_lock(&config_mutex); | 245 pthread_mutex_lock(&config_mutex); |
237 pc->reference_count--; | 246 pc->reference_count--; |
238 pthread_mutex_unlock(&config_mutex); | 247 pthread_mutex_unlock(&config_mutex); |
239 reset(true); | 248 reset(true); |
240 } | 249 } |
241 void mlfiPriv::reset(bool final) { | 250 void mlfiPriv::reset(bool final) { |
242 if (mailaddr) free(mailaddr); | 251 if (mailaddr) free(mailaddr); |
252 discard(non_whites); | |
253 delete memory; | |
243 delete scanner; | 254 delete scanner; |
244 discard(non_whites); | |
245 discard(hosts); | |
246 if (!final) { | 255 if (!final) { |
247 mailaddr = NULL; | 256 mailaddr = NULL; |
248 authenticated = false; | 257 authenticated = false; |
249 have_whites = false; | 258 have_whites = false; |
250 only_whites = true; | 259 only_whites = true; |
251 scanner = new url_scanner(&hosts); | 260 memory = new recorder(&pc->html_tags); |
261 scanner = new url_scanner(memory); | |
252 } | 262 } |
253 } | 263 } |
254 | 264 |
255 #define MLFIPRIV ((struct mlfiPriv *) smfi_getpriv(ctx)) | 265 #define MLFIPRIV ((struct mlfiPriv *) smfi_getpriv(ctx)) |
256 | 266 |
469 static status check_hosts(mlfiPriv &priv, char *&host, int &ip); | 479 static status check_hosts(mlfiPriv &priv, char *&host, int &ip); |
470 static status check_hosts(mlfiPriv &priv, char *&host, int &ip) { | 480 static status check_hosts(mlfiPriv &priv, char *&host, int &ip) { |
471 CONFIG &dc = *priv.pc; | 481 CONFIG &dc = *priv.pc; |
472 if (!dc.content_suffix) return oksofar; | 482 if (!dc.content_suffix) return oksofar; |
473 int count = 0; | 483 int count = 0; |
474 for (string_set::iterator i=priv.hosts.begin(); i!=priv.hosts.end(); i++) { | 484 for (string_set::iterator i=priv.memory->hosts.begin(); i!=priv.memory->hosts.end(); i++) { |
475 count++; | 485 count++; |
476 if (count > 20) return oksofar; // silly to check too many hosts | 486 if (count > 20) return oksofar; // silly to check too many hosts |
477 host = *i; | 487 host = *i; |
478 if (debug_syslog) { | 488 if (debug_syslog) { |
479 char buf[200]; | 489 char buf[200]; |
492 } | 502 } |
493 status st = check_single(ip, dc.content_suffix); | 503 status st = check_single(ip, dc.content_suffix); |
494 if (st == reject) return st; | 504 if (st == reject) return st; |
495 } | 505 } |
496 } | 506 } |
507 host = NULL; | |
508 int bad = priv.memory->bad_html_tags; | |
509 int lim = priv.pc->bad_tag_limit; | |
510 if ((bad > lim) && (lim > 0)) return reject; | |
497 return oksofar; | 511 return oksofar; |
498 } | 512 } |
499 | 513 |
500 | 514 |
501 //////////////////////////////////////////////// | 515 //////////////////////////////////////////////// |
607 priv.only_whites || | 621 priv.only_whites || |
608 (check_hosts(priv, host, ip) == oksofar)) rc = SMFIS_CONTINUE; | 622 (check_hosts(priv, host, ip) == oksofar)) rc = SMFIS_CONTINUE; |
609 else { | 623 else { |
610 if (!priv.have_whites) { | 624 if (!priv.have_whites) { |
611 // can reject the entire message | 625 // can reject the entire message |
612 char adr[sizeof "255.255.255.255"]; | |
613 adr[0] = '\0'; | |
614 inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); | |
615 char buf[2000]; | 626 char buf[2000]; |
616 snprintf(buf, sizeof(buf), priv.pc->content_message, host, adr); | 627 if (!host) { |
628 // must be rejected due to excessive bad html tags | |
629 snprintf(buf, sizeof(buf), priv.pc->limit_message); | |
630 } | |
631 else { | |
632 char adr[sizeof "255.255.255.255"]; | |
633 adr[0] = '\0'; | |
634 inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); | |
635 snprintf(buf, sizeof(buf), priv.pc->content_message, host, adr); | |
636 } | |
617 smfi_setreply(ctx, "550", "5.7.1", buf); | 637 smfi_setreply(ctx, "550", "5.7.1", buf); |
618 rc = SMFIS_REJECT; | 638 rc = SMFIS_REJECT; |
619 } | 639 } |
620 else { | 640 else { |
621 // need to accept it but remove the recipients that don't want it | 641 // need to accept it but remove the recipients that don't want it |
705 } | 725 } |
706 fprintf(stdout, "\n"); | 726 fprintf(stdout, "\n"); |
707 } | 727 } |
708 if (dc.content_suffix) { | 728 if (dc.content_suffix) { |
709 fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message); | 729 fprintf(stdout, "\ncontent filtering enabled with %s %s\n", dc.content_suffix, dc.content_message); |
730 } | |
731 if (dc.bad_tag_limit) { | |
732 fprintf(stdout, "\ncontent filtering for excessive html tags enabled with limit %d %s\n", dc.bad_tag_limit, dc.limit_message); | |
710 } | 733 } |
711 fprintf(stdout, "\nfiles\n"); | 734 fprintf(stdout, "\nfiles\n"); |
712 for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) { | 735 for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) { |
713 char *f = *i; | 736 char *f = *i; |
714 fprintf(stdout, "config includes %s\n", f); | 737 fprintf(stdout, "config includes %s\n", f); |
803 | 826 |
804 static void load_conf(CONFIG &dc, char *fn); | 827 static void load_conf(CONFIG &dc, char *fn); |
805 static void load_conf(CONFIG &dc, char *fn) { | 828 static void load_conf(CONFIG &dc, char *fn) { |
806 dc.config_files.push_back(fn); | 829 dc.config_files.push_back(fn); |
807 map<char*, int, ltstr> commands; | 830 map<char*, int, ltstr> commands; |
808 enum {dummy, content, dnsbl, dnsbll, envfrom, envto, include, includedcc}; | 831 enum {dummy, content, htmllimit, htmltag, dnsbl, dnsbll, envfrom, envto, include, includedcc}; |
809 commands["content" ] = content; | 832 commands["content" ] = content; |
833 commands["html_limit" ] = htmllimit; | |
834 commands["html_tag" ] = htmltag; | |
810 commands["dnsbl" ] = dnsbl; | 835 commands["dnsbl" ] = dnsbl; |
811 commands["dnsbl_list" ] = dnsbll; | 836 commands["dnsbl_list" ] = dnsbll; |
812 commands["env_from" ] = envfrom; | 837 commands["env_from" ] = envfrom; |
813 commands["env_to" ] = envto; | 838 commands["env_to" ] = envto; |
814 commands["include" ] = include; | 839 commands["include" ] = include; |
831 // have a decent command | 856 // have a decent command |
832 bool processed = false; | 857 bool processed = false; |
833 switch (commands[cmd]) { | 858 switch (commands[cmd]) { |
834 case content: { | 859 case content: { |
835 char *suff = strtok(NULL, delim); | 860 char *suff = strtok(NULL, delim); |
836 if (!suff) break; // no dns suffic | 861 if (!suff) break; // no dns suffix |
837 char *msg = suff + strlen(suff); | 862 char *msg = suff + strlen(suff); |
838 if ((msg - line) >= strlen(orig)) break; // line ended with the dns suffix | 863 if ((msg - line) >= strlen(orig)) break; // line ended with the dns suffix |
839 msg = strchr(msg+1, '\''); | 864 msg = strchr(msg+1, '\''); |
840 if (!msg) break; // no reply message template | 865 if (!msg) break; // no reply message template |
841 msg++; // move over the leading ' | 866 msg++; // move over the leading ' |
843 char *last = strchr(msg, '\''); | 868 char *last = strchr(msg, '\''); |
844 if (!last) break; // no trailing quote | 869 if (!last) break; // no trailing quote |
845 *last = '\0'; // make it a null terminator | 870 *last = '\0'; // make it a null terminator |
846 dc.content_suffix = register_string(suff); | 871 dc.content_suffix = register_string(suff); |
847 dc.content_message = register_string(msg); | 872 dc.content_message = register_string(msg); |
873 processed = true; | |
874 } break; | |
875 | |
876 case htmllimit: { | |
877 char *limit = strtok(NULL, delim); | |
878 if (!limit) break; // no integer limit | |
879 char *msg = limit + strlen(limit); | |
880 if ((msg - line) >= strlen(orig)) break; // line ended with the limit | |
881 msg = strchr(msg+1, '\''); | |
882 if (!msg) break; // no reply message template | |
883 msg++; // move over the leading ' | |
884 if ((msg - line) >= strlen(orig)) break; // line ended with the leading quote | |
885 char *last = strchr(msg, '\''); | |
886 if (!last) break; // no trailing quote | |
887 *last = '\0'; // make it a null terminator | |
888 dc.bad_tag_limit = atoi(limit); | |
889 dc.limit_message = register_string(msg); | |
890 processed = true; | |
891 } break; | |
892 | |
893 case htmltag: { | |
894 char *tag = next_token(delim); | |
895 if (!tag) break; // no html tag value | |
896 dc.html_tags.insert(tag); | |
848 processed = true; | 897 processed = true; |
849 } break; | 898 } break; |
850 | 899 |
851 case dnsbl: { | 900 case dnsbl: { |
852 // have a new dnsbl to use | 901 // have a new dnsbl to use |