# HG changeset patch # User carl # Date 1121030340 25200 # Node ID b7449114ebb092e8be8536a4a05d35d66f3c4289 # Parent 2b369f7db7bf638d62dfb910590f1b5dfc514aca start coding on new config syntax diff -r 2b369f7db7bf -r b7449114ebb0 new.bash --- a/new.bash Sun Jul 10 13:28:33 2005 -0700 +++ b/new.bash Sun Jul 10 14:19:00 2005 -0700 @@ -4,12 +4,12 @@ ## compile and run the new parser program ## rm -f dnsbl.o scanner.o context.o tokenizer.o -g++ -c dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp +g++ -c -pthread dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp if [ $? -ne 0 ]; then echo "compiler errors" exit fi -g++ -o dnsbl dnsbl.o scanner.o context.o tokenizer.o -pthread +g++ -o dnsbl dnsbl.o scanner.o context.o tokenizer.o /usr/lib/libresolv.a -lmilter -pthread if [ $? -ne 0 ]; then echo "linker errors" exit diff -r 2b369f7db7bf -r b7449114ebb0 src/context.cpp --- a/src/context.cpp Sun Jul 10 13:28:33 2005 -0700 +++ b/src/context.cpp Sun Jul 10 14:19:00 2005 -0700 @@ -324,7 +324,7 @@ //////////////////////////////////////////////// // helper to discard the strings held by a string_set // -static void discard(string_set &s) { +void discard(string_set &s) { for (string_set::iterator i=s.begin(); i!=s.end(); i++) { free(*i); } diff -r 2b369f7db7bf -r b7449114ebb0 src/context.h --- a/src/context.h Sun Jul 10 13:28:33 2005 -0700 +++ b/src/context.h Sun Jul 10 14:19:00 2005 -0700 @@ -100,7 +100,7 @@ dnsblp_list& get_dnsbl_list() {return dnsbl_list;}; bool get_content_filtering() {return content_filtering;}; - bool acceptable_content(recorder &memory); + bool acceptable_content(recorder &memory, char *&msg); bool ignore_host(char *host); void dump(int level = 0); @@ -166,7 +166,7 @@ extern string_set all_strings; // owns all the strings, only modified by the config loader thread -static void discard(string_set &s); +void discard(string_set &s); char* register_string(string_set &s, char *name); char* register_string(char *name); CONFIG *parse_config(char *fn); diff -r 2b369f7db7bf -r b7449114ebb0 src/dnsbl.cpp --- a/src/dnsbl.cpp Sun Jul 10 13:28:33 2005 -0700 +++ b/src/dnsbl.cpp Sun Jul 10 14:19:00 2005 -0700 @@ -119,7 +119,7 @@ }; -void ns_map::~ns_map() { +ns_map::~ns_map() { for (string_map::iterator i=ns_host.begin(); i!=ns_host.end(); i++) { char *x = (*i).first; char *y = (*i).second; @@ -635,10 +635,9 @@ // check the hosts from the body against the content dnsbl // bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int ip); -bool check_hosts(mlfiPriv &priv) { - static buf[2000]; +bool check_hosts(mlfiPriv &priv, bool random, int limit, char *&host, int ip) { CONFIG &dc = *priv.pc; - string_set &hosts = priv.memory->hosts; + string_set &hosts = priv.memory->get_hosts(); string_set &ignore = dc.get_content_host_ignore(); int count = 0; @@ -681,7 +680,7 @@ int_set::iterator i = ips.find(ip); if (i == ips.end()) { ips.insert(ip); - if (check_single(priv, ip, dc.content_suffix)) { + if (check_single(priv, ip, dc.get_content_suffix())) { return true; } } @@ -714,7 +713,7 @@ int_set::iterator i = ips.find(ip); if (i == ips.end()) { ips.insert(ip); - if (check_single(priv, ip, dc.content_suffix)) { + if (check_single(priv, ip, dc.get_content_suffix())) { string_map::iterator j = nameservers.ns_host.find(host); if (j != nameservers.ns_host.end()) { char *refer = (*j).second; @@ -778,7 +777,7 @@ } else { // check the dns based lists - st = check_dnsbl(priv, con.get_dnsbl_list(), rejectlist); + st = (check_dnsbl(priv, con.get_dnsbl_list(), rejectlist)) ? black : oksofar; } if (st == reject) { // reject the recipient based on some dnsbl @@ -833,11 +832,11 @@ char *msg = NULL; string_set alive; bool random = false; - bool limit = 0; - for (context_map::iterator i=env_to.begin(); i!=env_to.end(); i++) { + int limit = 0; + for (context_map::iterator i=priv.env_to.begin(); i!=priv.env_to.end(); i++) { char *rcpt = (*i).first; CONTEXT &con = *((*i).second); - if (!con.acceptable_content(priv.memory, msg)) { + if (!con.acceptable_content(*priv.memory, msg)) { // bad html tags or excessive hosts smfi_delrcpt(ctx, rcpt); } @@ -927,7 +926,7 @@ char buf[200]; snprintf(buf, sizeof(buf), "loading configuration generation %d", newc->generation); my_syslog(buf); - if (load_conf(*newc, "dnsbl.conf") { + if (load_conf(*newc, "dnsbl.conf")) { newc->load_time = time(NULL); return newc; } @@ -952,7 +951,7 @@ time_t then = dc.load_time; struct stat st; bool reload = false; - for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) { + for (string_set::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) { char *fn = *i; if (stat(fn, &st)) reload = true; // file disappeared else if (st.st_mtime > then) reload = true; // file modified diff -r 2b369f7db7bf -r b7449114ebb0 src/dnsbl.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/dnsbl.h Sun Jul 10 14:19:00 2005 -0700 @@ -0,0 +1,45 @@ +#ifndef dnsbl_include +#define dnsbl_include + +#include "context.h" + +extern bool debug_syslog; + +class recorder; +class url_scanner; + +//////////////////////////////////////////////// +// mail filter private data, held for us by sendmail +// +struct mlfiPriv +{ + // connection specific data + CONFIG *pc; // global filtering configuration + int fd; // to talk to dns resolvers process + bool err; // did we get any errors on the resolver socket? + int ip; // ip4 address of the smtp client + map checked; // map of dnsblp to result of (ip listed on that dnsbl) + // message specific data + char *mailaddr; // envelope from value + char *queueid; // sendmail queue id + bool authenticated; // client authenticated? if so, suppress all dnsbl checks + bool have_whites; // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails + bool only_whites; // every recipient is whitelisted? + context_map env_to; // map each non-whitelisted recipient to their filtering context + recorder *memory; // memory for the content scanner + url_scanner *scanner; // object to handle body scanning + + mlfiPriv(); + ~mlfiPriv(); + void reset(bool final = false); // for a new message + void get_fd(); + void return_fd(); + int my_read(char *buf, int len); + int my_write(char *buf, int len); + void need_content_filter(char *rcpt, CONTEXT &con); +}; + +void my_syslog(mlfiPriv *priv, char *text); +void my_syslog(char *text); + +#endif diff -r 2b369f7db7bf -r b7449114ebb0 src/includes.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/includes.h Sun Jul 10 14:19:00 2005 -0700 @@ -0,0 +1,4 @@ +#include "tokenizer.h" +#include "context.h" +#include "dnsbl.h" +#include "scanner.h" diff -r 2b369f7db7bf -r b7449114ebb0 src/scanner.cpp --- a/src/scanner.cpp Sun Jul 10 13:28:33 2005 -0700 +++ b/src/scanner.cpp Sun Jul 10 14:19:00 2005 -0700 @@ -1105,7 +1105,7 @@ void fsa::error(char *err) { count = 0; st = init; - if (err) my_syslog(memory->priv, err); + if (err) my_syslog(memory->get_priv(), err); } void fsa::pusher() { @@ -1136,8 +1136,8 @@ // have two periods, so at least three components, and no empty components for (int i=0; itlds->find(p2+1); - if (i != memory->tlds->end()) memory->new_url((char*)pending); + string_set::iterator i = memory->get_tlds()->find(p2+1); + if (i != memory->get_tlds()->end()) memory->new_url((char*)pending); } } st = h_init; diff -r 2b369f7db7bf -r b7449114ebb0 src/scanner.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/scanner.h Sun Jul 10 14:19:00 2005 -0700 @@ -0,0 +1,138 @@ +#ifndef scanner_include +#define scanner_include + +#include "dnsbl.h" + +//////////////////////////////////////////////// +// memory for the content scanner +// +class recorder +{ + mlfiPriv *priv; // needed for syslog + string_set *html_tags; // valid tags + string_set *tlds; // valid tlds + string_set hosts; + int bad_html_tags; + int binary_tags; + +public: + recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_); + ~recorder(); + void empty(); + void new_url(char *host); + void new_tag(char *tag); + void binary(); + mlfiPriv *get_priv() {return priv; }; + string_set *get_tlds() {return tlds; }; + string_set &get_hosts() {return hosts; }; + bool excessive_bad_tags(int limit) {return (limit > 0) && (bad_html_tags > limit) && (bad_html_tags > 3*binary_tags); }; + bool excessive_hosts(int limit) {return (limit > 0) && (hosts.size() > limit); }; +}; + + +//////////////////////////////////////////////// +// finite state machine +// +enum state {// host name recognizer states + h_init, + h_host, + + // html tag discarder states + t_init, + t_tag1, // seen opening < + t_tag2, // not comment + t_com1, // seen ! + t_com2, // seen first - + t_com3, // seen second -, looking for --> + t_com4, // seen first - + t_com5, // seen second - + t_disc, // looking for closing > + + // url recognizer states + u_init, + u_http, + u_sla, + u_url, + + // url decoder states %xx + d_init, + d_pcnt, + d_1, + + // html entity decoder states &#nnn; + e_init, + e_amp, + e_num, + + // mime decoder states =xx + m_init, + m_eq, + m_1, + + // base64 decoder states + b_init, + b_lf, + b_lf2, + b_64, + + // uuencoding decoder states + uu_init, + uu_lf, + uu_lf2, + uu_64, + + // counter for number of columns in the table + end_state, + + // temporary states + h_end, + t_bin, + t_end, + u_reco, + d_2, + e_semi, + m_2, + m_cr, + m_nl, + b_cr, + uu_cr + }; + +#define PENDING_LIMIT 100 +class fsa { + u_char pending[PENDING_LIMIT]; + int count; + state st; + state init; + fsa *next1; + fsa *next2; + recorder *memory; + +public: + fsa(state init, fsa *next1_, fsa *next2_, recorder *memory_); + void push(u_char *buf, int len); + void pusher(); + void error(char *err); +}; + + +//////////////////////////////////////////////// +// the content scanner +// +class url_scanner { + fsa *host_parser; + fsa *tags_parser; + fsa *urls_parser; + fsa *urld_parser; + fsa *html_parser; + fsa *mime_parser; + fsa *b64_parser; + fsa *uu_parser; + +public: + url_scanner(recorder *memory); + ~url_scanner(); + void scan(u_char *buffer, size_t length); +}; + +#endif diff -r 2b369f7db7bf -r b7449114ebb0 src/tokenizer.cpp --- a/src/tokenizer.cpp Sun Jul 10 13:28:33 2005 -0700 +++ b/src/tokenizer.cpp Sun Jul 10 14:19:00 2005 -0700 @@ -6,7 +6,7 @@ */ -#include "context.h" +#include "dnsbl.h" static char* tokenizer_version="$Id$"; diff -r 2b369f7db7bf -r b7449114ebb0 test.bash --- a/test.bash Sun Jul 10 13:28:33 2005 -0700 +++ b/test.bash Sun Jul 10 14:19:00 2005 -0700 @@ -18,12 +18,13 @@ ########################### # compile the milter # -g++ -c -pthread dnsbl.cpp +rm -f dnsbl.o scanner.o context.o tokenizer.o +g++ -c -pthread dnsbl.cpp scanner.cpp context.cpp tokenizer.cpp if [ $? -ne 0 ]; then echo "compiler errors" exit fi -g++ -o dnsbl dnsbl.o /usr/lib/libresolv.a -lmilter -pthread +g++ -o dnsbl dnsbl.o scanner.o context.o tokenizer.o /usr/lib/libresolv.a -lmilter -pthread if [ $? -ne 0 ]; then echo "linker errors" exit