Mercurial > sm-archive
changeset 0:616666e2f34c
initial version
author | carl |
---|---|
date | Fri, 10 Mar 2006 10:30:08 -0800 |
parents | |
children | 45c8592d5d13 |
files | html/Makefile.am info/Makefile.am man/Makefile.am src/Makefile.am src/context.cpp src/context.h src/includes.h src/sm-archive.cpp src/sm-archive.h src/test.cpp src/tokenizer.cpp src/tokenizer.h xml/Makefile.am xml/header.sgml xml/header.xml xml/sm-archive.in |
diffstat | 16 files changed, 1769 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/html/Makefile.am Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,3 @@ +htmldir = ${datadir}/doc/@PACKAGE@-@VERSION@ +html_DATA = $(wildcard *.html) $(wildcard *.pdf) +EXTRA_DIST = $(html_DATA)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/info/Makefile.am Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,2 @@ +info_INFOS = dnsbl.texi +EXTRA_DIST = $(info_INFOS)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/man/Makefile.am Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,2 @@ +man_MANS = dnsbl.1 dnsbl.conf.5 +EXTRA_DIST = $(man_MANS)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Makefile.am Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,13 @@ +sbin_PROGRAMS = sm-archive +sm-archive_SOURCES = sm-archive.cpp sm-archive.h context.cpp context.h tokenizer.cpp tokenizer.h includes.h +EXTRA_DIST = test.cpp + +# set the include path found by configure +INCLUDES= $(all_includes) + +# the library search path. +sm-archive_LDFLAGS = $(all_libraries) /usr/lib/libresolv.a -lmilter -pthread + +# default compile flags +sm-archive_CXXFLAGS = -pthread +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/context.cpp Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,202 @@ +/* + +Copyright (c) 2004 Carl Byington - 510 Software Group, released under +the GPL version 2 or any later version at your choice available at +http://www.fsf.org/licenses/gpl.txt + +*/ + +#include "includes.h" + +// needed for socket io +#include <unistd.h> +#include <sys/ioctl.h> +#include <net/if.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <netdb.h> +#include <sys/socket.h> +#include <sys/un.h> + +static char* context_version="$Id$"; + +char *token_envfrom; +char *token_lbrace; +char *token_rbrace; +char *token_rcptto; +char *token_semi; + +string_set all_strings; // owns all the strings, only modified by the config loader thread +const int maxlen = 1000; // used for snprintf buffers + +CONFIG::CONFIG() { + reference_count = 0; + generation = 0; + load_time = 0; +} + + +CONFIG::~CONFIG() { +} + + +char *CONFIG::find(char *needle, &string_map haystack) { + string_map::iterator i = haystack.find(needle); + if (i != haystack.end()) return (*i).second; // found user@domain.tld key + char *x = strchr(needle, '@'); + if (x) { + x++; + i = haystack.find(x); + if (i != haystack.end()) return (*i).second; // found domain.tld key + char y = *x; + *x = '\0'; + i = haystack.find(needle); + *x = y; + if (i != haystack.end()) return (*i).second; // found user@ key + } + return NULL; +} + + +void CONFIG::dump() { + printf("rcpt_to {\n"); + for (string_map::iterator i=rcpt_to.begin(); i!=rcpt_to.end(); i++) { + char *to = (*i).first; + char *target = (*i).second; + printf(" %s \t %s\n", to, target); + } + printf("};\n"); + printf("env_from {\n"); + for (string_map::iterator i=env_from.begin(); i!=env_from.end(); i++) { + char *from = (*i).first; + char *target = (*i).second; + printf(" %s \t %s\n", from, target); + } + printf("};\n"); +} + + +//////////////////////////////////////////////// +// helper to discard the 
strings held by a string_set +// +void discard(string_set &s) { + for (string_set::iterator i=s.begin(); i!=s.end(); i++) { + free(*i); + } + s.clear(); +} + + +//////////////////////////////////////////////// +// helper to register a string in a string set +// +char* register_string(string_set &s, char *name) { + string_set::iterator i = s.find(name); + if (i != s.end()) return *i; + char *x = strdup(name); + s.insert(x); + return x; +} + + +//////////////////////////////////////////////// +// register a global string +// +char* register_string(char *name) { + return register_string(all_strings, name); +} + + +//////////////////////////////////////////////// +// +bool tsa(TOKEN &tok, char *token); +bool tsa(TOKEN &tok, char *token) { + char *have = tok.next(); + if (have == token) return true; + tok.token_error(token, have); + return false; +} + + +//////////////////////////////////////////////// +// +bool parse_rcpt_to(TOKEN &tok, CONFIG &dc); +bool parse_rcpt_to(TOKEN &tok, CONFIG &dc) { + if (!tsa(tok, token_lbrace)) return false; + while (true) { + char *have = tok.next(); + if (!have) break; + if (have == token_rbrace) break; + if (have == token_semi) { + // optional separators + } + else { + char *target = tok.next(); + dc.add_to(have, target); + } + } + return tsa(tok, token_semi); +} + + +//////////////////////////////////////////////// +// +bool parse_env_from(TOKEN &tok, CONFIG &dc); +bool parse_env_from(TOKEN &tok, CONFIG &dc) { + if (!tsa(tok, token_lbrace)) return false; + while (true) { + char *have = tok.next(); + if (!have) break; + if (have == token_rbrace) break; + if (have == token_semi) { + // optional separators + } + else { + char *target = tok.next(); + dc.add_from(have, target); + } + } + return tsa(tok, token_semi); +} + + +//////////////////////////////////////////////// +// parse a config file +// +bool load_conf(CONFIG &dc, char *fn) { + TOKEN tok(fn, &dc.config_files); + while (true) { + char *have = tok.next(); + if (!have) break; + 
if (have == token_envfrom) { + if (!parse_env_from(tok, dc)) { + tok.token_error("load_conf() failed to parse env_from"); + return false; + } + } + else if (have == token_rcptto) { + if (!parse_rcpt_to(tok, dc)) { + tok.token_error("load_conf() failed to parse rcpt_to"); + return false; + } + } + else { + tok.token_error(token_context, have); + return false; + } + } + return true; +} + + +//////////////////////////////////////////////// +// init the tokens +// +void token_init() { + token_envfrom = register_string("env_from"); + token_lbrace = register_string("{"); + token_rbrace = register_string("}"); + token_rcptto = register_string("rcpt_to"); + token_semi = register_string(";"); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/context.h Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,44 @@ +#ifndef context_include +#define context_include + +#include "tokenizer.h" +#include <map> + + +typedef map<char *, char *, ltstr> string_map; + +struct CONFIG { + // the only mutable stuff once it has been loaded from the config file + int reference_count; // protected by the global config_mutex + // all the rest is constant after loading from the config file + int generation; + time_t load_time; + string_map env_from; // map senders to archive mailboxes + string_map rcpt_to; // map recipients to archive mailboxes + + CONFIG(); + ~CONFIG(); + void add_from(char *from, char *target) {env_from[from] = target; }; + void add_to(char *to, char *target) {rcpt_to[to] = target; }; + char * find_from(char *from) {return find(from, env_from);}; + char * find_to(char *to) {return find(to, env_to); }; + char * find(char *needle, &string_map haystack); + void dump(); +}; + +extern char *token_envfrom; +extern char *token_lbrace; +extern char *token_rbrace; +extern char *token_rcptto; +extern char *token_semi; + +extern string_set all_strings; // owns all the strings, only modified by the config loader thread + +void discard(string_set &s); +char* register_string(string_set &s, char *name); +char* register_string(char *name); +CONFIG *parse_config(char *fn); +bool load_conf(CONFIG &dc, char *fn); +void token_init(); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/includes.h Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,8 @@ +#define VERIFY_DEBUG 1 +#define RESOLVER_DEBUG 1 +#undef VERIFY_DEBUG +#undef RESOLVER_DEBUG + +#include "tokenizer.h" +#include "context.h" +#include "dnsbl.h"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/sm-archive.cpp Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,509 @@ +/* + +Copyright (c) 2004, 2005 Carl Byington - 510 Software Group, released +under the GPL version 2 or any later version at your choice available at +http://www.fsf.org/licenses/gpl.txt + +Based on a sample milter Copyright (c) 2000-2003 Sendmail, Inc. and its +suppliers. Inspired by the DCC by Rhyolite Software + +-p port The port through which the MTA will connect to this milter. +-t sec The timeout value. +-c Check the config, and print a copy to stdout. Don't start the + milter or do anything with the socket. +-d increase debug level + +*/ + + +// from sendmail sample +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <sysexits.h> +#include <unistd.h> + +// needed for socket io +#include <sys/ioctl.h> +#include <net/if.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <netdb.h> +#include <sys/socket.h> +#include <sys/un.h> + +// needed for thread +#include <pthread.h> + +// needed for std c++ collections +#include <set> +#include <map> +#include <list> + +// for the dns resolver +#include <netinet/in.h> +#include <arpa/nameser.h> +#include <resolv.h> + +// misc stuff needed here +#include <ctype.h> +#include <syslog.h> +#include <pwd.h> +#include <sys/wait.h> /* header for waitpid() and various macros */ +#include <signal.h> /* header for signal functions */ + +#include "includes.h" + +static char* dnsbl_version="$Id$"; + + +extern "C" { + #include "libmilter/mfapi.h" + sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr); + sfsistat mlfi_envfrom(SMFICTX *ctx, char **argv); + sfsistat mlfi_envrcpt(SMFICTX *ctx, char **argv); + sfsistat mlfi_eom(SMFICTX *ctx); + sfsistat mlfi_abort(SMFICTX *ctx); + sfsistat mlfi_close(SMFICTX *ctx); + void sig_chld(int signo); +} + +int debug_syslog = 0; +bool syslog_opened = false; +bool use_syslog = true; // false to printf 
+bool loader_run = true; // used to stop the config loader thread +CONFIG *config = NULL; // protected by the config_mutex +int generation = 0; // protected by the config_mutex +const int maxlen = 1000; // used for snprintf buffers + +pthread_mutex_t config_mutex; +pthread_mutex_t syslog_mutex; + + +mlfiPriv::mlfiPriv() { + pthread_mutex_lock(&config_mutex); + pc = config; + pc->reference_count++; + pthread_mutex_unlock(&config_mutex); + mailaddr = NULL; + queueid = NULL; + processed_from = false; +} + +mlfiPriv::~mlfiPriv() { + pthread_mutex_lock(&config_mutex); + pc->reference_count--; + pthread_mutex_unlock(&config_mutex); + reset(true); +} + +void mlfiPriv::reset(bool final) { + processed_from = false; + if (mailaddr) free(mailaddr); + if (queueid) free(queueid); + if (!final) { + mailaddr = NULL; + queueid = NULL; + } +} + +#define MLFIPRIV ((struct mlfiPriv *) smfi_getpriv(ctx)) + + +//////////////////////////////////////////////// +// syslog a message +// +void my_syslog(mlfiPriv *priv, char *text) { + char buf[maxlen]; + if (priv) { + snprintf(buf, sizeof(buf), "%s: %s", priv->queueid, text); + text = buf; + } + if (use_syslog) { + pthread_mutex_lock(&syslog_mutex); + if (!syslog_opened) { + openlog("dnsbl", LOG_PID, LOG_MAIL); + syslog_opened = true; + } + syslog(LOG_NOTICE, "%s", text); + pthread_mutex_unlock(&syslog_mutex); + } + else { + printf("%s \n", text); + } +} + +void my_syslog(char *text) { + my_syslog(NULL, text); +} + + +//////////////////////////////////////////////// +// this email address is passed in from sendmail, and will +// always be enclosed in <>. It may have mixed case, just +// as the mail client sent it. We dup the string and convert +// the duplicate to lower case. 
+// +char *to_lower_string(char *email); +char *to_lower_string(char *email) { + int n = strlen(email)-2; + if (n < 1) return strdup(email); + char *key = strdup(email+1); + key[n] = '\0'; + for (int i=0; i<n; i++) key[i] = tolower(key[i]); + return key; +} + + +//////////////////////////////////////////////// +// start of sendmail milter interfaces +// +sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr) +{ + // allocate some private memory + mlfiPriv *priv = new mlfiPriv; + // save the private data + smfi_setpriv(ctx, (void*)priv); + // continue processing + return SMFIS_CONTINUE; +} + +sfsistat mlfi_envfrom(SMFICTX *ctx, char **from) +{ + mlfiPriv &priv = *MLFIPRIV; + priv.mailaddr = to_lower_string(from[0]); + return SMFIS_CONTINUE; +} + +sfsistat mlfi_envrcpt(SMFICTX *ctx, char **rcpt) +{ + mlfiPriv &priv = *MLFIPRIV; + CONFIG &dc = *priv.pc; + if (!priv.queueid) priv.queueid = strdup(smfi_getsymval(ctx, "i")); + char *rcptaddr = to_lower_string(rcpt[0]); + if (debug_syslog > 1) { + char msg[maxlen]; + snprintf(msg, sizeof(msg), "from <%s> to <%s>", priv.mailaddr, rcptaddr); + my_syslog(&priv, msg); + } + char *target = dc.find_to(rcptaddr); + if (target) smfi_addrcpt(ctx, target); + free(rcptaddr); + if (!processed_from) { + target = dc.find_from(priv.mailaddr); + if (target) smfi_addrcpt(ctx, target); + processed_from = true; + } + return SMFIS_CONTINUE; +} + +sfsistat mlfi_eom(SMFICTX *ctx) +{ + // reset for a new message on the same connection + mlfi_abort(ctx); + return SMFIS_CONTINUE; +} + +sfsistat mlfi_abort(SMFICTX *ctx) +{ + mlfiPriv &priv = *MLFIPRIV; + if (!priv) return SMFIS_CONTINUE; + priv.reset(); + return SMFIS_CONTINUE; +} + +sfsistat mlfi_close(SMFICTX *ctx) +{ + mlfiPriv *priv = MLFIPRIV; + if (!priv) return SMFIS_CONTINUE; + delete priv; + smfi_setpriv(ctx, NULL); + return SMFIS_CONTINUE; +} + +struct smfiDesc smfilter = +{ + "SM-ARCHIVE", // filter name + SMFI_VERSION, // version code -- do not change + 
SMFIF_ADDRCPT, // flags + mlfi_connect, // connection info filter + NULL, // SMTP HELO command filter + mlfi_envfrom, // envelope sender filter + mlfi_envrcpt, // envelope recipient filter + NULL, // header filter + NULL, // end of header + NULL, // body block filter + mlfi_eom, // end of message + mlfi_abort, // message aborted + mlfi_close, // connection cleanup +}; + + +//////////////////////////////////////////////// +// reload the config +// +CONFIG* new_conf(); +CONFIG* new_conf() { + CONFIG *newc = new CONFIG; + pthread_mutex_lock(&config_mutex); + newc->generation = generation++; + pthread_mutex_unlock(&config_mutex); + if (debug_syslog) { + char buf[maxlen]; + snprintf(buf, sizeof(buf), "loading configuration generation %d", newc->generation); + my_syslog(buf); + } + if (load_conf(*newc, "dnsbl.conf")) { + newc->load_time = time(NULL); + return newc; + } + delete newc; + return NULL; +} + + +//////////////////////////////////////////////// +// thread to watch the old config files for changes +// and reload when needed. we also cleanup old +// configs whose reference count has gone to zero. 
+// +void* config_loader(void *arg); +void* config_loader(void *arg) { + typedef set<CONFIG *> configp_set; + configp_set old_configs; + while (loader_run) { + sleep(180); // look for modifications every 3 minutes + if (!loader_run) break; + CONFIG &dc = *config; + time_t then = dc.load_time; + struct stat st; + bool reload = false; + for (string_set::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) { + char *fn = *i; + if (stat(fn, &st)) reload = true; // file disappeared + else if (st.st_mtime > then) reload = true; // file modified + if (reload) break; + } + if (reload) { + CONFIG *newc = new_conf(); + if (newc) { + // replace the global config pointer + pthread_mutex_lock(&config_mutex); + CONFIG *old = config; + config = newc; + pthread_mutex_unlock(&config_mutex); + if (old) old_configs.insert(old); + } + else { + // failed to load new config + my_syslog("failed to load new configuration"); + system("echo 'failed to load new dnsbl configuration from /etc/dnsbl' | mail -s 'error in /etc/dnsbl configuration' root"); + // update the load time on the current config to prevent complaining every 3 minutes + dc.load_time = time(NULL); + } + } + // now look for old configs with zero ref counts + for (configp_set::iterator i=old_configs.begin(); i!=old_configs.end(); ) { + CONFIG *old = *i; + if (!old->reference_count) { + if (debug_syslog) { + char buf[maxlen]; + snprintf(buf, sizeof(buf), "freeing memory for old configuration generation %d", old->generation); + my_syslog(buf); + } + delete old; // destructor does all the work + old_configs.erase(i++); + } + else i++; + } + } + return NULL; +} + + +void usage(char *prog); +void usage(char *prog) +{ + fprintf(stderr, "Usage: %s [-d [level]] [-c] -p sm-sock-addr [-t timeout]\n", prog); + fprintf(stderr, "where sm-sock-addr is for the connection to sendmail\n"); + fprintf(stderr, " and should be one of\n"); + fprintf(stderr, " inet:port@ip-address\n"); + fprintf(stderr, " 
local:local-domain-socket-file-name\n"); + fprintf(stderr, "-c will load and dump the config to stdout\n"); + fprintf(stderr, "-d will set the syslog message level, currently 0 to 3\n"); +} + + + +void setup_socket(char *sock); +void setup_socket(char *sock) { + unlink(sock); + // sockaddr_un addr; + // memset(&addr, '\0', sizeof addr); + // addr.sun_family = AF_UNIX; + // strncpy(addr.sun_path, sock, sizeof(addr.sun_path)-1); + // int s = socket(AF_UNIX, SOCK_STREAM, 0); + // bind(s, (sockaddr*)&addr, sizeof(addr)); + // close(s); +} + + +/* + * The signal handler function -- only gets called when a SIGCHLD + * is received, ie when a child terminates + */ +void sig_chld(int signo) +{ + int status; + /* Wait for any child without blocking */ + while (waitpid(-1, &status, WNOHANG) > 0) { + // ignore child exit status, we only do this to cleanup zombies + } +} + + +int main(int argc, char**argv) +{ + token_init(); + bool check = false; + bool setconn = false; + int c; + const char *args = "p:t:d:ch"; + extern char *optarg; + + // Process command line options + while ((c = getopt(argc, argv, args)) != -1) { + switch (c) { + case 'p': + if (optarg == NULL || *optarg == '\0') { + fprintf(stderr, "Illegal sendmail socket: %s\n", optarg); + exit(EX_USAGE); + } + if (smfi_setconn(optarg) == MI_FAILURE) { + fprintf(stderr, "smfi_setconn failed\n"); + exit(EX_SOFTWARE); + } + if (strncasecmp(optarg, "unix:", 5) == 0) setup_socket(optarg + 5); + else if (strncasecmp(optarg, "local:", 6) == 0) setup_socket(optarg + 6); + setconn = true; + break; + + case 't': + if (optarg == NULL || *optarg == '\0') { + fprintf(stderr, "Illegal timeout: %s\n", optarg); + exit(EX_USAGE); + } + if (smfi_settimeout(atoi(optarg)) == MI_FAILURE) { + fprintf(stderr, "smfi_settimeout failed\n"); + exit(EX_SOFTWARE); + } + break; + + case 'c': + check = true; + break; + + case 'd': + if (optarg == NULL || *optarg == '\0') debug_syslog = 1; + else debug_syslog = atoi(optarg); + break; + + case 'h': + 
default: + usage(argv[0]); + exit(EX_USAGE); + } + } + + if (check) { + use_syslog = false; + debug_syslog = 10; + CONFIG *conf = new_conf(); + if (conf) { + conf->dump(); + delete conf; + return 0; + } + else { + return 1; // config failed to load + } + } + + if (!setconn) { + fprintf(stderr, "%s: Missing required -p argument\n", argv[0]); + usage(argv[0]); + exit(EX_USAGE); + } + + if (smfi_register(smfilter) == MI_FAILURE) { + fprintf(stderr, "smfi_register failed\n"); + exit(EX_UNAVAILABLE); + } + + // switch to background mode + if (daemon(1,0) < 0) { + fprintf(stderr, "daemon() call failed\n"); + exit(EX_UNAVAILABLE); + } + + // write the pid + const char *pidpath = "/var/run/dnsbl.pid"; + unlink(pidpath); + FILE *f = fopen(pidpath, "w"); + if (f) { +#ifdef linux + // from a comment in the DCC source code: + // Linux threads are broken. Signals given the + // original process are delivered to only the + // thread that happens to have that PID. The + // sendmail libmilter thread that needs to hear + // SIGINT and other signals does not, and that breaks + // scripts that need to stop milters. + // However, signaling the process group works. 
+ fprintf(f, "-%d\n", (u_int)getpgrp()); +#else + fprintf(f, "%d\n", (u_int)getpid()); +#endif + fclose(f); + } + + // initialize the thread sync objects + pthread_mutex_init(&config_mutex, 0); + pthread_mutex_init(&syslog_mutex, 0); + + // drop root privs + struct passwd *pw = getpwnam("sm-archive"); + if (pw) { + if (setgid(pw->pw_gid) == -1) { + my_syslog("failed to switch to group dnsbl"); + } + if (setuid(pw->pw_uid) == -1) { + my_syslog("failed to switch to user dnsbl"); + } + } + + // load the initial config + config = new_conf(); + if (!config) { + my_syslog("failed to load initial configuration, quitting"); + exit(1); + } + + // only create threads after the fork() in daemon + pthread_t tid; + if (pthread_create(&tid, 0, config_loader, 0)) + my_syslog("failed to create config loader thread"); + if (pthread_detach(tid)) + my_syslog("failed to detach config loader thread"); + + time_t starting = time(NULL); + int rc = smfi_main(); + if ((rc != MI_SUCCESS) && (time(NULL) > starting+5*60)) { + my_syslog("trying to restart after smfi_main()"); + loader_run = false; // eventually the config loader thread will terminate + execvp(argv[0], argv); + } + exit((rc == MI_SUCCESS) ? 0 : EX_UNAVAILABLE); +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/sm-archive.h Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,27 @@ +#ifndef dnsbl_include +#define dnsbl_include + +#include "context.h" + +extern int debug_syslog; + +//////////////////////////////////////////////// +// mail filter private data, held for us by sendmail +// +struct mlfiPriv +{ + // connection specific data + CONFIG *pc; // global filtering configuration + // message specific data + char *mailaddr; // envelope from value + char *queueid; // sendmail queue id + bool processed_from; // looked at env_from address + mlfiPriv(); + ~mlfiPriv(); + void reset(bool final = false); // for a new message +}; + +void my_syslog(mlfiPriv *priv, char *text); +void my_syslog(char *text); + +#endif
// file: src/test.cpp
/*

Copyright (c) 2004 Carl Byington - 510 Software Group, released under
the GPL version 2 or any later version at your choice available at
http://www.fsf.org/licenses/gpl.txt

*/

// Stress test: two threads each build, search and tear down their own
// private string_set over and over for one minute. Every lookup of a
// string that was just inserted must succeed, a miss counts as an error.

#include <stdio.h>
#include <stdlib.h>     // free
#include <string.h>     // strcmp, strdup
#include <unistd.h>
#include <pthread.h>
#include <set>

static const char* test_version="$Id$";

using namespace std;

// order C strings by content rather than by address
struct ltstr {
    bool operator()(const char* s1, const char* s2) const {
        return strcmp(s1, s2) < 0;
    }
};

typedef set<char *, ltstr> string_set;

static string_set all_strings;      // owns all the strings, only modified by the config loader thread

////////////////////////////////////////////////
// per thread control block. The stop flag is written by main
// and read by the worker, so it is only touched under the lock.
// The counters are written by the worker alone and read by
// main only after the worker has been joined.
//
struct stats {
    pthread_mutex_t lock;       // guards stop
    bool            stop;
    int             counter;    // completed build/search/discard rounds
    int             errors;     // lookups that failed
    stats();
    ~stats();
    void request_stop();
    bool should_stop();
};
stats::stats() {
    pthread_mutex_init(&lock, 0);
    stop    = false;
    counter = 0;
    errors  = 0;
}
stats::~stats() {
    pthread_mutex_destroy(&lock);
}
void stats::request_stop() {
    pthread_mutex_lock(&lock);
    stop = true;
    pthread_mutex_unlock(&lock);
}
bool stats::should_stop() {
    pthread_mutex_lock(&lock);
    bool s = stop;
    pthread_mutex_unlock(&lock);
    return s;
}

////////////////////////////////////////////////
// helper to discard the strings held by a string_set
//
static void discard(string_set &s);
static void discard(string_set &s) {
    for (string_set::iterator i=s.begin(); i!=s.end(); ++i) {
        free(*i);
    }
    s.clear();
}

////////////////////////////////////////////////
// helper to register a string in a string set.
// Returns the set's own copy of name, or NULL if
// that copy cannot be allocated.
//
static char* register_string(string_set &s, const char *name);
static char* register_string(string_set &s, const char *name) {
    // find() only reads the key, the cast just satisfies the key type
    string_set::iterator i = s.find(const_cast<char *>(name));
    if (i != s.end()) return *i;
    char *x = strdup(name);
    if (!x) return NULL;
    s.insert(x);
    return x;
}


////////////////////////////////////////////////
// thread tester
//
static void* tester(void *arg);
static void* tester(void *arg) {
    stats &st = *((stats *)arg);
    while (!st.should_stop()) {
        const int LIMIT = 1000;
        string_set *mine = new string_set;
        string_set &me   = *mine;
        for (int i=0; i<LIMIT; i++) {
            char buf[100];
            snprintf(buf, sizeof(buf), "this is string %d", i);
            register_string(me, buf);
        }
        // every fifth string must still be found
        for (int i=0; i<LIMIT; i+=5) {
            char buf[100];
            snprintf(buf, sizeof(buf), "this is string %d", i);
            string_set::iterator j = me.find(buf);
            if (j == me.end()) st.errors++;
        }
        discard(me);
        delete mine;
        st.counter++;
    }
    return NULL;
}

int main(int argc, char**argv)
{
    (void)argc;
    (void)argv;
    stats st1;
    stats st2;
    pthread_t tid1;
    pthread_t tid2;
    // the threads stay joinable, so that main can wait for them to
    // really finish before it reads their counters
    bool started1 = (pthread_create(&tid1, 0, tester, &st1) == 0);
    if (!started1) fprintf(stdout, "failed to create test thread\n");
    bool started2 = (pthread_create(&tid2, 0, tester, &st2) == 0);
    if (!started2) fprintf(stdout, "failed to create test thread\n");
    if (!started1 && !started2) return 1;   // nothing to test

    fprintf(stdout, "tests are running\n");
    sleep(60);
    st1.request_stop();
    st2.request_stop();
    if (started1) pthread_join(tid1, NULL);
    if (started2) pthread_join(tid2, NULL);

    fprintf(stdout, "counter 1 = %d\n", st1.counter);
    fprintf(stdout, "counter 2 = %d\n", st2.counter);
    fprintf(stdout, "errors 1 = %d\n", st1.errors);
    fprintf(stdout, "errors 2 = %d\n", st2.errors);
    return 0;
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/tokenizer.cpp Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,530 @@ +/* + +Copyright (c) 2004 Carl Byington - 510 Software Group, released under +the GPL version 2 or any later version at your choice available at +http://www.fsf.org/licenses/gpl.txt + +*/ + +#include "includes.h" + +static char* tokenizer_version="$Id$"; + +const int maxlen = 1000; // used for snprintf buffers + +enum state {s_init, + s_token, + s_string, + s_ignore, // whitespace + s_eol, // ignore to eol + end_state, + + s_term, // token terminator + s_single, + s_string1, // first " of string + s_string2, // last " of string + s_slash // possible start of ignore to eol + }; + +typedef state PARSE[end_state]; + +static PARSE parse_table[256] = { + // s_init s_token s_string s_ignore s_eol + { s_single, s_term, s_string, s_single, s_eol, }, // 0x00 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x01 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x02 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x03 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x04 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x05 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x06 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x07 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x08 + { s_ignore, s_term, s_string, s_ignore, s_eol, }, // 0x09 <tab> + { s_ignore, s_term, s_string2, s_ignore, s_ignore, }, // 0x0a <lf> + { s_single, s_term, s_string, s_single, s_eol, }, // 0x0b + { s_single, s_term, s_string, s_single, s_eol, }, // 0x0c + { s_ignore, s_term, s_string2, s_ignore, s_eol, }, // 0x0d <cr> + { s_single, s_term, s_string, s_single, s_eol, }, // 0x0e + { s_single, s_term, s_string, s_single, s_eol, }, // 0x0f + { s_single, s_term, s_string, s_single, s_eol, }, // 0x10 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x11 xon char + { s_single, s_term, s_string, s_single, s_eol, }, // 0x12 + { 
s_single, s_term, s_string, s_single, s_eol, }, // 0x13 xoff char + { s_single, s_term, s_string, s_single, s_eol, }, // 0x14 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x15 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x16 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x17 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x18 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x19 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1a + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1b + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1c + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1d + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1e + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1f + { s_ignore, s_term, s_string, s_ignore, s_eol, }, // 0x20 space + { s_single, s_term, s_string, s_single, s_eol, }, // 0x21 ! + { s_string1, s_term, s_string2, s_string1, s_eol, }, // 0x22 " + { s_eol, s_term, s_string, s_eol, s_eol, }, // 0x23 # + { s_single, s_term, s_string, s_single, s_eol, }, // 0x24 $ + { s_single, s_term, s_string, s_single, s_eol, }, // 0x25 % + { s_single, s_term, s_string, s_single, s_eol, }, // 0x26 & + { s_single, s_term, s_string, s_single, s_eol, }, // 0x27 ' + { s_single, s_term, s_string, s_single, s_eol, }, // 0x28 ( + { s_single, s_term, s_string, s_single, s_eol, }, // 0x29 ) + { s_single, s_term, s_string, s_single, s_eol, }, // 0x2A * + { s_single, s_token, s_string, s_single, s_eol, }, // 0x2B + + { s_single, s_term, s_string, s_single, s_eol, }, // 0x2C , + { s_single, s_token, s_string, s_single, s_eol, }, // 0x2D - + { s_single, s_token, s_string, s_single, s_eol, }, // 0x2E . 
+ { s_slash, s_token, s_string, s_slash, s_eol, }, // 0x2F / + { s_token, s_token, s_string, s_token, s_eol, }, // 0x30 0 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x31 1 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x32 2 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x33 3 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x34 4 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x35 5 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x36 6 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x37 7 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x38 8 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x39 9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3A : + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3B ; + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3C < + { s_single, s_token, s_string, s_single, s_eol, }, // 0x3D = + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3E > + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3F ? 
+ { s_single, s_token, s_string, s_single, s_eol, }, // 0x40 @ + { s_token, s_token, s_string, s_token, s_eol, }, // 0x41 A + { s_token, s_token, s_string, s_token, s_eol, }, // 0x42 B + { s_token, s_token, s_string, s_token, s_eol, }, // 0x43 C + { s_token, s_token, s_string, s_token, s_eol, }, // 0x44 D + { s_token, s_token, s_string, s_token, s_eol, }, // 0x45 E + { s_token, s_token, s_string, s_token, s_eol, }, // 0x46 F + { s_token, s_token, s_string, s_token, s_eol, }, // 0x47 G + { s_token, s_token, s_string, s_token, s_eol, }, // 0x48 H + { s_token, s_token, s_string, s_token, s_eol, }, // 0x49 I + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4A J + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4B K + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4C L + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4D M + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4E N + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4F O + { s_token, s_token, s_string, s_token, s_eol, }, // 0x50 P + { s_token, s_token, s_string, s_token, s_eol, }, // 0x51 Q + { s_token, s_token, s_string, s_token, s_eol, }, // 0x52 R + { s_token, s_token, s_string, s_token, s_eol, }, // 0x53 S + { s_token, s_token, s_string, s_token, s_eol, }, // 0x54 T + { s_token, s_token, s_string, s_token, s_eol, }, // 0x55 U + { s_token, s_token, s_string, s_token, s_eol, }, // 0x56 V + { s_token, s_token, s_string, s_token, s_eol, }, // 0x57 W + { s_token, s_token, s_string, s_token, s_eol, }, // 0x58 X + { s_token, s_token, s_string, s_token, s_eol, }, // 0x59 Y + { s_token, s_token, s_string, s_token, s_eol, }, // 0x5A Z + { s_single, s_term, s_string, s_single, s_eol, }, // 0x5B [ + { s_single, s_term, s_string, s_single, s_eol, }, // 0x5C backslash + { s_single, s_term, s_string, s_single, s_eol, }, // 0x5D ] + { s_single, s_term, s_string, s_single, s_eol, }, // 0x5E ^ + { s_single, s_token, s_string, s_single, s_eol, }, // 0x5F _ + { s_single, s_term, s_string, 
s_single, s_eol, }, // 0x60 ` + { s_token, s_token, s_string, s_token, s_eol, }, // 0x61 a + { s_token, s_token, s_string, s_token, s_eol, }, // 0x62 b + { s_token, s_token, s_string, s_token, s_eol, }, // 0x63 c + { s_token, s_token, s_string, s_token, s_eol, }, // 0x64 d + { s_token, s_token, s_string, s_token, s_eol, }, // 0x65 e + { s_token, s_token, s_string, s_token, s_eol, }, // 0x66 f + { s_token, s_token, s_string, s_token, s_eol, }, // 0x67 g + { s_token, s_token, s_string, s_token, s_eol, }, // 0x68 h + { s_token, s_token, s_string, s_token, s_eol, }, // 0x69 i + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6A j + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6B k + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6C l + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6D m + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6E n + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6F o + { s_token, s_token, s_string, s_token, s_eol, }, // 0x70 p + { s_token, s_token, s_string, s_token, s_eol, }, // 0x71 q + { s_token, s_token, s_string, s_token, s_eol, }, // 0x72 r + { s_token, s_token, s_string, s_token, s_eol, }, // 0x73 s + { s_token, s_token, s_string, s_token, s_eol, }, // 0x74 t + { s_token, s_token, s_string, s_token, s_eol, }, // 0x75 u + { s_token, s_token, s_string, s_token, s_eol, }, // 0x76 v + { s_token, s_token, s_string, s_token, s_eol, }, // 0x77 w + { s_token, s_token, s_string, s_token, s_eol, }, // 0x78 x + { s_token, s_token, s_string, s_token, s_eol, }, // 0x79 y + { s_token, s_token, s_string, s_token, s_eol, }, // 0x7A z + { s_single, s_term, s_string, s_single, s_eol, }, // 0x7B { + { s_single, s_term, s_string, s_single, s_eol, }, // 0x7C | + { s_single, s_term, s_string, s_single, s_eol, }, // 0x7D } + { s_single, s_term, s_string, s_single, s_eol, }, // 0x7E ~ + { s_single, s_term, s_string, s_single, s_eol, }, // 0x7f + { s_single, s_term, s_string, s_single, s_eol, }, // 0x80 + { s_single, 
s_term, s_string, s_single, s_eol, }, // 0x81 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x82 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x83 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x84 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x85 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x86 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x87 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x88 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x89 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8a + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8b + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8c + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8d + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8e + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8f + { s_single, s_term, s_string, s_single, s_eol, }, // 0x90 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x91 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x92 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x93 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x94 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x95 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x96 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x97 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x98 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x99 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9a + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9b + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9c + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9d + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9e + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9f + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa1 + { s_single, s_term, s_string, 
s_single, s_eol, }, // 0xa2 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa4 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xaa + { s_single, s_term, s_string, s_single, s_eol, }, // 0xab + { s_single, s_term, s_string, s_single, s_eol, }, // 0xac + { s_single, s_term, s_string, s_single, s_eol, }, // 0xad + { s_single, s_term, s_string, s_single, s_eol, }, // 0xae + { s_single, s_term, s_string, s_single, s_eol, }, // 0xaf + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb1 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb2 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb4 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xba + { s_single, s_term, s_string, s_single, s_eol, }, // 0xbb + { s_single, s_term, s_string, s_single, s_eol, }, // 0xbc + { s_single, s_term, s_string, s_single, s_eol, }, // 0xbd + { s_single, s_term, s_string, s_single, s_eol, }, // 0xbe + { s_single, s_term, s_string, s_single, s_eol, }, // 0xbf + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc1 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc2 + { s_single, s_term, s_string, s_single, s_eol, }, 
// 0xc3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc4 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xca + { s_single, s_term, s_string, s_single, s_eol, }, // 0xcb + { s_single, s_term, s_string, s_single, s_eol, }, // 0xcc + { s_single, s_term, s_string, s_single, s_eol, }, // 0xcd + { s_single, s_term, s_string, s_single, s_eol, }, // 0xce + { s_single, s_term, s_string, s_single, s_eol, }, // 0xcf + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd1 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd2 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd4 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xda + { s_single, s_term, s_string, s_single, s_eol, }, // 0xdb + { s_single, s_term, s_string, s_single, s_eol, }, // 0xdc + { s_single, s_term, s_string, s_single, s_eol, }, // 0xdd + { s_single, s_term, s_string, s_single, s_eol, }, // 0xde + { s_single, s_term, s_string, s_single, s_eol, }, // 0xdf + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe1 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe2 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe4 + { 
s_single, s_term, s_string, s_single, s_eol, }, // 0xe5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xea + { s_single, s_term, s_string, s_single, s_eol, }, // 0xeb + { s_single, s_term, s_string, s_single, s_eol, }, // 0xec + { s_single, s_term, s_string, s_single, s_eol, }, // 0xed + { s_single, s_term, s_string, s_single, s_eol, }, // 0xee + { s_single, s_term, s_string, s_single, s_eol, }, // 0xef + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf1 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf2 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf4 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xfa + { s_single, s_term, s_string, s_single, s_eol, }, // 0xfb + { s_single, s_term, s_string, s_single, s_eol, }, // 0xfc + { s_single, s_term, s_string, s_single, s_eol, }, // 0xfd + { s_single, s_term, s_string, s_single, s_eol, }, // 0xfe + { s_single, s_term, s_string, s_single, s_eol, }, // 0xff +}; + + +TOKEN::TOKEN(char *fn, string_set *includes) { + pushed = false; + include_files = includes; + include(fn); +} + + +TOKEN::~TOKEN() { + while (!streams.empty()) pop(); +} + + +void TOKEN::pop() { + ifstream *is = streams.front(); + char *fn = filenames.front(); + streams.pop_front(); + filenames.pop_front(); + filenamess.erase(fn); + linenumbers.pop_front(); + 
is->close(); + delete is; +} + + +void TOKEN::push_char(u_char c) { + pushed = true; + pushed_char = c; +} + + +bool TOKEN::next_char(u_char &uc) { + if (pushed) { + uc = (u_char)tolower((char)pushed_char); + pushed = false; + return true; + } + while (!streams.empty() && streams.front()->eof()) { + pop(); + } + if (streams.empty()) return false; + ifstream *is = streams.front(); + uc = (u_char)is->get(); + if (is->eof()) return next_char(uc); + if (uc == (u_char)'\n') { + int &line = linenumbers.front(); + line++; + } + uc = (u_char)tolower((char)uc); + return true; +} + + +bool TOKEN::include(char *fn) { + string_set::iterator i = filenamess.find(fn); + if (i != filenamess.end()) { + token_error("redundant or recursive include file detected"); + return false; + } + ifstream *is = new ifstream; + is->open(fn); + if (is->fail()) { + char buf[maxlen]; + snprintf(buf, sizeof(buf), "include file %s not found", fn); + token_error(buf); + return false; + } + string_set &inc = *include_files; + inc.insert(fn); + streams.push_front(is); + filenames.push_front(fn); + filenamess.insert(fn); + linenumbers.push_front(1); + return true; +} + + +char *TOKEN::next() { + if (!pending_tokens.empty()) { + char *t = pending_tokens.front(); + pending_tokens.pop_front(); + return t; + } + if (streams.empty()) return NULL; + const int PENDING_LIMIT = 1000; + static u_char buffer[PENDING_LIMIT]; + int count = 0; + state st = s_init; + while (true) { + if (count == (PENDING_LIMIT-1)) { + token_error("token too long"); + break; + } + if (st >= end_state) { + token_error("finite state machine error"); + break; + } + u_char c; + if (!next_char(c)) break; + st = parse_table[c][st]; + switch (st) { + case s_string: + case s_token: { + buffer[count++] = c; + } break; + + case s_term: { + push_char(c); + st = s_init; + } break; + + case s_string1: { + st = s_string; + } break; + + case s_string2: { + st = s_init; + } break; + + case s_single: { + buffer[count++] = c; + st = s_init; + } break; + 
+ case s_ignore: + case s_eol: { + } break; + + + case s_slash: { + buffer[count++] = c; + if (next_char(c)) { + if (c == (u_char)'/') { + // start of ignore to eol on // + count--; + st = s_eol; + } + else { + // not a // token, just return this single / + push_char(c); + st = s_init; + } + } + else { + // cannot get another char + st = s_init; + } + } break; + + default: { + token_error(); + token_error("unknown state %d %s \n", st, " "); + } break; + } + if (st == s_init) break; + } + + buffer[count] = '\0'; + if (count == 0) return NULL; + char *t = register_string((char*)buffer); + if (t == token_include) { + char *f = next(); // should be file name + char *s = next(); // should be semicolon + if (s == token_semi) { + include(f); + return next(); + } + else { + push(s); + push(f); + return t; + } + } + return t; +} + + +int TOKEN::nextint() { + char *t = next(); + char *e; + long i = strtol(t, &e, 10); + if (*e != '\0') { + token_error("integer", t); + return 0; + } + return (int)i; +} + + +void TOKEN::skipeol() { + while (true) { + u_char c; + if (!next_char(c)) break; + if (c == (u_char)'\n') break; + } +} + + +void TOKEN::token_error(const char *err) { + token_error(); + char buf[maxlen]; + snprintf(buf, sizeof(buf), "%s \n", err); + my_syslog(buf); +} + + +void TOKEN::token_error(const char *fmt, int d, const char *s) { + char buf[maxlen]; + snprintf(buf, sizeof(buf), fmt, d, s); + my_syslog(buf); +} + + +void TOKEN::token_error(const char *fmt, const char *t, const char *h) { + if (!h) h = "null"; + char buf[maxlen]; + snprintf(buf, sizeof(buf), fmt, t, h); + my_syslog(buf); +} + + +void TOKEN::token_error(const char *want, const char *have) { + token_error(); + token_error("expecting %s, found %s \n", want, have); +} + + +void TOKEN::token_error() { + token_error("syntax error at line %d in file %s -- ", cur_line(), cur_fn()); + line_list::iterator j = linenumbers.begin(); + string_list::iterator i = filenames.begin(); + for (; i!=filenames.end(); 
i++,j++) { + if (i != filenames.begin()) { + char *fn = (*i); + int li = (*j); + token_error("\n included from line %d in file %s -- ", li, fn); + } + } +} +
// src/tokenizer.h -- declarations for the configuration file tokenizer.
#ifndef tokenizer_include
#define tokenizer_include

#include <fstream>
#include <list>
#include <set>
#include <stdio.h>
#include <ctype.h>
#include <string.h>     // strcmp, used by ltstr below
#include <sys/types.h>  // u_char


// NOTE(review): a using-directive in a header leaks into every file that
// includes it.  It is kept because the rest of the project may depend on the
// unqualified names (list, set, ifstream).
using namespace std;

// Orders C strings by their contents rather than by pointer value, so that
// string_set treats two distinct buffers holding the same text as one key.
struct ltstr {
    bool operator()(const char* s1, const char* s2) const {
        return strcmp(s1, s2) < 0;
    }
};

typedef list<ifstream *>    stream_list;
typedef list<char *>        string_list;
typedef set<char *, ltstr>  string_set;
typedef list<int>           line_list;

// Tokenizer over a file and the files it includes.
//
// streams, filenames and linenumbers are parallel lists; the front element
// of each describes the file currently being read.
class TOKEN {
    stream_list streams;        // open input streams, current file first
    string_list filenames;      // name of each open stream, same order
    string_set  filenamess;     // names currently open, to reject recursive includes
    line_list   linenumbers;    // current line number in each open stream
    string_list pending_tokens; // tokens handed back via push(), returned first by next()
    string_set  *include_files; // caller supplied set that records every file opened
    bool        pushed;         // true when pushed_char holds a character to re-read
    u_char      pushed_char;    // single character of push-back for next_char()

    void pop();                 // close and discard the current stream
    bool next_char(u_char &c);  // fetch the next (lower-cased) character, false at end of input
    void push_char(u_char c);   // hand one character back to next_char()

public:
    // Starts tokenizing fn; every file opened is also inserted into *includes.
    TOKEN(char *fn, string_set *includes);
    ~TOKEN();

    // Opens fn as the new current file.  Returns false (after logging) when
    // the file is already open or cannot be opened.
    bool  include(char *fn);
    char *next();       // return next token, NULL when there are no more
    int   nextint();    // next token parsed as a base 10 integer, 0 (after logging) on error
    void  skipeol();    // skip to eol
    void  push(char *token) {pending_tokens.push_front(token);}

    // Name of the file currently being read.  The list is empty when no file
    // could be opened or all input has been consumed; calling front() on it
    // would be undefined, so return a placeholder for the error messages.
    char *cur_fn() {
        static char no_file[] = "(no file)";
        return filenames.empty() ? no_file : filenames.front();
    }

    // Current line number in the file being read, 0 when no file is open.
    int   cur_line() {
        return linenumbers.empty() ? 0 : linenumbers.front();
    }

    // Error reporting helpers.
    void token_error(const char *err);                              // location prefix, then err
    void token_error(const char *fmt, int d, const char *s);        // fmt is a printf format taking (d, s)
    void token_error(const char *fmt, const char *t, const char *h);// fmt takes (t, h); a NULL h is shown as "null"
    void token_error(const char *want, const char *have);           // "expecting want, found have"
    void token_error();                                             // location prefix and include chain only
};

#endif
# xml/Makefile.am
#
# Rebuild the man, html, pdf and info documentation from the DocBook source.
# The source file is `sm-archive' (presumably produced by configure from
# sm-archive.in -- confirm it is listed in configure's output files).  It has
# no DOCTYPE of its own, so the matching header is prepended to build
# temporary XML and SGML inputs, which are removed again at the end.
all: sm-archive
	cat header.xml  sm-archive >sm-archive.xml
	cat header.sgml sm-archive >sm-archive.sgml
	rm -f ../html/*html
	rm -f ../html/*pdf
	xmlto -o ../man  man   sm-archive.xml
	xmlto -o ../html xhtml sm-archive.xml
	xmlto -o ../html pdf   sm-archive.xml
	docbook2texi -o ../info sm-archive.sgml
	rm -f sm-archive.xml sm-archive.sgml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xml/header.sgml Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,1 @@ +<!DOCTYPE reference PUBLIC "-//OASIS//DTD DocBook V4.1//EN">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xml/header.xml Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,4 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE reference PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" + "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd"> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xml/sm-archive.in Fri Mar 10 10:30:08 2006 -0800 @@ -0,0 +1,235 @@ +<reference> + <title>@PACKAGE@ Sendmail milter - Version @VERSION@</title> + <partintro> + <title>Packages</title> + <para>The various source and binary packages are available at <ulink + url="http://www.five-ten-sg.com/@PACKAGE@/packages/">http://www.five-ten-sg.com/@PACKAGE@/packages/</ulink> + The most recent documentation is available at <ulink + url="http://www.five-ten-sg.com/@PACKAGE@/">http://www.five-ten-sg.com/@PACKAGE@/</ulink> + </para> + + </partintro> + + <refentry id="@PACKAGE@.1"> + <refentryinfo> + <date>2006-03-10</date> + </refentryinfo> + + <refmeta> + <refentrytitle>@PACKAGE@</refentrytitle> + <manvolnum>1</manvolnum> + <refmiscinfo>@PACKAGE@ @VERSION@</refmiscinfo> + </refmeta> + + <refnamediv id='name.1'> + <refname>@PACKAGE@</refname> + <refpurpose>a sendmail milter to add recipients to messages</refpurpose> + </refnamediv> + + <refsynopsisdiv id='synopsis.1'> + <title>Synopsis</title> + <cmdsynopsis> + <command>@PACKAGE@</command> + <arg><option>-c</option></arg> + <arg><option>-d <replaceable class="parameter">n</replaceable></option></arg> + <arg><option>-p <replaceable class="parameter">sendmail-socket</replaceable></option></arg> + <arg><option>-t <replaceable class="parameter">timeout</replaceable></option></arg> + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1 id='options.1'> + <title>Options</title> + <variablelist> + <varlistentry> + <term>-c</term> + <listitem><para> + Load the configuration file, print a canonical form + of the configuration on stdout, and exit. + </para></listitem> + </varlistentry> + <varlistentry> + <term>-d <replaceable class="parameter">n</replaceable></term> + <listitem><para> + Set the debug level to <replaceable class="parameter">n</replaceable>. 
+ </para></listitem> + </varlistentry> + <varlistentry> + <term>-p <replaceable class="parameter">sendmail-socket</replaceable></term> + <listitem><para> + Set the socket used for the milter connection to sendmail. This is either + "inet:port@ip-address" or "local:local-domain-socket-file-name". + </para></listitem> + </varlistentry> + <varlistentry> + <term>-t <replaceable class="parameter">timeout</replaceable></term> + <listitem><para> + Set the timeout in seconds used for communication with sendmail. + </para></listitem> + </varlistentry> + </variablelist> + </refsect1> + + <refsect1 id='usage.1'> + <title>Usage</title> + <para><command>@PACKAGE@</command> -c</para> + <para><command>@PACKAGE@</command> -d 10 -p local:sm-archive.sock</para> + </refsect1> + + <refsect1 id='installation.1'> + <title>Installation</title> + <para> + This is now a standard GNU autoconf/automake installation, so the normal + "./configure; make; su; make install" works. "make chkconfig" will + set up the init.d runlevel scripts. Alternatively, you can use the + source or binary RPMs at <ulink + url="http://www.five-ten-sg.com/@PACKAGE@/packages">http://www.five-ten-sg.com/@PACKAGE@/packages</ulink>. + </para> + <para> + Note that this has ONLY been tested on Linux, specifically RedHat Linux. + You will need at a minimum a C++ compiler with a + minimally thread safe STL implementation. The distribution includes a + test.cpp program. If it fails this milter won't work. If it passes, + this milter might work. + </para> + <para> + Modify your sendmail.mc by adding + the following line, then rebuild the .cf file: + </para> + <para><screen>INPUT_MAIL_FILTER(`sm-archive', `S=local:/var/run/sm-archive/sm-archive.sock, F=T, T=C:30s;S:5m;R:5m;E:5m')</screen></para> + <para> + Modify the default <citerefentry> + <refentrytitle>@PACKAGE@.conf</refentrytitle> <manvolnum>5</manvolnum> + </citerefentry> configuration. 
+ </para> + </refsect1> + + <refsect1 id='configuration.1'> + <title>Configuration</title> + <para> + The configuration file is documented in <citerefentry> + <refentrytitle>@PACKAGE@.conf</refentrytitle> <manvolnum>5</manvolnum> + </citerefentry>. Any change to the config file, or any file included + from that config file, will cause it to be reloaded within three + minutes. + </para> + </refsect1> + + <refsect1 id='introduction.1'> + <title>Introduction</title> + <para> + Consider the problem of archiving all the mail sent to or from particular + email addresses or domains. This milter allows you to configure archive + mailboxes for each address or domain. It will add the address of the archive + mailbox to messages that pass through this milter. + </para> + <para> + The sm-archive milter reads a text configuration file (sm-archive.conf) on + startup, and whenever the config file (or any of the referenced include + files) is changed. The entire configuration file is case insensitive. + If the configuration cannot be loaded due to a syntax error, the milter + will log the error and quit. If the configuration cannot be reloaded + after being modified, the milter will log the error and send an email to + root from sm-archive@$hostname. You probably want to add sm-archive@$hostname + to your /etc/mail/virtusertable since otherwise sendmail will reject + that message. + </para> + </refsect1> + + <refsect1 id='todo.1'> + <title>TODO</title> + <para> + Placeholder for future ideas. + </para> + </refsect1> + + <refsect1 id='copyright.1'> + <title>Copyright</title> + <para> + Copyright (C) 2006 by 510 Software Group &lt;carl@five-ten-sg.com&gt; + </para> + <para> + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. 
+ </para> + <para> + You should have received a copy of the GNU General Public License along + with this program; see the file COPYING. If not, please write to the + Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + </para> + </refsect1> + + <refsect1 id='version.1'> + <title>CVS Version</title> + <para> + $Id$ + </para> + </refsect1> + </refentry> + + + <refentry id="@PACKAGE@.conf.5"> + <refentryinfo> + <date>2006-03-10</date> + </refentryinfo> + + <refmeta> + <refentrytitle>@PACKAGE@.conf</refentrytitle> + <manvolnum>5</manvolnum> + <refmiscinfo>@PACKAGE@ @VERSION@</refmiscinfo> + </refmeta> + + <refnamediv id='name.5'> + <refname>@PACKAGE@.conf</refname> + <refpurpose>configuration file for @PACKAGE@ sendmail milter</refpurpose> + </refnamediv> + + <refsynopsisdiv id='synopsis.5'> + <title>Synopsis</title> + <cmdsynopsis> + <command>@PACKAGE@.conf</command> + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1 id='description.5'> + <title>Description</title> + <para>The <command>@PACKAGE@.conf</command> configuration file is + specified by this partial BNF description.</para> + + <literallayout class="monospaced"><![CDATA[ +CONFIG = {ENV-FROM ENV-TO}+ +ENV-FROM = env_from "{" LIST "}" ";" +ENV-TO = rcpt_to "{" LIST "}" ";" +LIST = ELEMENT {LIST}+ +ELEMENT = ADDRESS TARGET ";" +ADDRESS = (USER@ | DOMAIN | USER@DOMAIN) +TARGET = ("" | USER@DOMAIN) +]]></literallayout> + </refsect1> + + <refsect1 id='sample.5'> + <title>Sample</title> + <literallayout class="monospaced"><![CDATA[ +env_from { + fred@example.com manager@example.com; + example.com archive@example.com; + joe@example.com ""; # suppress archiving for joe +}; + +rcpt_to { + fred@example.com manager@example.com; +}; + +]]></literallayout> + </refsect1> + + <refsect1 id='version.5'> + <title>CVS Version</title> + <para> + $Id$ + </para> + </refsect1> + + </refentry> +</reference>