# HG changeset patch # User carl # Date 1120962281 25200 # Node ID dd21c8e130748b84a9cfd78e4d927d8b1813d781 # Parent c812a06f87bf3b238673b7f813dffca6e4cbf6aa start coding on new config syntax diff -r c812a06f87bf -r dd21c8e13074 hosts-ignore.conf --- a/hosts-ignore.conf Sat Apr 02 12:21:06 2005 -0800 +++ b/hosts-ignore.conf Sat Jul 09 19:24:41 2005 -0700 @@ -1,5 +1,5 @@ -ignore www.5iantlavalamp.com # present in almost all microsoft word documents +www.5iantlavalamp.com # present in almost all microsoft word documents -#ignore messenger.msn.click-url.com # hotmail using a spammer -#ignore search.msn.click-url.com # hotmail using a spammer -#ignore toolbar.msn.click-url.com # hotmail using a spammer +#messenger.msn.click-url.com # hotmail using a spammer +#search.msn.click-url.com # hotmail using a spammer +#toolbar.msn.click-url.com # hotmail using a spammer diff -r c812a06f87bf -r dd21c8e13074 html-tags.conf --- a/html-tags.conf Sat Apr 02 12:21:06 2005 -0800 +++ b/html-tags.conf Sat Jul 09 19:24:41 2005 -0700 @@ -1,110 +1,110 @@ # # some common stuff # -html_tag XML -html_tag !DOCTYPE +XML +"!DOCTYPE" # # taken from "index of elements" in the w3c html 4.01 specification # -html_tag A -html_tag ABBR -html_tag ACRONYM -html_tag ADDRESS -html_tag APPLET -html_tag AREA -html_tag B -html_tag BASE -html_tag BASEFONT -html_tag BDO -html_tag BIG -html_tag BLOCKQUOTE -html_tag BODY -html_tag BR -html_tag BUTTON -html_tag CAPTION -html_tag CENTER -html_tag CITE -html_tag CODE -html_tag COL -html_tag COLGROUP -html_tag DD -html_tag DEL -html_tag DFN -html_tag DIR -html_tag DIV -html_tag DL -html_tag DT -html_tag EM -html_tag FIELDSET -html_tag FONT -html_tag FORM -html_tag FRAME -html_tag FRAMESET -html_tag H1 -html_tag H2 -html_tag H3 -html_tag H4 -html_tag H5 -html_tag H6 -html_tag HEAD -html_tag HR -html_tag HTML -html_tag I -html_tag IFRAME -html_tag IMG -html_tag INPUT -html_tag INS -html_tag ISINDEX -html_tag KBD -html_tag LABEL -html_tag LEGEND -html_tag LI -html_tag 
LINK -html_tag MAP -html_tag MENU -html_tag META -html_tag NOFRAMES -html_tag NOSCRIPT -html_tag OBJECT -html_tag OL -html_tag OPTGROUP -html_tag OPTION -html_tag P -html_tag PARAM -html_tag PRE -html_tag Q -html_tag S -html_tag SAMP -html_tag SCRIPT -html_tag SELECT -html_tag SMALL -html_tag SPAN -html_tag STRIKE -html_tag STRONG -html_tag STYLE -html_tag SUB -html_tag SUP -html_tag TABLE -html_tag TBODY -html_tag TD -html_tag TEXTAREA -html_tag TFOOT -html_tag TH -html_tag THEAD -html_tag TITLE -html_tag TR -html_tag TT -html_tag U -html_tag UL -html_tag VAR +A +ABBR +ACRONYM +ADDRESS +APPLET +AREA +B +BASE +BASEFONT +BDO +BIG +BLOCKQUOTE +BODY +BR +BUTTON +CAPTION +CENTER +CITE +CODE +COL +COLGROUP +DD +DEL +DFN +DIR +DIV +DL +DT +EM +FIELDSET +FONT +FORM +FRAME +FRAMESET +H1 +H2 +H3 +H4 +H5 +H6 +HEAD +HR +HTML +I +IFRAME +IMG +INPUT +INS +ISINDEX +KBD +LABEL +LEGEND +LI +LINK +MAP +MENU +META +NOFRAMES +NOSCRIPT +OBJECT +OL +OPTGROUP +OPTION +P +PARAM +PRE +Q +S +SAMP +SCRIPT +SELECT +SMALL +SPAN +STRIKE +STRONG +STYLE +SUB +SUP +TABLE +TBODY +TD +TEXTAREA +TFOOT +TH +THEAD +TITLE +TR +TT +U +UL +VAR # # extra cruft from apple mac print subsystem # -html_tag PLIST -html_tag DICT -html_tag KEY -html_tag STRING -html_tag REAL -html_tag INTEGER -html_tag ARRAY -html_tag DATE +PLIST +DICT +KEY +STRING +REAL +INTEGER +ARRAY +DATE diff -r c812a06f87bf -r dd21c8e13074 new.bash --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/new.bash Sat Jul 09 19:24:41 2005 -0700 @@ -0,0 +1,16 @@ +#!/bin/bash + +############################ +## compile and run the new parser program +## +rm -f new.o context.o tokenizer.o +g++ -c new.cpp context.cpp tokenizer.cpp +if [ $? -ne 0 ]; then + echo "compiler errors" + exit +fi +g++ -o new new.o context.o tokenizer.o -pthread +if [ $? 
-ne 0 ]; then
+    echo "linker errors"
+    exit
+fi
diff -r c812a06f87bf -r dd21c8e13074 src/context.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/context.cpp Sat Jul 09 19:24:41 2005 -0700
@@ -0,0 +1,727 @@
+#include "context.h"
+
+static char* context_version="$Id:";
+
+char *token_black;
+char *token_content;
+char *token_context;
+char *token_dccfrom;
+char *token_dccto;
+char *token_default;
+char *token_dnsbl;
+char *token_dnsbll;
+char *token_envfrom;
+char *token_envto;
+char *token_filter;
+char *token_host_limit;
+char *token_html_limit;
+char *token_html_tags;
+char *token_ignore;
+char *token_include;
+char *token_inherit;
+char *token_lbrace;
+char *token_many;
+char *token_off;
+char *token_ok;
+char *token_ok2;
+char *token_on;
+char *token_rbrace;
+char *token_semi;
+char *token_soft;
+char *token_tld;
+char *token_unknown;
+char *token_white;
+
+string_set all_strings; // owns all the strings, only modified by the config loader thread
+
+DNSBL::DNSBL(char *n, char *s, char *m) {   // stores the three pointers as given, nothing is copied
+    name = n;
+    suffix = s;
+    message = m;
+}
+
+
+CONFIG::CONFIG() {   // empty config: no contexts yet and no default context
+    reference_count = 0;
+    generation = 0;
+    load_time = 0;
+    default_context = NULL;
+}
+
+
+CONFIG::~CONFIG() {   // deletes every context that was handed to add_context()
+    for (context_list::iterator i=contexts.begin(); i!=contexts.end(); i++) {
+        CONTEXT *c = *i;
+        delete c;
+    }
+}
+
+
+void CONFIG::add_context(CONTEXTP con) {   // record con; the first parentless one becomes the default
+    contexts.push_back(con);
+    if (!default_context && !con->get_parent()) {
+        // first global context
+        default_context = con;
+    }
+}
+
+
+CONTEXTP CONFIG::find_context(char *to, char *from) {   // pick the context for one recipient/sender pair, NULL if none
+    CONTEXTP con = NULL;
+    context_map::iterator i = env_to.find(to);   // env_to is keyed by recipient: try the full recipient address first (was wrongly keyed on from)
+    if (i != env_to.end()) {
+        con = (*i).second;
+        return con->find_from_context(from);   // the sender may select a more specific context
+    }
+    char *x = strchr(to, '@');
+    if (x) {
+        x++;   // step past the '@' to the recipient domain
+        i = env_to.find(x);
+        if (i != env_to.end()) {
+            con = (*i).second;
+            return con->find_from_context(from);
+        }
+    }
+    if (default_context) {   // recipient not listed anywhere, fall back to the first global context
+        return default_context->find_from_context(from);
+    }
+    return NULL;
+}
+
+
+void CONFIG::dump()
{   // print the default context first, then every other top-level context
+    if (default_context) default_context->dump();
+    for (context_list::iterator i=contexts.begin(); i!=contexts.end(); i++) {
+        CONTEXTP c = *i;
+        CONTEXTP p = c->get_parent();
+        if (!p && (c != default_context)) c->dump();   // contexts with a parent are skipped here
+    }
+}
+
+
+CONTEXT::CONTEXT(CONTEXTP parent_, char *name_) {   // keeps the name_ pointer as given, no copy is made
+    parent = parent_;
+    name = name_;
+    env_from_default = (parent) ? token_inherit : token_unknown;   // children inherit, top-level contexts start as unknown
+    content_filtering = (parent) ? parent->content_filtering : false;   // copied from the parent at construction time
+    content_suffix = NULL;
+    content_message = NULL;
+    host_limit = 0;
+    host_limit_message = NULL;
+    host_random = false;
+    tag_limit = 0;
+    tag_limit_message = NULL;
+}
+
+
+CONTEXT::~CONTEXT() {
+    for (dnsblp_map::iterator i=dnsbl_names.begin(); i!=dnsbl_names.end(); i++) {
+        DNSBLP d = (*i).second;
+        // delete the underlying DNSBL objects.
+        delete d;
+    }
+}
+
+
+char *CONTEXT::get_full_name(char *buffer, int size) {   // parent names joined with '.', written into buffer
+    if (!parent) return name;   // top-level: returns name itself and leaves buffer untouched
+    const int maxlen = 1000;
+    char buf[maxlen];   // scratch space for the parent's full name
+    snprintf(buffer, size, "%s.%s", parent->get_full_name(buf, maxlen), name);
+    return buffer;
+}
+
+
+bool CONTEXT::cover_env_to(char *to) {   // true if this context or an ancestor lists the domain of to
+    const int maxlen = 1000;   // NOTE(review): maxlen and buffer are never used in this function
+    char buffer[maxlen];
+    char *x = strchr(to, '@');
+    if (x) x++;   // compare on the part after '@' when there is one
+    else x = to;   // otherwise on the whole string
+    string_set::iterator i = env_to.find(x);
+    if (i != env_to.end()) return true;
+    return (parent) ? parent->cover_env_to(to) : false;   // not listed here, ask the enclosing context
+}
+
+
+char *CONTEXT::find_from(char *from) {   // returns the status token recorded for this sender
+    // do we have a white/black/unknown for this full from value?
+    string_map::iterator i = env_from.find(from);
+    if (i != env_from.end()) return (*i).second;
+    // do we have a white/black/unknown for the source domain name?
+    char *x = strchr(from, '@');
+    if (x) {
+        x++;
+        i = env_from.find(x);
+        if (i != env_from.end()) return (*i).second;
+    }
+    if ((env_from_default == token_inherit) && parent) {   // nothing matched here, defer to the parent only when inheriting
+        return parent->find_from(from);
+    }
+    return env_from_default;
+}
+
+
+CONTEXTP CONTEXT::find_from_context(char *from) {
+    // do we have a special child context for this full from value?
+    context_map::iterator j = env_from_context.find(from);
+    if (j != env_from_context.end()) {
+        CONTEXTP con = (*j).second;
+        return con->find_from_context(from);   // the child may map this sender further down
+    }
+    char *x = strchr(from, '@');
+    if (x) {
+        x++;
+        // do we have a special context for the source domain name?
+        j = env_from_context.find(x);
+        if (j != env_from_context.end()) {
+            CONTEXTP con = (*j).second;
+            return con->find_from_context(from);
+        }
+    }
+    return this;   // no sender specific child, this context applies
+}
+
+
+CONTEXTP CONTEXT::find_from_context_name(char *name) {   // look up a direct child by name, NULL if absent
+    context_map::iterator i = children.find(name);
+    if (i != children.end()) return (*i).second;
+    return NULL;
+}
+
+
+DNSBLP CONTEXT::find_dnsbl(char *name) {   // look up a dnsbl by nickname here, then in the parents
+    dnsblp_map::iterator i = dnsbl_names.find(name);
+    if (i != dnsbl_names.end()) return (*i).second;
+    if (parent) return parent->find_dnsbl(name);
+    return NULL;
+}
+
+
+void CONTEXT::dump(int level) {   // print this context and its children to stdout, indented by level
+    const int maxlen = 1000;
+    char indent[maxlen];
+    int i = min(maxlen-1, level*4);   // clamp so the terminator always fits in indent[]
+    memset(indent, ' ', i);
+    indent[i] = '\0';
+    printf("%s context %s { \n", indent, name);
+
+    for (dnsblp_map::iterator i=dnsbl_names.begin(); i!=dnsbl_names.end(); i++) {
+        char *n = (*i).first;
+        DNSBL &d = *(*i).second;
+        printf("%s dnsbl %s %s \"%s\"; \n", indent, n, d.suffix, d.message);
+    }
+
+    if (!dnsbl_list.empty()) {
+        printf("%s dnsbl_list", indent);
+        for (dnsblp_list::iterator i=dnsbl_list.begin(); i!=dnsbl_list.end(); i++) {
+            DNSBL &d = *(*i);
+            printf(" %s", d.name);
+        }
+        printf("; \n");
+    }
+
+    if (content_filtering) {
+        printf("%s content on { \n", indent);   // format has one %s only; the stray env_from_default argument was dropped
+        if (content_suffix) {
+            printf("%s filter %s \"%s\"; \n", indent, content_suffix, content_message);
+        }
+        if (!content_host_ignore.empty()) {
+            printf("%s ignore { \n", indent);
+            for (string_set::iterator i=content_host_ignore.begin(); i!=content_host_ignore.end(); i++) {
+                printf("%s %s; \n", indent, *i);
+            }
+            printf("%s }; \n", indent);
+        }
+        if (!content_tlds.empty()) {
+            printf("%s tld { \n", indent);
+            printf("%s ", indent);
+            for (string_set::iterator i=content_tlds.begin(); i!=content_tlds.end(); i++) {
+                printf("%s; ", *i);
+            }
+            printf("\n%s }; \n", indent);
+        }
+        if (!html_tags.empty()) {
+            printf("%s html_tags { \n", indent);
+            printf("%s ", indent);
+            for (string_set::iterator i=html_tags.begin(); i!=html_tags.end(); i++) {
+                printf("%s; ", *i);
+            }
+            printf("\n%s }; \n", indent);
+        }
+        if (host_limit_message) {
+            printf("%s host_limit on %d \"%s\"; \n", indent, host_limit, host_limit_message);
+        }
+        else if (host_random) {
+            printf("%s host_limit soft %d; \n", indent, host_limit);
+        }
+        else {
+            printf("%s host_limit off; \n", indent);
+        }
+        if (tag_limit_message) {
+            printf("%s html_limit on %d \"%s\"; \n", indent, tag_limit, tag_limit_message);   // keyword the parser accepts is html_limit, not tag_limit
+        }
+        else {
+            printf("%s html_limit off; \n", indent);   // same keyword as token_html_limit
+        }
+        printf("%s }; \n", indent);
+    }
+    else {
+        printf("%s content off {}; \n", indent);   // format has one %s only; the stray env_from_default argument was dropped
+    }
+
+    printf("%s env_to { \n", indent);
+    for (string_set::iterator i=env_to.begin(); i!=env_to.end(); i++) {
+        printf("%s %s; \n", indent, *i);
+    }
+    printf("%s }; \n", indent);
+
+    for (context_map::iterator i=children.begin(); i!=children.end(); i++) {
+        CONTEXTP c = (*i).second;
+        c->dump(level+1);   // children are printed nested inside their parent
+    }
+
+    printf("%s env_from %s { \n", indent, env_from_default);
+    if (!env_from.empty()) {
+        printf("%s // white/black/unknown \n", indent);
+        for (string_map::iterator i=env_from.begin(); i!=env_from.end(); i++) {
+            char *f = (*i).first;
+            char *t = (*i).second;
+            printf("%s %s \t %s; \n", indent, f, t);
+        }
+    }
+    if (!env_from_context.empty()) {
+        printf("%s // child contexts \n", indent);
+        for (context_map::iterator j=env_from_context.begin(); j!=env_from_context.end(); j++) {
+            char *f = (*j).first;
+            CONTEXTP t = (*j).second;
+            printf("%s %s \t %s; \n", indent, f, t->name);
+        }
+    }
+    printf("%s }; \n", indent);
+
+    printf("%s }; \n", indent);
+}
+
+
+////////////////////////////////////////////////
+// helper to discard the strings held by a string_set
+//
+static void
discard(string_set &s) {
+    for (string_set::iterator i=s.begin(); i!=s.end(); i++) {
+        free(*i);   // entries are released with free(), so they must come from malloc/strdup
+    }
+    s.clear();
+}
+
+
+////////////////////////////////////////////////
+// helper to register a string in a string set
+// returns the copy already held by the set, or a new strdup() copy that the set now holds
+char* register_string(string_set &s, char *name) {
+    string_set::iterator i = s.find(name);
+    if (i != s.end()) return *i;
+    char *x = strdup(name);
+    s.insert(x);
+    return x;
+}
+
+
+////////////////////////////////////////////////
+// register a global string
+// same as above, using the all_strings set
+char* register_string(char *name) {
+    return register_string(all_strings, name);
+}
+
+
+////////////////////////////////////////////////
+// read the next token and require it to be the given one (pointer compare), else report a token error
+bool tsa(TOKEN &tok, char *token);
+bool tsa(TOKEN &tok, char *token) {
+    char *have = tok.next();
+    if (have == token) return true;
+    tok.token_error(token, have);
+    return false;
+}
+
+
+////////////////////////////////////////////////
+// read "name suffix message ;" and add the resulting DNSBL to this context under that name
+bool parse_dnsbl(TOKEN &tok, CONFIG &dc, CONTEXT &me);
+bool parse_dnsbl(TOKEN &tok, CONFIG &dc, CONTEXT &me) {
+    char *name = tok.next();
+    char *suf = tok.next();
+    char *msg = tok.next();
+    if (!tsa(tok, token_semi)) return false;
+    DNSBLP dns = new DNSBL(name, suf, msg);   // deleted by ~CONTEXT through dnsbl_names
+    me.add_dnsbl(name, dns);
+    return true;
+}
+
+
+////////////////////////////////////////////////
+// read dnsbl names up to ";" (or end of input); each must be defined in this context or a parent
+bool parse_dnsbll(TOKEN &tok, CONFIG &dc, CONTEXT &me);
+bool parse_dnsbll(TOKEN &tok, CONFIG &dc, CONTEXT &me) {
+    while (true) {
+        char *have = tok.next();
+        if (!have) break;
+        if (have == token_semi) break;
+        DNSBLP dns = me.find_dnsbl(have);
+        if (dns) {
+            me.add_dnsbl(dns);
+        }
+        else {
+            tok.token_error("dnsbl name", have);
+            return false;
+        }
+    }
+    return true;
+}
+
+
+////////////////////////////////////////////////
+// read "on|off { ... } ;" holding the content filtering settings of this context
+bool parse_content(TOKEN &tok, CONFIG &dc, CONTEXT &me);
+bool parse_content(TOKEN &tok, CONFIG &dc, CONTEXT &me) {
+    char *setting = tok.next();
+    if (setting == token_on) {
+        me.set_content_filtering(true);
+    }
+    else if (setting == token_off) {
+        me.set_content_filtering(false);
+    }
+    else {
+
tok.token_error("on/off", setting);
+        return false;
+    }
+    if (!tsa(tok, token_lbrace)) return false;
+    while (true) {   // one content keyword per pass, until "}" or end of input
+        char *have = tok.next();
+        if (!have) break;
+        if (have == token_filter) {   // filter suffix message ;
+            me.set_content_suffix(tok.next());
+            me.set_content_message(tok.next());
+            if (!tsa(tok, token_semi)) return false;
+        }
+        else if (have == token_ignore) {   // ignore { host ... } ;
+            if (!tsa(tok, token_lbrace)) return false;
+            while (true) {
+                char *have = tok.next();   // fetch first: the end-of-input test below must see this inner token
+                if (!have) break;   // used to run before the fetch, testing the outer have, so NULL reached add_ignore
+                if (have == token_rbrace) {
+                    break;  // done
+                }
+                else {
+                    me.add_ignore(have);
+                }
+            }
+            if (!tsa(tok, token_semi)) return false;
+        }
+        else if (have == token_tld) {   // tld { name ... } ;
+            if (!tsa(tok, token_lbrace)) return false;
+            while (true) {
+                char *have = tok.next();
+                if (!have) break;
+                if (have == token_rbrace) {
+                    break;  // done
+                }
+                else {
+                    me.add_tld(have);
+                }
+            }
+            if (!tsa(tok, token_semi)) return false;
+        }
+        else if (have == token_html_limit) {   // html_limit on count message ; or html_limit off ;
+            have = tok.next();
+            if (have == token_on) {
+                me.set_tag_limit(tok.nextint());
+                me.set_tag_message(tok.next());
+            }
+            else if (have == token_off) {
+                me.set_tag_limit(0);
+                me.set_tag_message(NULL);
+            }
+            else {
+                tok.token_error("on/off", have);
+                return false;
+            }
+            if (!tsa(tok, token_semi)) return false;
+        }
+        else if (have == token_html_tags) {   // html_tags { tag ... } ;
+            if (!tsa(tok, token_lbrace)) return false;
+            while (true) {
+                char *have = tok.next();
+                if (!have) break;
+                if (have == token_rbrace) {
+                    break;  // done
+                }
+                else {
+                    me.add_tag(have);
+                }
+            }
+            if (!tsa(tok, token_semi)) return false;
+        }
+        else if (have == token_host_limit) {   // host_limit on count message ; or off ; or soft count ;
+            have = tok.next();
+            if (have == token_on) {
+                me.set_host_limit(tok.nextint());
+                me.set_host_message(tok.next());
+                me.set_host_random(false);
+            }
+            else if (have == token_off) {
+                me.set_host_limit(0);
+                me.set_host_message(NULL);
+                me.set_host_random(false);
+            }
+            else if (have == token_soft) {   // soft: keep the limit, but no error message
+                me.set_host_limit(tok.nextint());
+                me.set_host_message(NULL);
+                me.set_host_random(true);
+            }
+            else {
+                tok.token_error("on/off/soft",
have);
+                return false;
+            }
+            if (!tsa(tok, token_semi)) return false;
+        }
+        else if (have == token_rbrace) {
+            break;  // done
+        }
+        else {
+            tok.token_error("content keyword", have);
+            return false;
+        }
+    }
+    return tsa(tok, token_semi);   // the closing "}" must be followed by ";"
+}
+
+
+////////////////////////////////////////////////
+// read "{ ... } ;" listing the recipient addresses or domains this context handles
+bool parse_envto(TOKEN &tok, CONFIG &dc, CONTEXT &me);
+bool parse_envto(TOKEN &tok, CONFIG &dc, CONTEXT &me) {
+    if (!tsa(tok, token_lbrace)) return false;
+    while (true) {
+        char *have = tok.next();
+        if (!have) break;
+        if (have == token_rbrace) break;
+        if (have == token_semi) {
+            // optional separators
+        }
+        else if (have == token_dccto) {   // dcc_to flavor { ... } : pull env_to entries out of an embedded block
+            char *flavor = tok.next();
+            if (!tsa(tok, token_lbrace)) return false;
+            bool keeping = false;
+            while (true) {
+                char *have = tok.next();
+                if (!have) break;
+                if (have == token_rbrace) break;
+                if (have == flavor) {   // token equals the requested flavor: keep the env_to entries that follow
+                    keeping = true;
+                    continue;
+                }
+                else if ((have == token_ok) || (have == token_ok2) || (have == token_many)) {   // any other flavor token: stop keeping
+                    keeping = false;
+                    continue;
+                }
+                if (have == token_envto) {
+                    have = tok.next();
+                    if (keeping) {
+                        if (me.allow_env_to(have)) {   // entries that are not allowed are skipped silently here
+                            me.add_to(have);
+                            dc.add_to(have, &me);
+                        }
+                    }
+                }
+                tok.skipeol();   // whatever remains on the line is not examined
+            }
+        }
+        else if (me.allow_env_to(have)) {
+            me.add_to(have);   // remember it in this context
+            dc.add_to(have, &me);   // and in the config wide recipient map
+        }
+        else {
+            tok.token_error("valid env_to address or domain name", have);
+            return false;
+        }
+    }
+    return tsa(tok, token_semi);
+}
+
+
+////////////////////////////////////////////////
+// read "[black|white|unknown] { ... } ;" mapping senders to a status or to a child context
+bool parse_envfrom(TOKEN &tok, CONFIG &dc, CONTEXT &me);
+bool parse_envfrom(TOKEN &tok, CONFIG &dc, CONTEXT &me) {
+    char *st = tok.next();
+    if ((st == token_black) || (st == token_white) || (st == token_unknown)) {
+        me.set_from_default(st);   // optional default status for senders not listed below
+    }
+    else {
+        tok.push(st);   // not a status, hand the token back so the "{" check below sees it
+    }
+    if (!tsa(tok, token_lbrace)) return false;
+    while (true) {
+        char *have = tok.next();
+        if (!have) break;
+        if (have == token_rbrace) break;
+        if (have == token_semi) {
+            // optional separators
+        }
+        else if (have == token_dccfrom) {
+            if (!tsa(tok,
token_lbrace)) return false;
+            bool keeping = false;
+            bool many = false;
+            while (true) {
+                char *have = tok.next();
+                if (!have) break;
+                if (have == token_rbrace) break;
+                if (have == token_ok) {   // entries after "ok" are kept as white
+                    keeping = true;
+                    many = false;
+                    continue;
+                }
+                else if (have == token_many) {   // entries after "many" are kept as black
+                    keeping = true;
+                    many = true;
+                    continue;
+                }
+                else if (have == token_ok2) {   // entries after "ok2" are dropped
+                    keeping = false;
+                    continue;
+                }
+                if (have == token_envfrom) {
+                    have = tok.next();
+                    if (keeping) {
+                        me.add_from(have, (many) ? token_black : token_white);
+                    }
+                }
+                tok.skipeol();   // whatever remains on the line is not examined
+            }
+        }
+        else {
+            // may be a valid email address or domain name
+            char *st = tok.next();   // this st hides the one declared at the top of the function
+            if ((st == token_black) || (st == token_white) || (st == token_unknown)) {
+                me.add_from(have, st);
+            }
+            else {
+                CONTEXTP con = me.find_from_context_name(st);   // only direct children of this context are searched
+                if (con) {
+                    me.add_from_context(have, con);
+                }
+                else {
+                    tok.token_error("white/black/unknown or child context name", st);
+                    return false;
+                }
+            }
+        }
+    }
+    return tsa(tok, token_semi);
+}
+
+
+////////////////////////////////////////////////
+// read "name { ... } ;" into a new CONTEXT below parent (NULL parent for a top-level context)
+bool parse_context(TOKEN &tok, CONFIG &dc, CONTEXTP parent);
+bool parse_context(TOKEN &tok, CONFIG &dc, CONTEXTP parent) {
+    char *name = tok.next();
+    if (!tsa(tok, token_lbrace)) return false;
+    CONTEXTP con = new CONTEXT(parent, name);   // NOTE(review): the "return false" paths inside the loop below do not delete con
+
+    while (true) {
+        char *have = tok.next();
+        if (!have) break;
+        if (have == token_rbrace) break;  // done
+        if (have == token_dnsbl) {
+            if (!parse_dnsbl(tok, dc, *con)) return false;
+        }
+        else if (have == token_dnsbll) {
+            if (!parse_dnsbll(tok, dc, *con)) return false;
+        }
+        else if (have == token_content) {
+            if (!parse_content(tok, dc, *con)) return false;
+        }
+        else if (have == token_envto) {
+            if (!parse_envto(tok, dc, *con)) return false;
+        }
+        else if (have == token_envfrom) {
+            if (!parse_envfrom(tok, dc, *con)) return false;
+        }
+        else if (have == token_context) {
+            if (!parse_context(tok, dc, con)) return false;   // nested context, con becomes its parent
+        }
+        else {
+            tok.token_error("context keyword", have);
+            return
false;
+        }
+    }
+
+    if (!tsa(tok, token_semi)) {
+        delete con;   // not registered anywhere yet, so it is freed here
+        return false;
+    }
+    dc.add_context(con);   // the config deletes its contexts in ~CONFIG
+    if (parent) parent->add_context(con);   // the parent can then find this child by name
+    return true;
+}
+
+
+////////////////////////////////////////////////
+// parse a config file
+// only "context" statements are accepted at the top level; returns false on the first error
+bool load_conf(CONFIG &dc, char *fn) {
+    TOKEN tok(fn, &dc.config_files);
+    while (true) {
+        char *have = tok.next();
+        if (!have) break;   // end of input
+        if (have == token_context) {
+            if (!parse_context(tok, dc, NULL)) {
+                return false;
+            }
+        }
+        else {
+            tok.token_error(token_context, have);
+            return false;
+        }
+    }
+    return true;
+}
+
+
+////////////////////////////////////////////////
+// init the tokens
+// each keyword is registered in all_strings; the parser compares tokens by pointer (presumably the tokenizer returns these same pointers, confirm in tokenizer.cpp)
+void token_init() {
+    token_black = register_string("black");
+    token_content = register_string("content");
+    token_context = register_string("context");
+    token_dccfrom = register_string("dcc_from");
+    token_dccto = register_string("dcc_to");
+    token_default = register_string("default");
+    token_dnsbl = register_string("dnsbl");
+    token_dnsbll = register_string("dnsbl_list");
+    token_envfrom = register_string("env_from");
+    token_envto = register_string("env_to");
+    token_filter = register_string("filter");
+    token_host_limit = register_string("host_limit");
+    token_html_limit = register_string("html_limit");
+    token_html_tags = register_string("html_tags");
+    token_ignore = register_string("ignore");
+    token_include = register_string("include");
+    token_inherit = register_string("inherit");
+    token_lbrace = register_string("{");
+    token_many = register_string("many");
+    token_off = register_string("off");
+    token_ok = register_string("ok");
+    token_ok2 = register_string("ok2");
+    token_on = register_string("on");
+    token_rbrace = register_string("}");
+    token_semi = register_string(";");
+    token_soft = register_string("soft");
+    token_tld = register_string("tld");
+    token_unknown = register_string("unknown");
+    token_white = register_string("white");
+}
diff -r c812a06f87bf -r dd21c8e13074 src/context.h
--- /dev/null Thu Jan 01
00:00:00 1970 +0000 +++ b/src/context.h Sat Jul 09 19:24:41 2005 -0700 @@ -0,0 +1,149 @@ +#include "tokenizer.h" +#include + + +enum status {oksofar, // not rejected yet + white, // whitelisted + black, // blacklisted + reject, // rejected by a dns list + reject_tag, // too many bad html tags + reject_host}; // too many hosts/urls in body + +class DNSBL; +class CONTEXT; + +typedef map string_map; +typedef set int_set; +typedef list string_list; +typedef DNSBL * DNSBLP; +typedef list dnsblp_list; +typedef map dnsblp_map; +typedef CONTEXT * CONTEXTP; +typedef list context_list; +typedef map context_map; +typedef map ns_mapper; + +struct DNSBL { + char *name; // nickname for this dns based list + char *suffix; // blacklist suffix like blackholes.five-ten-sg.com + char *message; // error message with one or two %s operators for the ip address replacement + DNSBL(char *n, char *s, char *m); +}; + +class CONTEXT { + CONTEXTP parent; + char * name; + context_map children; // map child context names to their contexts + string_set env_to; // + string_map env_from; // map senders to white/black/unknown + context_map env_from_context; // map senders to a child context + char * env_from_default; // default value for senders that are not found in the map white/black/unknown/inherit + bool content_filtering; // + char * content_suffix; // for sbl url body filtering + char * content_message; // "" + string_set content_host_ignore;// hosts to ignore for content sbl checking + string_set content_tlds; // + string_set html_tags; // set of valid html tags + int host_limit; // limit on host names + char * host_limit_message; // error message for excessive host names + bool host_random; // pick a random selection of host names rather than error for excessive hosts + int tag_limit; // limit on bad html tags + char * tag_limit_message; // error message for excessive bad html tags + dnsblp_map dnsbl_names; // name to dnsbl mapping for lists that are available in this context and children 
+ dnsblp_list dnsbl_list; // list of dnsbls to be used in this context + +public: + CONTEXT(CONTEXTP parent_, char *name_); + ~CONTEXT(); + CONTEXTP get_parent() {return parent;}; + char* get_full_name(char *buf, int size); + void add_context(CONTEXTP child) {children[child->name] = child;}; + bool allow_env_to(char *to) {return (parent) ? parent->cover_env_to(to) : true;}; + bool cover_env_to(char *to); + + void add_to(char *to) {env_to.insert(to);}; + void add_from(char *from, char *status) {env_from[from] = status;}; + void add_from_context(char *from, CONTEXTP con) {env_from_context[from] = con;}; + void set_from_default(char *status) {env_from_default = status;}; + char* find_from(char *from); + CONTEXTP find_from_context(char *from); + CONTEXTP find_from_context_name(char *name); + + void set_content_filtering(bool filter) {content_filtering = filter;}; + void set_content_suffix(char *suffix) {content_suffix = suffix;}; + void set_content_message(char *message) {content_message = message;}; + void add_ignore(char *host) {content_host_ignore.insert(host);}; + void add_tld(char *tld) {content_tlds.insert(tld);}; + + void set_host_limit(int limit) {host_limit = limit;}; + void set_host_message(char *message) {host_limit_message = message;}; + void set_host_random(bool random) {host_random = random;}; + void set_tag_limit(int limit) {tag_limit = limit;}; + void set_tag_message(char *message) {tag_limit_message = message;}; + void add_tag(char *tag) {html_tags.insert(tag);}; + + void add_dnsbl(char *name, DNSBLP dns) {dnsbl_names[name] = dns;}; + void add_dnsbl(DNSBLP dns) {dnsbl_list.push_back(dns);}; + DNSBLP find_dnsbl(char *name); + + void dump(int level = 0); +}; + + +struct CONFIG { + // the only mutable stuff once it has been loaded from the config file + int reference_count; // protected by the global config_mutex + // all the rest is constant after loading from the config file + int generation; + time_t load_time; + string_set config_files; + context_list 
contexts; // owns all the contexts, not just top level contexts + context_map env_to; // map recipient to a filtering context + CONTEXTP default_context;// for env_to values that don't have their own specific filtering context + + CONFIG(); + ~CONFIG(); + void add_context(CONTEXTP con); + void add_to(char *to, CONTEXTP con) {env_to[to] = con;}; + CONTEXTP find_context(char *to, char *from); + void dump(); +}; + +extern char *token_black; +extern char *token_content; +extern char *token_context; +extern char *token_dccfrom; +extern char *token_dccto; +extern char *token_default; +extern char *token_dnsbl; +extern char *token_dnsbll; +extern char *token_envfrom; +extern char *token_envto; +extern char *token_filter; +extern char *token_host_limit; +extern char *token_html_limit; +extern char *token_html_tags; +extern char *token_ignore; +extern char *token_include; +extern char *token_inherit; +extern char *token_lbrace; +extern char *token_many; +extern char *token_off; +extern char *token_ok; +extern char *token_ok2; +extern char *token_on; +extern char *token_rbrace; +extern char *token_semi; +extern char *token_soft; +extern char *token_tld; +extern char *token_unknown; +extern char *token_white; + +extern string_set all_strings; // owns all the strings, only modified by the config loader thread + +static void discard(string_set &s); +char* register_string(string_set &s, char *name); +char* register_string(char *name); +CONFIG *parse_config(char *fn); +bool load_conf(CONFIG &dc, char *fn); +void token_init(); diff -r c812a06f87bf -r dd21c8e13074 src/new.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/new.cpp Sat Jul 09 19:24:41 2005 -0700 @@ -0,0 +1,1389 @@ +/* + +Copyright (c) 2004, 2005 Carl Byington - 510 Software Group, released +under the GPL version 2 or any later version at your choice available at +http://www.fsf.org/licenses/gpl.txt + +Based on a sample milter Copyright (c) 2000-2003 Sendmail, Inc. and its +suppliers. 
Inspired by the DCC by Rhyolite Software + +-r port The port used to talk to our internal dns resolver processes +-p port The port through which the MTA will connect to this milter. +-t sec The timeout value. +-c Check the config, and print a copy to stdout. Don't start the + milter or do anything with the socket. +-d Add debug syslog entries + + +TODO: +1) Add config for max_recipients for each mail domain. Recipients in +excess of that limit will be rejected, and the entire data will be +rejected if it is sent. + +2) Add config for poison addresses. If any recipient is poison, all +recipients are rejected even if they would be whitelisted, and the +data is rejected if sent. + +3) Add option to only allow one recipient if the return path is empty. + +4) Check if the envelope from domain name primary MX points 127.0.0.0/8 + +5) Add option for using smtp connections to verify addresses from backup +mx machines. This allows the backup mx to learn the valid addresses +on the primary machine. 
+ +*/ + + +// from sendmail sample +#include +#include +#include +#include +#include + +// needed for socket io +#include +#include +#include +#include +#include +#include +#include +#include + +// needed for thread +#include + +// needed for std c++ collections +#include +#include +#include + +// for the dns resolver +#include +#include +#include + +// misc stuff needed here +#include +#include +#include +#include /* header for waitpid() and various macros */ +#include /* header for signal functions */ + +#include "context.h" + +static char* dnsbl_version="$Id:"; + +extern "C" { + #include "libmilter/mfapi.h" + sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr); + sfsistat mlfi_envfrom(SMFICTX *ctx, char **argv); + sfsistat mlfi_envrcpt(SMFICTX *ctx, char **argv); + sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len); + sfsistat mlfi_eom(SMFICTX *ctx); + sfsistat mlfi_abort(SMFICTX *ctx); + sfsistat mlfi_close(SMFICTX *ctx); + void sig_chld(int signo); +} + +struct ns_map { + // all the strings are owned by the keys/values in the ns_host string map + string_map ns_host; // nameserver name -> host name that uses this name server + ns_mapper ns_ip; // nameserver name -> ip address of the name server +}; + +static bool debug_syslog = false; +static bool loader_run = true; // used to stop the config loader thread +static CONFIG * config = NULL; // protected by the config_mutex +static int generation = 0; // protected by the config_mutex + +static pthread_mutex_t config_mutex; +static pthread_mutex_t syslog_mutex; +static pthread_mutex_t resolve_mutex; +static pthread_mutex_t fd_pool_mutex; + +static std::set fd_pool; +static int NULL_SOCKET = -1; +static char *resolver_port = NULL; // unix domain socket to talk to the dns resolver process +static int resolver_socket = NULL_SOCKET; // socket used to listen for resolver requests +static time_t ERROR_SOCKET_TIME = 60; // number of seconds between attempts to open the spam filter socket 
+static time_t last_error_time; +static int resolver_sock_count = 0; // protected with fd_pool_mutex +static int resolver_pool_size = 0; // protected with fd_pool_mutex + + +// packed structure to allow a single socket write to dump the +// length and the following answer. The packing attribute is gcc specific. +struct glommer { + int length; + #ifdef NS_PACKETSZ + u_char answer[NS_PACKETSZ]; // with a resolver, we return resolver answers + #else + int answer; // without a resolver, we return a single ip4 address, 0 == no answer + #endif +} __attribute__ ((packed)); + +struct mlfiPriv; + + +//////////////////////////////////////////////// +// helper to discard the strings and objects held by an ns_map +// +static void discard(ns_map &s); +static void discard(ns_map &s) { + for (string_map::iterator i=s.ns_host.begin(); i!=s.ns_host.end(); i++) { + char *x = (*i).first; + char *y = (*i).second; + free(x); + free(y); + } + s.ns_ip.clear(); + s.ns_host.clear(); +} + +//////////////////////////////////////////////// +// helper to register a string in an ns_map +// +static void register_string(ns_map &s, char *name, char *refer); +static void register_string(ns_map &s, char *name, char *refer) { + string_map::iterator i = s.ns_host.find(name); + if (i != s.ns_host.end()) return; + char *x = strdup(name); + char *y = strdup(refer); + s.ns_ip[x] = 0; + s.ns_host[x] = y; + +} + +//////////////////////////////////////////////// +// syslog a message +// +static void my_syslog(mlfiPriv *priv, char *text); + + +// include the content scanner +#include "scanner.cpp" + + +//////////////////////////////////////////////// +// disconnect the fd from the dns resolver process +// +void my_disconnect(int sock, bool decrement = true); +void my_disconnect(int sock, bool decrement) +{ + if (sock != NULL_SOCKET) { + if (decrement) { + pthread_mutex_lock(&fd_pool_mutex); + resolver_sock_count--; + pthread_mutex_unlock(&fd_pool_mutex); + } + shutdown(sock, SHUT_RDWR); + close(sock); + } +} 
+ + +//////////////////////////////////////////////// +// return fd connected to the dns resolver process +// +int my_connect(); +int my_connect() +{ + // if we have had recent errors, don't even try to open the socket + time_t now = time(NULL); + if ((now - last_error_time) < ERROR_SOCKET_TIME) return NULL_SOCKET; + + // nothing recent, maybe this time it will work + int sock = NULL_SOCKET; + sockaddr_un server; + memset(&server, '\0', sizeof(server)); + server.sun_family = AF_UNIX; + strncpy(server.sun_path, resolver_port, sizeof(server.sun_path)-1); + sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (sock != NULL_SOCKET) { + bool rc = (connect(sock, (sockaddr *)&server, sizeof(server)) == 0); + if (!rc) { + my_disconnect(sock, false); + sock = NULL_SOCKET; + last_error_time = now; + } + } + else last_error_time = now; + if (sock != NULL_SOCKET) { + pthread_mutex_lock(&fd_pool_mutex); + resolver_sock_count++; + pthread_mutex_unlock(&fd_pool_mutex); + } + return sock; +} + + +//////////////////////////////////////////////// +// mail filter private data, held for us by sendmail +// +struct mlfiPriv +{ + // connection specific data + CONFIG *pc; // global context with our maps + int fd; // to talk to dns resolvers process + bool err; // did we get any errors on the resolver socket? + int ip; // ip4 address of the smtp client + map checked; // status from those lists + // message specific data + char *mailaddr; // envelope from value + char *queueid; // sendmail queue id + bool authenticated; // client authenticated? if so, suppress all dnsbl checks + bool have_whites; // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails + bool only_whites; // every recipient is whitelisted? 
+ string_set non_whites; // remember the non-whitelisted recipients so we can remove them if need be + recorder *memory; // memory for the content scanner + url_scanner *scanner; // object to handle body scanning + mlfiPriv(); + ~mlfiPriv(); + void reset(bool final = false); // for a new message + void get_fd(); + void return_fd(); + int my_read(char *buf, int len); + int my_write(char *buf, int len); +}; + +mlfiPriv::mlfiPriv() { + pthread_mutex_lock(&config_mutex); + pc = config; + pc->reference_count++; + pthread_mutex_unlock(&config_mutex); + get_fd(); + ip = 0; + mailaddr = NULL; + queueid = NULL; + authenticated = false; + have_whites = false; + only_whites = true; + memory = new recorder(this, &pc->html_tags, &pc->tlds); + scanner = new url_scanner(memory); +} + +mlfiPriv::~mlfiPriv() { + return_fd(); + pthread_mutex_lock(&config_mutex); + pc->reference_count--; + pthread_mutex_unlock(&config_mutex); + reset(true); +} + +void mlfiPriv::reset(bool final) { + if (mailaddr) free(mailaddr); + if (queueid) free(queueid); + discard(non_whites); + delete memory; + delete scanner; + if (!final) { + mailaddr = NULL; + queueid = NULL; + authenticated = false; + have_whites = false; + only_whites = true; + memory = new recorder(this, &pc->html_tags, &pc->tlds); + scanner = new url_scanner(memory); + } +} + +void mlfiPriv::get_fd() +{ + err = true; + fd = NULL_SOCKET; + int result = pthread_mutex_lock(&fd_pool_mutex); + if (!result) { + std::set::iterator i; + i = fd_pool.begin(); + if (i != fd_pool.end()) { + // have at least one fd in the pool + err = false; + fd = *i; + fd_pool.erase(fd); + resolver_pool_size--; + pthread_mutex_unlock(&fd_pool_mutex); + } + else { + // pool is empty, get a new fd + pthread_mutex_unlock(&fd_pool_mutex); + fd = my_connect(); + err = (fd == NULL_SOCKET); + } + } + else { + // cannot lock the pool, just get a new fd + fd = my_connect(); + err = (fd == NULL_SOCKET); + } +} + +void mlfiPriv::return_fd() +{ + if (err) { + // this fd got a 
socket error, so close it, rather than returning it to the pool + my_disconnect(fd); + } + else { + int result = pthread_mutex_lock(&fd_pool_mutex); + if (!result) { + if ((resolver_sock_count > resolver_pool_size*5) || (resolver_pool_size < 5)) { + // return the fd to the pool + fd_pool.insert(fd); + resolver_pool_size++; + pthread_mutex_unlock(&fd_pool_mutex); + } + else { + // more than 20% of the open resolver sockets are in the pool, and the + // pool as at least 5 sockets. that is enough, so just close this one. + pthread_mutex_unlock(&fd_pool_mutex); + my_disconnect(fd); + } + } + else { + // could not lock the pool, so just close the fd + my_disconnect(fd); + } + } +} + +int mlfiPriv::my_write(char *buf, int len) +{ + if (err) return 0; + int rs = 0; + while (len) { + int ws = write(fd, buf, len); + if (ws > 0) { + rs += ws; + len -= ws; + buf += ws; + } + else { + // peer closed the socket! + rs = 0; + err = true; + break; + } + } + return rs; +} + +int mlfiPriv::my_read(char *buf, int len) +{ + if (err) return 0; + int rs = 0; + while (len > 1) { + int ws = read(fd, buf, len); + if (ws > 0) { + rs += ws; + len -= ws; + buf += ws; + } + else { + // peer closed the socket! 
+ rs = 0; + err = true; + break; + } + } + return rs; +} + +#define MLFIPRIV ((struct mlfiPriv *) smfi_getpriv(ctx)) + + +//////////////////////////////////////////////// +// syslog a message +// +static void my_syslog(mlfiPriv *priv, char *text) { + char buf[1000]; + if (priv) { + snprintf(buf, sizeof(buf), "%s: %s", priv->queueid, text); + text = buf; + } + pthread_mutex_lock(&syslog_mutex); + openlog("dnsbl", LOG_PID, LOG_MAIL); + syslog(LOG_NOTICE, "%s", text); + closelog(); + pthread_mutex_unlock(&syslog_mutex); +} + +static void my_syslog(char *text); +static void my_syslog(char *text) { + my_syslog(NULL, text); +} + +//////////////////////////////////////////////// +// register a global string +// +static char* register_string(char *name); +static char* register_string(char *name) { + return register_string(all_strings, name); +} + + +static char* next_token(char *delim); +static char* next_token(char *delim) { + char *name = strtok(NULL, delim); + if (!name) return name; + return register_string(name); +} + + +//////////////////////////////////////////////// +// lookup an email address in the env_from or env_to maps +// +static char* lookup1(char *email, string_map map); +static char* lookup1(char *email, string_map map) { + string_map::iterator i = map.find(email); + if (i != map.end()) return (*i).second; + char *x = strchr(email, '@'); + if (!x) return DEFAULT; + x++; + i = map.find(x); + if (i != map.end()) return (*i).second; + return DEFAULT; +} + + +//////////////////////////////////////////////// +// lookup an email address in the env_from or env_to maps +// this email address is passed in from sendmail, and will +// always be enclosed in <>. It may have mixed case, just +// as the mail client sent it. 
+// +static char* lookup(char* email, string_map map); +static char* lookup(char* email, string_map map) { + int n = strlen(email)-2; + if (n < 1) return DEFAULT; // malformed + char *key = strdup(email+1); + key[n] = '\0'; + for (int i=0; i 0) { + rs += ns; + if (question[rs-1] == '\0') { + // last byte read was the null terminator, we are done + break; + } + } + else { + // peer closed the socket + //my_syslog("!!child worker process, peer closed socket while reading question"); + shutdown(socket, SHUT_RDWR); + close(socket); + return; + } + } + + // find the answer +#ifdef NS_PACKETSZ + //char text[1000]; + //snprintf(text, sizeof(text), "!!child worker process has a question %s", question); + //my_syslog(text); + glom.length = res_search(question, ns_c_in, ns_t_a, glom.answer, sizeof(glom.answer)); + if (glom.length < 0) glom.length = 0; // represent all errors as zero length answers +#else + glom.length = sizeof(glom.answer); + glom.answer = 0; + struct hostent *host = gethostbyname(question); + if (host && (host->h_addrtype == AF_INET)) { + memcpy(&glom.answer, host->h_addr, sizeof(glom.answer)); + } +#endif + + // write the answer + char *buf = (char *)&glom; + int len = glom.length + sizeof(glom.length); + //snprintf(text, sizeof(text), "!!child worker process writing answer length %d for total %d", glom.length, len); + //my_syslog(text); + int ws = 0; + while (len > ws) { + int ns = write(socket, buf+ws, len-ws); + if (ns > 0) { + ws += ns; + } + else { + // peer closed the socket! + //my_syslog("!!child worker process, peer closed socket while writing answer"); + shutdown(socket, SHUT_RDWR); + close(socket); + return; + } + } + } +} + + +//////////////////////////////////////////////// +// ask a dns question and get an A record answer - we don't try +// very hard, just using the default resolver retry settings. +// If we cannot get an answer, we just accept the mail. 
+// +// +static int dns_interface(mlfiPriv &priv, char *question, bool maybe_ip, ns_map *nameservers); +static int dns_interface(mlfiPriv &priv, char *question, bool maybe_ip, ns_map *nameservers) { + // this part can be done without locking the resolver mutex. Each + // milter thread is talking over its own socket to a separate resolver + // process, which does the actual dns resolution. + if (priv.err) return 0; // cannot ask more questions on this socket. + priv.my_write(question, strlen(question)+1); // write the question including the null terminator + glommer glom; + char *buf = (char *)&glom; + priv.my_read(buf, sizeof(glom.length)); + buf += sizeof(glom.length); + ///char text[1000]; + ///snprintf(text, sizeof(text), "!!milter thread wrote question %s and has answer length %d", question, glom.length); + ///my_syslog(text); + if ((glom.length < 0) || (glom.length > sizeof(glom.answer))) { + priv.err = true; + return 0; // cannot process overlarge answers + } + priv.my_read(buf, glom.length); + +#ifdef NS_PACKETSZ + // now we need to lock the resolver mutex to keep the milter threads from + // stepping on each other while parsing the dns answer. 
+ int ret_address = 0; + pthread_mutex_lock(&resolve_mutex); + if (glom.length > 0) { + // parse the answer + ns_msg handle; + ns_rr rr; + if (ns_initparse(glom.answer, glom.length, &handle) == 0) { + // look for ns names + if (nameservers) { + ns_map &ns = *nameservers; + int rrnum = 0; + while (ns_parserr(&handle, ns_s_ns, rrnum++, &rr) == 0) { + if (ns_rr_type(rr) == ns_t_ns) { + char nam[NS_MAXDNAME+1]; + char *n = nam; + const u_char *p = ns_rr_rdata(rr); + while (((n-nam) < NS_MAXDNAME) && ((p-glom.answer) < glom.length) && *p) { + size_t s = *(p++); + if (s > 191) { + // compression pointer + s = (s-192)*256 + *(p++); + if (s >= glom.length) break; // pointer outside bounds of answer + p = glom.answer + s; + s = *(p++); + } + if (s > 0) { + if ((n-nam) >= (NS_MAXDNAME-s)) break; // destination would overflow name buffer + if ((p-glom.answer) >= (glom.length-s)) break; // source outside bounds of answer + memcpy(n, p, s); + n += s; + p += s; + *(n++) = '.'; + } + } + if (n-nam) n--; // remove trailing . 
+ *n = '\0'; // null terminate it + register_string(ns, nam, question); // ns host to lookup later + } + } + rrnum = 0; + while (ns_parserr(&handle, ns_s_ar, rrnum++, &rr) == 0) { + if (ns_rr_type(rr) == ns_t_a) { + char* nam = (char*)ns_rr_name(rr); + ns_mapper::iterator i = ns.ns_ip.find(nam); + if (i != ns.ns_ip.end()) { + // we want this ip address + int address; + memcpy(&address, ns_rr_rdata(rr), sizeof(address)); + ns.ns_ip[nam] = address; + } + } + } + } + int rrnum = 0; + while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) { + if (ns_rr_type(rr) == ns_t_a) { + int address; + memcpy(&address, ns_rr_rdata(rr), sizeof(address)); + ret_address = address; + } + } + } + } + if (maybe_ip && !ret_address) { + // might be a bare ip address + in_addr ip; + if (inet_aton(question, &ip)) { + ret_address = ip.s_addr; + } + } + pthread_mutex_unlock(&resolve_mutex); + return ret_address; +#else + return glom.answer; +#endif +} + + +//////////////////////////////////////////////// +// check a single dnsbl +// +static status check_single(mlfiPriv &priv, int ip, char *suffix); +static status check_single(mlfiPriv &priv, int ip, char *suffix) { + // make a dns question + const u_char *src = (const u_char *)&ip; + if (src[0] == 127) return oksofar; // don't do dns lookups on localhost +#ifdef NS_MAXDNAME + char question[NS_MAXDNAME]; +#else + char question[1000]; +#endif + snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", src[3], src[2], src[1], src[0], suffix); + // ask the question, if we get an A record it implies a blacklisted ip address + return (dns_interface(priv, question, false, NULL)) ? 
reject : oksofar; +} + + +//////////////////////////////////////////////// +// check a single dnsbl +// +static status check_single(mlfiPriv &priv, int ip, DNSBL &bl); +static status check_single(mlfiPriv &priv, int ip, DNSBL &bl) { + return check_single(priv, ip, bl.suffix); +} + + +//////////////////////////////////////////////// +// check the dnsbls specified for this recipient +// +static status check_dnsbl(mlfiPriv &priv, DNSBLLP dnsbllp, DNSBLP &rejectlist); +static status check_dnsbl(mlfiPriv &priv, DNSBLLP dnsbllp, DNSBLP &rejectlist) { + if (priv.authenticated) return oksofar; + if (!dnsbllp) return oksofar; + DNSBLL &dnsbll = *dnsbllp; + for (DNSBLL::iterator i=dnsbll.begin(); i!=dnsbll.end(); i++) { + DNSBLP dp = *i; // non null by construction + status st; + map::iterator f = priv.checked.find(dp); + if (f == priv.checked.end()) { + // have not checked this list yet + st = check_single(priv, priv.ip, *dp); + rejectlist = dp; + priv.checked[dp] = st; + } + else { + st = (*f).second; + rejectlist = (*f).first; + } + if (st == reject) return st; + } + return oksofar; +} + + +//////////////////////////////////////////////// +// check the hosts from the body against the content dnsbl +// +static status check_hosts(mlfiPriv &priv, char *&host, int &ip); +static status check_hosts(mlfiPriv &priv, char *&host, int &ip) { + CONFIG &dc = *priv.pc; + int count = 0; + ns_map nameservers; + bool ran = priv.pc->host_random; + int lim = priv.pc->host_limit; // we should not look at more than this many hosts + int cnt = priv.memory->hosts.size(); // number of hosts we could look at + int_set ips; // remove duplicate ip addresses + for (string_set::iterator i=priv.memory->hosts.begin(); i!=priv.memory->hosts.end(); i++) { + host = *i; // a reference into priv.memory->hosts, which will live until this smtp transaction is closed + + // don't bother looking up hosts on the ignore list + string_set::iterator j = priv.pc->content_host_ignore.find(host); + if (j != 
priv.pc->content_host_ignore.end()) continue; + + // try to only look at lim/cnt fraction of the available cnt host names in random mode + if ((cnt > lim) && (lim > 0) && ran) { + int r = rand() % cnt; + if (r >= lim) { + char buf[1000]; + snprintf(buf, sizeof(buf), "host %s skipped", host); + my_syslog(&priv, buf); + continue; + } + } + count++; + if ((count > lim) && (lim > 0) && (!ran)) { + discard(nameservers); + return reject_host; + } + ip = dns_interface(priv, host, true, &nameservers); + if (debug_syslog) { + char buf[1000]; + if (ip) { + char adr[sizeof "255.255.255.255"]; + adr[0] = '\0'; + inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); + snprintf(buf, sizeof(buf), "host %s found at %s", host, adr); + } + else { + snprintf(buf, sizeof(buf), "host %s not found", host); + } + my_syslog(&priv, buf); + } + if (ip) { + int_set::iterator i = ips.find(ip); + if (i == ips.end()) { + ips.insert(ip); + status st = check_single(priv, ip, dc.content_suffix); + if (st == reject) { + discard(nameservers); + return st; + } + } + } + } + lim *= 4; // allow average of 3 ns per host name + for (ns_mapper::iterator i=nameservers.ns_ip.begin(); i!=nameservers.ns_ip.end(); i++) { + count++; + if ((count > lim) && (lim > 0)) { + if (ran) continue; // don't complain + discard(nameservers); + return reject_host; + } + host = (*i).first; // a transient reference that needs to be replaced before we return it + ip = (*i).second; + if (!ip) ip = dns_interface(priv, host, false, NULL); + if (debug_syslog) { + char buf[200]; + if (ip) { + char adr[sizeof "255.255.255.255"]; + adr[0] = '\0'; + inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); + snprintf(buf, sizeof(buf), "ns %s found at %s", host, adr); + } + else { + snprintf(buf, sizeof(buf), "ns %s not found", host); + } + my_syslog(&priv, buf); + } + if (ip) { + int_set::iterator i = ips.find(ip); + if (i == ips.end()) { + ips.insert(ip); + status st = check_single(priv, ip, dc.content_suffix); + if (st == 
reject) { + string_map::iterator j = nameservers.ns_host.find(host); + if (j != nameservers.ns_host.end()) { + char *refer = (*j).second; + char buf[1000]; + snprintf(buf, sizeof(buf), "%s with nameserver %s", refer, host); + host = register_string(priv.memory->hosts, buf); // put a copy into priv.memory->hosts, and return that reference + } + else { + host = register_string(priv.memory->hosts, host); // put a copy into priv.memory->hosts, and return that reference + } + discard(nameservers); + return st; + } + } + } + } + discard(nameservers); + host = NULL; + int bin = priv.memory->binary_tags; + int bad = priv.memory->bad_html_tags; + lim = priv.pc->tag_limit; + if (3*bin > bad) return oksofar; // probably .zip or .tar.gz with random content + if ((bad > lim) && (lim > 0)) return reject_tag; + return oksofar; +} + + +//////////////////////////////////////////////// +// start of sendmail milter interfaces +// +sfsistat mlfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr) +{ + // allocate some private memory + mlfiPriv *priv = new mlfiPriv; + if (hostaddr->sa_family == AF_INET) { + priv->ip = ((struct sockaddr_in *)hostaddr)->sin_addr.s_addr; + } + + // save the private data + smfi_setpriv(ctx, (void*)priv); + + // continue processing + return SMFIS_CONTINUE; +} + +sfsistat mlfi_envfrom(SMFICTX *ctx, char **from) +{ + mlfiPriv &priv = *MLFIPRIV; + priv.mailaddr = strdup(from[0]); + priv.authenticated = (smfi_getsymval(ctx, "{auth_authen}") != NULL); + return SMFIS_CONTINUE; +} + +sfsistat mlfi_envrcpt(SMFICTX *ctx, char **rcpt) +{ + DNSBLP rejectlist = NULL; // list that caused the reject + status st = oksofar; + mlfiPriv &priv = *MLFIPRIV; + CONFIG &dc = *priv.pc; + if (!priv.queueid) priv.queueid = strdup(smfi_getsymval(ctx, "i")); + char *rcptaddr = rcpt[0]; + char *dnsname = lookup(rcptaddr, dc.env_to_dnsbll); + char *fromname = lookup(rcptaddr, dc.env_to_chkfrom); + if ((strcmp(dnsname, BLACK) == 0) || + (strcmp(fromname, BLACK) == 0)) { + st = 
black; // two options to blacklist this recipient + } + else if (strcmp(fromname, WHITE) == 0) { + st = white; + } + else { + // check an env_from map + string_map *sm = find_from_map(dc, fromname); + if (sm != NULL) { + fromname = lookup(priv.mailaddr, *sm); // returns default if name not in map + if (strcmp(fromname, BLACK) == 0) { + st = black; // blacklist this envelope from value + } + if (strcmp(fromname, WHITE) == 0) { + st = white; // blacklist this envelope from value + } + } + } + if ((st == oksofar) && (strcmp(dnsname, WHITE) != 0)) { + // check dns lists + st = check_dnsbl(priv, find_dnsbll(dc, dnsname), rejectlist); + } + + if (st == reject) { + // reject the recipient based on some dnsbl + char adr[sizeof "255.255.255.255"]; + adr[0] = '\0'; + inet_ntop(AF_INET, (const u_char *)&priv.ip, adr, sizeof(adr)); + char buf[2000]; + snprintf(buf, sizeof(buf), rejectlist->message, adr, adr); + smfi_setreply(ctx, "550", "5.7.1", buf); + return SMFIS_REJECT; + } + else if (st == black) { + // reject the recipient based on blacklisting either from or to + smfi_setreply(ctx, "550", "5.7.1", "no such user"); + return SMFIS_REJECT; + } + else { + // accept the recipient + if (st == oksofar) { + // but remember the non-whites + register_string(priv.non_whites, rcptaddr); + priv.only_whites = false; + } + if (st == white) { + priv.have_whites = true; + } + return SMFIS_CONTINUE; + } +} + +sfsistat mlfi_body(SMFICTX *ctx, u_char *data, size_t len) +{ + mlfiPriv &priv = *MLFIPRIV; + if (priv.authenticated) return SMFIS_CONTINUE; + if (priv.only_whites) return SMFIS_CONTINUE; + if (!priv.pc->content_suffix) return SMFIS_CONTINUE; + priv.scanner->scan(data, len); + return SMFIS_CONTINUE; +} + +sfsistat mlfi_eom(SMFICTX *ctx) +{ + sfsistat rc; + mlfiPriv &priv = *MLFIPRIV; + char *host = NULL; + int ip; + status st; + // process end of message + if (priv.authenticated || + priv.only_whites || + (!priv.pc->content_suffix) || + ((st=check_hosts(priv, host, ip)) == oksofar)) 
rc = SMFIS_CONTINUE; + else { + if (!priv.have_whites) { + // can reject the entire message + char buf[2000]; + if (st == reject_tag) { + // rejected due to excessive bad html tags + snprintf(buf, sizeof(buf), priv.pc->tag_limit_message); + } + else if (st == reject_host) { + // rejected due to excessive unique host/urls + snprintf(buf, sizeof(buf), priv.pc->host_limit_message); + } + else { + char adr[sizeof "255.255.255.255"]; + adr[0] = '\0'; + inet_ntop(AF_INET, (const u_char *)&ip, adr, sizeof(adr)); + snprintf(buf, sizeof(buf), priv.pc->content_message, host, adr); + } + smfi_setreply(ctx, "550", "5.7.1", buf); + rc = SMFIS_REJECT; + } + else { + // need to accept it but remove the recipients that don't want it + for (string_set::iterator i=priv.non_whites.begin(); i!=priv.non_whites.end(); i++) { + char *rcpt = *i; + smfi_delrcpt(ctx, rcpt); + } + rc = SMFIS_CONTINUE; + } + } + // reset for a new message on the same connection + mlfi_abort(ctx); + return rc; +} + +sfsistat mlfi_abort(SMFICTX *ctx) +{ + mlfiPriv &priv = *MLFIPRIV; + priv.reset(); + return SMFIS_CONTINUE; +} + +sfsistat mlfi_close(SMFICTX *ctx) +{ + mlfiPriv *priv = MLFIPRIV; + if (!priv) return SMFIS_CONTINUE; + delete priv; + smfi_setpriv(ctx, NULL); + return SMFIS_CONTINUE; +} + +struct smfiDesc smfilter = +{ + "DNSBL", // filter name + SMFI_VERSION, // version code -- do not change + SMFIF_DELRCPT, // flags + mlfi_connect, // connection info filter + NULL, // SMTP HELO command filter + mlfi_envfrom, // envelope sender filter + mlfi_envrcpt, // envelope recipient filter + NULL, // header filter + NULL, // end of header + mlfi_body, // body block filter + mlfi_eom, // end of message + mlfi_abort, // message aborted + mlfi_close, // connection cleanup +}; + + +//////////////////////////////////////////////// +// reload the config +// +static CONFIG* new_conf(); +static CONFIG* new_conf() { + CONFIG *newc = new CONFIG; + pthread_mutex_lock(&config_mutex); + newc->generation = generation++; + 
pthread_mutex_unlock(&config_mutex); + char buf[200]; + snprintf(buf, sizeof(buf), "loading configuration generation %d", newc->generation); + my_syslog(buf); + if (load_conf(*newc, "dnsbl.conf") { + newc->load_time = time(NULL); + return newc; + } + delete newc; + return NULL; +} + + +//////////////////////////////////////////////// +// thread to watch the old config files for changes +// and reload when needed. we also cleanup old +// configs whose reference count has gone to zero. +// +static void* config_loader(void *arg); +static void* config_loader(void *arg) { + typedef set configp_set; + configp_set old_configs; + while (loader_run) { + sleep(180); // look for modifications every 3 minutes + if (!loader_run) break; + CONFIG &dc = *config; + time_t then = dc.load_time; + struct stat st; + bool reload = false; + for (string_list::iterator i=dc.config_files.begin(); i!=dc.config_files.end(); i++) { + char *fn = *i; + if (stat(fn, &st)) reload = true; // file disappeared + else if (st.st_mtime > then) reload = true; // file modified + if (reload) break; + } + if (reload) { + CONFIG *newc = new_conf(); + // replace the global config pointer + pthread_mutex_lock(&config_mutex); + CONFIG *old = config; + config = newc; + pthread_mutex_unlock(&config_mutex); + if (old) old_configs.insert(old); + } + // now look for old configs with zero ref counts + for (configp_set::iterator i=old_configs.begin(); i!=old_configs.end(); ) { + CONFIG *old = *i; + if (!old->reference_count) { + char buf[200]; + snprintf(buf, sizeof(buf), "freeing memory for old configuration generation %d", old->generation); + my_syslog(buf); + delete old; // destructor does all the work + old_configs.erase(i++); + } + else i++; + } + } + return NULL; +} + + +static void usage(char *prog); +static void usage(char *prog) +{ + fprintf(stderr, "Usage: %s [-d] [-c] -r port -p sm-sock-addr [-t timeout]\n", prog); + fprintf(stderr, "where port is for the connection to our own dns resolver processes\n"); + 
fprintf(stderr, " and should be local-domain-socket-file-name\n"); + fprintf(stderr, "where sm-sock-addr is for the connection to sendmail\n"); + fprintf(stderr, " and should be one of\n"); + fprintf(stderr, " inet:port@ip-address\n"); + fprintf(stderr, " local:local-domain-socket-file-name\n"); + fprintf(stderr, "-c will load and dump the config to stdout\n"); + fprintf(stderr, "-d will add some syslog debug messages\n"); +} + + + +static void setup_socket(char *sock); +static void setup_socket(char *sock) { + unlink(sock); + // sockaddr_un addr; + // memset(&addr, '\0', sizeof addr); + // addr.sun_family = AF_UNIX; + // strncpy(addr.sun_path, sock, sizeof(addr.sun_path)-1); + // int s = socket(AF_UNIX, SOCK_STREAM, 0); + // bind(s, (sockaddr*)&addr, sizeof(addr)); + // close(s); +} + + +/* + * The signal handler function -- only gets called when a SIGCHLD + * is received, ie when a child terminates + */ +void sig_chld(int signo) +{ + int status; + /* Wait for any child without blocking */ + while (waitpid(-1, &status, WNOHANG) > 0) { + // ignore child exit status, we only do this to cleanup zombies + } +} + + +int main(int argc, char**argv) +{ + token_init(); + bool check = false; + bool setconn = false; + bool setreso = false; + int c; + const char *args = "r:p:t:hcd"; + extern char *optarg; + + // Process command line options + while ((c = getopt(argc, argv, args)) != -1) { + switch (c) { + case 'r': + if (optarg == NULL || *optarg == '\0') { + fprintf(stderr, "Illegal resolver socket: %s\n", optarg); + exit(EX_USAGE); + } + resolver_port = strdup(optarg); + setup_socket(resolver_port); + setreso = true; + break; + + case 'p': + if (optarg == NULL || *optarg == '\0') { + fprintf(stderr, "Illegal sendmail socket: %s\n", optarg); + exit(EX_USAGE); + } + if (smfi_setconn(optarg) == MI_FAILURE) { + fprintf(stderr, "smfi_setconn failed\n"); + exit(EX_SOFTWARE); + } + if (strncasecmp(optarg, "unix:", 5) == 0) setup_socket(optarg + 5); + else if (strncasecmp(optarg, 
"local:", 6) == 0) setup_socket(optarg + 6); + setconn = true; + break; + + case 't': + if (optarg == NULL || *optarg == '\0') { + fprintf(stderr, "Illegal timeout: %s\n", optarg); + exit(EX_USAGE); + } + if (smfi_settimeout(atoi(optarg)) == MI_FAILURE) { + fprintf(stderr, "smfi_settimeout failed\n"); + exit(EX_SOFTWARE); + } + break; + + case 'c': + check = true; + break; + + case 'd': + debug_syslog = true; + break; + + case 'h': + default: + usage(argv[0]); + exit(EX_USAGE); + } + } + + if (check) { + CONFIG *conf = new_conf(); + if (conf) { + conf->dump(); + delete conf; + return 0; + } + else { + return 1; // config failed to load + } + } + + if (!setconn) { + fprintf(stderr, "%s: Missing required -p argument\n", argv[0]); + usage(argv[0]); + exit(EX_USAGE); + } + + if (!setreso) { + fprintf(stderr, "%s: Missing required -r argument\n", argv[0]); + usage(argv[0]); + exit(EX_USAGE); + } + + if (smfi_register(smfilter) == MI_FAILURE) { + fprintf(stderr, "smfi_register failed\n"); + exit(EX_UNAVAILABLE); + } + + // switch to background mode + if (daemon(1,0) < 0) { + fprintf(stderr, "daemon() call failed\n"); + exit(EX_UNAVAILABLE); + } + + // write the pid + const char *pidpath = "/var/run/dnsbl.pid"; + unlink(pidpath); + FILE *f = fopen(pidpath, "w"); + if (f) { +#ifdef linux + // from a comment in the DCC source code: + // Linux threads are broken. Signals given the + // original process are delivered to only the + // thread that happens to have that PID. The + // sendmail libmilter thread that needs to hear + // SIGINT and other signals does not, and that breaks + // scripts that need to stop milters. + // However, signaling the process group works. 
+ fprintf(f, "-%d\n", (u_int)getpgrp()); +#else + fprintf(f, "%d\n", (u_int)getpid()); +#endif + fclose(f); + } + + // initialize the thread sync objects + pthread_mutex_init(&config_mutex, 0); + pthread_mutex_init(&syslog_mutex, 0); + pthread_mutex_init(&resolve_mutex, 0); + pthread_mutex_init(&fd_pool_mutex, 0); + + // drop root privs + struct passwd *pw = getpwnam("dnsbl"); + if (pw) { + if (setgid(pw->pw_gid) == -1) { + my_syslog("failed to switch to group dnsbl"); + } + if (setuid(pw->pw_uid) == -1) { + my_syslog("failed to switch to user dnsbl"); + } + } + + // fork off the resolver listener process + pid_t child = fork(); + if (child < 0) { + my_syslog("failed to create resolver listener process"); + exit(0); + } + if (child == 0) { + // we are the child - dns resolver listener process + resolver_socket = socket(AF_UNIX, SOCK_STREAM, 0); + if (resolver_socket < 0) { + my_syslog("child failed to create resolver socket"); + exit(0); // failed + } + sockaddr_un server; + memset(&server, '\0', sizeof(server)); + server.sun_family = AF_UNIX; + strncpy(server.sun_path, resolver_port, sizeof(server.sun_path)-1); + //try to bind the address to the socket. + if (bind(resolver_socket, (sockaddr *)&server, sizeof(server)) < 0) { + // bind failed + shutdown(resolver_socket, SHUT_RDWR); + close(resolver_socket); + my_syslog("child failed to bind resolver socket"); + exit(0); // failed + } + //listen on the socket. 
+ if (listen(resolver_socket, 10) < 0) { + // listen failed + shutdown(resolver_socket, SHUT_RDWR); + close(resolver_socket); + my_syslog("child failed to listen to resolver socket"); + exit(0); // failed + } + // setup sigchld handler to prevent zombies + struct sigaction act; + act.sa_handler = sig_chld; // Assign sig_chld as our SIGCHLD handler + sigemptyset(&act.sa_mask); // We don't want to block any other signals in this example + act.sa_flags = SA_NOCLDSTOP; // only want children that have terminated + if (sigaction(SIGCHLD, &act, NULL) < 0) { + my_syslog("child failed to setup SIGCHLD handler"); + exit(0); // failed + } + while (true) { + sockaddr_un client; + socklen_t clientlen = sizeof(client); + int s = accept(resolver_socket, (sockaddr *)&client, &clientlen); + if (s > 0) { + // accept worked, it did not get cancelled before we could accept it + // fork off a process to handle this connection + int newchild = fork(); + if (newchild == 0) { + // this is the worker process + // child does not need the listening socket + close(resolver_socket); + //my_syslog("child forked a worker process"); + process_resolver_requests(s); + //my_syslog("child terminated a worker process"); + exit(0); + } + else { + // this is the parent + // parent does not need the accepted socket + close(s); + } + } + } + exit(0); // make sure we don't fall thru. 
+ } + else { + sleep(2); // allow child to get started + } + + // load the initial config + config = new_conf(); + + // only create threads after the fork() in daemon + pthread_t tid; + if (pthread_create(&tid, 0, config_loader, 0)) + my_syslog("failed to create config loader thread"); + if (pthread_detach(tid)) + my_syslog("failed to detach config loader thread"); + + time_t starting = time(NULL); + int rc = smfi_main(); + if ((rc != MI_SUCCESS) && (time(NULL) > starting+5*60)) { + my_syslog("trying to restart after smfi_main()"); + loader_run = false; // eventually the config loader thread will terminate + execvp(argv[0], argv); + } + exit((rc == MI_SUCCESS) ? 0 : EX_UNAVAILABLE); +} + diff -r c812a06f87bf -r dd21c8e13074 src/tokenizer.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/tokenizer.cpp Sat Jul 09 19:24:41 2005 -0700 @@ -0,0 +1,504 @@ +#include "context.h" + +static char* tokenizer_version="$Id:"; + +enum state {s_init, + s_token, + s_string, + s_ignore, // whitespace + s_eol, // ignore to eol + end_state, + + s_term, // token terminator + s_single, + s_string1, // first " of string + s_string2, // last " of string + s_slash, // possible start of ignore to eol + }; + +typedef state PARSE[end_state]; + +static PARSE parse_table[256] = { + // s_init s_token s_string s_ignore s_eol + { s_single, s_term, s_string, s_single, s_eol, }, // 0x00 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x01 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x02 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x03 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x04 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x05 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x06 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x07 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x08 + { s_ignore, s_term, s_string, s_ignore, s_eol, }, // 0x09 + { s_ignore, s_term, s_string2, s_ignore, s_ignore, }, // 0x0a + { 
s_single, s_term, s_string, s_single, s_eol, }, // 0x0b + { s_single, s_term, s_string, s_single, s_eol, }, // 0x0c + { s_ignore, s_term, s_string2, s_ignore, s_eol, }, // 0x0d + { s_single, s_term, s_string, s_single, s_eol, }, // 0x0e + { s_single, s_term, s_string, s_single, s_eol, }, // 0x0f + { s_single, s_term, s_string, s_single, s_eol, }, // 0x10 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x11 xon char + { s_single, s_term, s_string, s_single, s_eol, }, // 0x12 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x13 xoff char + { s_single, s_term, s_string, s_single, s_eol, }, // 0x14 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x15 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x16 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x17 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x18 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x19 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1a + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1b + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1c + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1d + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1e + { s_single, s_term, s_string, s_single, s_eol, }, // 0x1f + { s_ignore, s_term, s_string, s_ignore, s_eol, }, // 0x20 space + { s_single, s_term, s_string, s_single, s_eol, }, // 0x21 ! 
+ { s_string1, s_term, s_string2, s_string1, s_eol, }, // 0x22 " + { s_eol, s_term, s_string, s_eol, s_eol, }, // 0x23 # + { s_single, s_term, s_string, s_single, s_eol, }, // 0x24 $ + { s_single, s_term, s_string, s_single, s_eol, }, // 0x25 % + { s_single, s_term, s_string, s_single, s_eol, }, // 0x26 & + { s_single, s_term, s_string, s_single, s_eol, }, // 0x27 ' + { s_single, s_term, s_string, s_single, s_eol, }, // 0x28 ( + { s_single, s_term, s_string, s_single, s_eol, }, // 0x29 ) + { s_single, s_term, s_string, s_single, s_eol, }, // 0x2A * + { s_single, s_term, s_string, s_single, s_eol, }, // 0x2B + + { s_single, s_term, s_string, s_single, s_eol, }, // 0x2C , + { s_single, s_token, s_string, s_single, s_eol, }, // 0x2D - + { s_single, s_token, s_string, s_single, s_eol, }, // 0x2E . + { s_slash, s_term, s_string, s_slash, s_eol, }, // 0x2F / + { s_token, s_token, s_string, s_token, s_eol, }, // 0x30 0 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x31 1 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x32 2 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x33 3 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x34 4 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x35 5 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x36 6 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x37 7 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x38 8 + { s_token, s_token, s_string, s_token, s_eol, }, // 0x39 9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3A : + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3B ; + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3C < + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3D = + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3E > + { s_single, s_term, s_string, s_single, s_eol, }, // 0x3F ? 
+ { s_single, s_token, s_string, s_single, s_eol, }, // 0x40 @ + { s_token, s_token, s_string, s_token, s_eol, }, // 0x41 A + { s_token, s_token, s_string, s_token, s_eol, }, // 0x42 B + { s_token, s_token, s_string, s_token, s_eol, }, // 0x43 C + { s_token, s_token, s_string, s_token, s_eol, }, // 0x44 D + { s_token, s_token, s_string, s_token, s_eol, }, // 0x45 E + { s_token, s_token, s_string, s_token, s_eol, }, // 0x46 F + { s_token, s_token, s_string, s_token, s_eol, }, // 0x47 G + { s_token, s_token, s_string, s_token, s_eol, }, // 0x48 H + { s_token, s_token, s_string, s_token, s_eol, }, // 0x49 I + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4A J + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4B K + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4C L + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4D M + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4E N + { s_token, s_token, s_string, s_token, s_eol, }, // 0x4F O + { s_token, s_token, s_string, s_token, s_eol, }, // 0x50 P + { s_token, s_token, s_string, s_token, s_eol, }, // 0x51 Q + { s_token, s_token, s_string, s_token, s_eol, }, // 0x52 R + { s_token, s_token, s_string, s_token, s_eol, }, // 0x53 S + { s_token, s_token, s_string, s_token, s_eol, }, // 0x54 T + { s_token, s_token, s_string, s_token, s_eol, }, // 0x55 U + { s_token, s_token, s_string, s_token, s_eol, }, // 0x56 V + { s_token, s_token, s_string, s_token, s_eol, }, // 0x57 W + { s_token, s_token, s_string, s_token, s_eol, }, // 0x58 X + { s_token, s_token, s_string, s_token, s_eol, }, // 0x59 Y + { s_token, s_token, s_string, s_token, s_eol, }, // 0x5A Z + { s_single, s_term, s_string, s_single, s_eol, }, // 0x5B [ + { s_single, s_term, s_string, s_single, s_eol, }, // 0x5C backslash + { s_single, s_term, s_string, s_single, s_eol, }, // 0x5D ] + { s_single, s_term, s_string, s_single, s_eol, }, // 0x5E ^ + { s_single, s_token, s_string, s_single, s_eol, }, // 0x5F _ + { s_single, s_term, s_string, 
s_single, s_eol, }, // 0x60 ` + { s_token, s_token, s_string, s_token, s_eol, }, // 0x61 a + { s_token, s_token, s_string, s_token, s_eol, }, // 0x62 b + { s_token, s_token, s_string, s_token, s_eol, }, // 0x63 c + { s_token, s_token, s_string, s_token, s_eol, }, // 0x64 d + { s_token, s_token, s_string, s_token, s_eol, }, // 0x65 e + { s_token, s_token, s_string, s_token, s_eol, }, // 0x66 f + { s_token, s_token, s_string, s_token, s_eol, }, // 0x67 g + { s_token, s_token, s_string, s_token, s_eol, }, // 0x68 h + { s_token, s_token, s_string, s_token, s_eol, }, // 0x69 i + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6A j + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6B k + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6C l + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6D m + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6E n + { s_token, s_token, s_string, s_token, s_eol, }, // 0x6F o + { s_token, s_token, s_string, s_token, s_eol, }, // 0x70 p + { s_token, s_token, s_string, s_token, s_eol, }, // 0x71 q + { s_token, s_token, s_string, s_token, s_eol, }, // 0x72 r + { s_token, s_token, s_string, s_token, s_eol, }, // 0x73 s + { s_token, s_token, s_string, s_token, s_eol, }, // 0x74 t + { s_token, s_token, s_string, s_token, s_eol, }, // 0x75 u + { s_token, s_token, s_string, s_token, s_eol, }, // 0x76 v + { s_token, s_token, s_string, s_token, s_eol, }, // 0x77 w + { s_token, s_token, s_string, s_token, s_eol, }, // 0x78 x + { s_token, s_token, s_string, s_token, s_eol, }, // 0x79 y + { s_token, s_token, s_string, s_token, s_eol, }, // 0x7A z + { s_single, s_term, s_string, s_single, s_eol, }, // 0x7B { + { s_single, s_term, s_string, s_single, s_eol, }, // 0x7C | + { s_single, s_term, s_string, s_single, s_eol, }, // 0x7D } + { s_single, s_term, s_string, s_single, s_eol, }, // 0x7E ~ + { s_single, s_term, s_string, s_single, s_eol, }, // 0x7f + { s_single, s_term, s_string, s_single, s_eol, }, // 0x80 + { s_single, 
s_term, s_string, s_single, s_eol, }, // 0x81 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x82 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x83 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x84 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x85 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x86 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x87 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x88 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x89 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8a + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8b + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8c + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8d + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8e + { s_single, s_term, s_string, s_single, s_eol, }, // 0x8f + { s_single, s_term, s_string, s_single, s_eol, }, // 0x90 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x91 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x92 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x93 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x94 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x95 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x96 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x97 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x98 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x99 + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9a + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9b + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9c + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9d + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9e + { s_single, s_term, s_string, s_single, s_eol, }, // 0x9f + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa1 + { s_single, s_term, s_string, 
s_single, s_eol, }, // 0xa2 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa4 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xa9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xaa + { s_single, s_term, s_string, s_single, s_eol, }, // 0xab + { s_single, s_term, s_string, s_single, s_eol, }, // 0xac + { s_single, s_term, s_string, s_single, s_eol, }, // 0xad + { s_single, s_term, s_string, s_single, s_eol, }, // 0xae + { s_single, s_term, s_string, s_single, s_eol, }, // 0xaf + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb1 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb2 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb4 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xb9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xba + { s_single, s_term, s_string, s_single, s_eol, }, // 0xbb + { s_single, s_term, s_string, s_single, s_eol, }, // 0xbc + { s_single, s_term, s_string, s_single, s_eol, }, // 0xbd + { s_single, s_term, s_string, s_single, s_eol, }, // 0xbe + { s_single, s_term, s_string, s_single, s_eol, }, // 0xbf + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc1 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc2 + { s_single, s_term, s_string, s_single, s_eol, }, 
// 0xc3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc4 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xc9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xca + { s_single, s_term, s_string, s_single, s_eol, }, // 0xcb + { s_single, s_term, s_string, s_single, s_eol, }, // 0xcc + { s_single, s_term, s_string, s_single, s_eol, }, // 0xcd + { s_single, s_term, s_string, s_single, s_eol, }, // 0xce + { s_single, s_term, s_string, s_single, s_eol, }, // 0xcf + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd1 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd2 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd4 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xd9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xda + { s_single, s_term, s_string, s_single, s_eol, }, // 0xdb + { s_single, s_term, s_string, s_single, s_eol, }, // 0xdc + { s_single, s_term, s_string, s_single, s_eol, }, // 0xdd + { s_single, s_term, s_string, s_single, s_eol, }, // 0xde + { s_single, s_term, s_string, s_single, s_eol, }, // 0xdf + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe1 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe2 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe4 + { 
s_single, s_term, s_string, s_single, s_eol, }, // 0xe5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xe9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xea + { s_single, s_term, s_string, s_single, s_eol, }, // 0xeb + { s_single, s_term, s_string, s_single, s_eol, }, // 0xec + { s_single, s_term, s_string, s_single, s_eol, }, // 0xed + { s_single, s_term, s_string, s_single, s_eol, }, // 0xee + { s_single, s_term, s_string, s_single, s_eol, }, // 0xef + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf0 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf1 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf2 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf3 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf4 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf5 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf6 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf7 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf8 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xf9 + { s_single, s_term, s_string, s_single, s_eol, }, // 0xfa + { s_single, s_term, s_string, s_single, s_eol, }, // 0xfb + { s_single, s_term, s_string, s_single, s_eol, }, // 0xfc + { s_single, s_term, s_string, s_single, s_eol, }, // 0xfd + { s_single, s_term, s_string, s_single, s_eol, }, // 0xfe + { s_single, s_term, s_string, s_single, s_eol, }, // 0xff +}; + + +TOKEN::TOKEN(char *fn, string_set *includes) { + pushed = false; + include_files = includes; + include(fn); +} + + +TOKEN::~TOKEN() { + while (!streams.empty()) pop(); +} + + +void TOKEN::pop() { + ifstream *is = streams.front(); + char *fn = filenames.front(); + streams.pop_front(); + filenames.pop_front(); + filenamess.erase(fn); + linenumbers.pop_front(); + 
is->close(); + delete is; +} + + +void TOKEN::push_char(u_char c) { + pushed = true; + pushed_char = c; +} + + +bool TOKEN::next_char(u_char &uc) { + if (pushed) { + uc = pushed_char; + pushed = false; + return true; + } + while (!streams.empty() && streams.front()->eof()) { + pop(); + } + if (streams.empty()) return false; + ifstream *is = streams.front(); + uc = (u_char)is->get(); + if (is->eof()) return next_char(uc); + if (uc == (u_char)'\n') { + int &line = linenumbers.front(); + line++; + } + return true; +} + + +bool TOKEN::include(char *fn) { + string_set::iterator i = filenamess.find(fn); + if (i != filenamess.end()) { + my_syslog("redundant or recursive include file detected"); + return false; + } + ifstream *is = new ifstream; + is->open(fn); + if (is->fail()) { + char buf[1000]; + snprintf(buf, sizeof(buf), "include file %s not found", fn); + token_error(buf); + return false; + } + string_set &inc = *include_files; + inc.insert(fn); + streams.push_front(is); + filenames.push_front(fn); + filenamess.insert(fn); + linenumbers.push_front(1); + return true; +} + + +char *TOKEN::next() { + if (!pending_tokens.empty()) { + char *t = pending_tokens.front(); + pending_tokens.pop_front(); + return t; + } + if (streams.empty()) return NULL; + const int PENDING_LIMIT = 1000; + static u_char buffer[PENDING_LIMIT]; + int count = 0; + state st = s_init; + while (true) { + if (count == (PENDING_LIMIT-1)) { + token_error("token too long"); + break; + } + if (st >= end_state) { + token_error("finite state machine error"); + break; + } + u_char c; + if (!next_char(c)) break; + st = parse_table[c][st]; + switch (st) { + case s_string: + case s_token: { + buffer[count++] = c; + } break; + + case s_term: { + push_char(c); + st = s_init; + } break; + + case s_string1: { + st = s_string; + } break; + + case s_string2: { + st = s_init; + } break; + + case s_single: { + buffer[count++] = c; + st = s_init; + } break; + + case s_ignore: + case s_eol: { + } break; + + + case 
s_slash: { + buffer[count++] = c; + if (next_char(c)) { + if (c == (u_char)'/') { + // start of ignore to eol on // + count--; + st = s_eol; + } + else { + // not a // token, just return this single / + push_char(c); + st = s_init; + } + } + else { + // cannot get another char + st = s_init; + } + } break; + + default: { + token_error(); + token_error("unknown state %d %s \n", st, " "); + } break; + } + if (st == s_init) break; + } + + buffer[count] = '\0'; + if (count == 0) return NULL; + char *t = register_string((char*)buffer); + if (t == token_include) { + char *f = next(); // should be file name + char *s = next(); // should be semicolon + if (s == token_semi) { + include(f); + return next(); + } + else { + push(s); + push(f); + return t; + } + } + return t; +} + + +int TOKEN::nextint() { + char *t = next(); + char *e; + long i = strtol(t, &e, 10); + if (*e != '\0') { + token_error("integer", t); + return 0; + } + return (int)i; +} + + +void TOKEN::skipeol() { + while (true) { + u_char c; + if (!next_char(c)) break; + if (c == (u_char)'\n') break; + } +} + + +void TOKEN::token_error(const char *err) { + token_error(); + printf("%s \n", err); +} + + +void TOKEN::token_error(const char *fmt, int d, const char *s) { + printf(fmt, d, s); +} + + +void TOKEN::token_error(const char *fmt, const char *t, const char *h) { + if (!h) h = "null"; + printf(fmt, t, h); +} + + +void TOKEN::token_error(const char *token, const char *have) { + token_error(); + token_error("expecting %s, found %s \n", token, have); +} + + +void TOKEN::token_error() { + token_error("syntax error at line %d in file %s -- ", cur_line(), cur_fn()); +} + diff -r c812a06f87bf -r dd21c8e13074 src/tokenizer.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/tokenizer.h Sat Jul 09 19:24:41 2005 -0700 @@ -0,0 +1,49 @@ +#include +#include +#include + + +using namespace std; + +struct ltstr { + bool operator()(char* s1, char* s2) const { + return strcmp(s1, s2) < 0; + } +}; + +typedef list 
stream_list; +typedef list string_list; +typedef set string_set; +typedef list line_list; + +class TOKEN { + stream_list streams; + string_list filenames; + string_set filenamess; + line_list linenumbers; + string_list pending_tokens; + string_set *include_files; + bool pushed; + u_char pushed_char; + + void pop(); + bool next_char(u_char &c); + void push_char(u_char c); + +public: + TOKEN(char *fn, string_set *includes); + ~TOKEN(); + bool include(char *fn); + char *next(); // return next token + int nextint(); + void skipeol(); // skip to eol + void push(char *token) {pending_tokens.push_front(token);}; + char *cur_fn() {return filenames.front();}; + int cur_line() {return linenumbers.front();}; + void token_error(const char *err); + void token_error(const char *fmt, int d, const char *s); + void token_error(const char *fmt, const char *t, const char *h); + void token_error(const char *token, const char *have); + void token_error(); +}; + diff -r c812a06f87bf -r dd21c8e13074 tld.conf --- a/tld.conf Sat Apr 02 12:21:06 2005 -0800 +++ b/tld.conf Sat Jul 09 19:24:41 2005 -0700 @@ -1,267 +1,267 @@ # # icann tlds from http://www.icann.org/registries/listing.html # -tld areo -tld biz -tld com -tld coop -tld edu -tld gov -tld info -tld int -tld mil -tld museum -tld name -tld net -tld org -tld pro +aero +biz +com +coop +edu +gov +info +int +mil +museum +name +net +org +pro # # icann tlds from http://www.iana.org/cctld/cctld-whois.htm 2004-06-04 # -tld ac # Ascension Island -tld ad # Andorra -tld ae # United Arab Emirates -tld af # Afghanistan -tld ag # Antigua and Barbuda -tld ai # Anguilla -tld al # Albania -tld am # Armenia -tld an # Netherlands Antilles -tld ao # Angola -tld aq # Antarctica -tld ar # Argentina -tld as # American Samoa -tld at # Austria -tld au # Australia -tld aw # Aruba -tld ax # Aland Islands -tld az # Azerbaijan -tld ba # Bosnia and Herzegovina -tld bb # Barbados -tld bd # Bangladesh -tld be # Belgium -tld bf # Burkina Faso -tld bg # Bulgaria -tld 
bh # Bahrain -tld bi # Burundi -tld bj # Benin -tld bm # Bermuda -tld bn # Brunei Darussalam -tld bo # Bolivia -tld br # Brazil -tld bs # Bahamas -tld bt # Bhutan -tld bv # Bouvet Island -tld bw # Botswana -tld by # Belarus -tld bz # Belize -tld ca # Canada -tld cc # Cocos (Keeling) Islands -tld cd # Congo, The Democratic Republic of the -tld cf # Central African Republic -tld cg # Congo, Republic of -tld ch # Switzerland -tld ci # Cote d'Ivoire -tld ck # Cook Islands -tld cl # Chile -tld cm # Cameroon -tld cn # China -tld co # Colombia -tld cr # Costa Rica -tld cs # Serbia and Montenegro -tld cu # Cuba -tld cv # Cape Verde -tld cx # Christmas Island -tld cy # Cyprus -tld cz # Czech Republic -tld de # Germany -tld dj # Djibouti -tld dk # Denmark -tld dm # Dominica -tld do # Dominican Republic -tld dz # Algeria -tld ec # Ecuador -tld ee # Estonia -tld eg # Egypt -tld eh # Western Sahara -tld er # Eritrea -tld es # Spain -tld et # Ethiopia -tld fi # Finland -tld fj # Fiji -tld fk # Falkland Islands (Malvinas) -tld fm # Micronesia, Federal State of -tld fo # Faroe Islands -tld fr # France -tld ga # Gabon -tld gb # United Kingdom -tld gd # Grenada -tld ge # Georgia -tld gf # French Guiana -tld gg # Guernsey -tld gh # Ghana -tld gi # Gibraltar -tld gl # Greenland -tld gm # Gambia -tld gn # Guinea -tld gp # Guadeloupe -tld gq # Equatorial Guinea -tld gr # Greece -tld gs # South Georgia and the South Sandwich Islands -tld gt # Guatemala -tld gu # Guam -tld gw # Guinea-Bissau -tld gy # Guyana -tld hk # Hong Kong -tld hm # Heard and McDonald Islands -tld hn # Honduras -tld hr # Croatia/Hrvatska -tld ht # Haiti -tld hu # Hungary -tld id # Indonesia -tld ie # Ireland -tld il # Israel -tld im # Isle of Man -tld in # India -tld io # British Indian Ocean Territory -tld iq # Iraq -tld ir # Iran, Islamic Republic of -tld is # Iceland -tld it # Italy -tld je # Jersey -tld jm # Jamaica -tld jo # Jordan -tld jp # Japan -tld ke # Kenya -tld kg # Kyrgyzstan -tld kh # Cambodia -tld ki # 
Kiribati -tld km # Comoros -tld kn # Saint Kitts and Nevis -tld kp # Korea, Democratic People's Republic -tld kr # Korea, Republic of -tld kw # Kuwait -tld ky # Cayman Islands -tld kz # Kazakhstan -tld la # Lao People's Democratic Republic -tld lb # Lebanon -tld lc # Saint Lucia -tld li # Liechtenstein -tld lk # Sri Lanka -tld lr # Liberia -tld ls # Lesotho -tld lt # Lithuania -tld lu # Luxembourg -tld lv # Latvia -tld ly # Libyan Arab Jamahiriya -tld ma # Morocco -tld mc # Monaco -tld md # Moldova, Republic of -tld mg # Madagascar -tld mh # Marshall Islands -tld mk # Macedonia, The Former Yugoslav Republic of -tld ml # Mali -tld mm # Myanmar -tld mn # Mongolia -tld mo # Macau -tld mp # Northern Mariana Islands -tld mq # Martinique -tld mr # Mauritania -tld ms # Montserrat -tld mt # Malta -tld mu # Mauritius -tld mv # Maldives -tld mw # Malawi -tld mx # Mexico -tld my # Malaysia -tld mz # Mozambique -tld na # Namibia -tld nc # New Caledonia -tld ne # Niger -tld nf # Norfolk Island -tld ng # Nigeria -tld ni # Nicaragua -tld nl # Netherlands -tld no # Norway -tld np # Nepal -tld nr # Nauru -tld nu # Niue -tld nz # New Zealand -tld om # Oman -tld pa # Panama -tld pe # Peru -tld pf # French Polynesia -tld pg # Papua New Guinea -tld ph # Philippines -tld pk # Pakistan -tld pl # Poland -tld pm # Saint Pierre and Miquelon -tld pn # Pitcairn Island -tld pr # Puerto Rico -tld ps # Palestinian Territories -tld pt # Portugal -tld pw # Palau -tld py # Paraguay -tld qa # Qatar -tld re # Reunion Island -tld ro # Romania -tld ru # Russian Federation -tld rw # Rwanda -tld sa # Saudi Arabia -tld sb # Solomon Islands -tld sc # Seychelles -tld sd # Sudan -tld se # Sweden -tld sg # Singapore -tld sh # Saint Helena -tld si # Slovenia -tld sj # Svalbard and Jan Mayen Islands -tld sk # Slovak Republic -tld sl # Sierra Leone -tld sm # San Marino -tld sn # Senegal -tld so # Somalia -tld sr # Suriname -tld st # Sao Tome and Principe -tld sv # El Salvador -tld sy # Syrian Arab Republic -tld 
sz # Swaziland -tld tc # Turks and Caicos Islands -tld td # Chad -tld tf # French Southern Territories -tld tg # Togo -tld th # Thailand -tld tj # Tajikistan -tld tk # Tokelau -tld tl # Timor-Leste -tld tm # Turkmenistan -tld tn # Tunisia -tld to # Tonga -tld tp # East Timor -tld tr # Turkey -tld tt # Trinidad and Tobago -tld tv # Tuvalu -tld tw # Taiwan -tld tz # Tanzania -tld ua # Ukraine -tld ug # Uganda -tld uk # United Kingdom -tld um # United States Minor Outlying Islands -tld us # United States -tld uy # Uruguay -tld uz # Uzbekistan -tld va # Holy See (Vatican City State) -tld vc # Saint Vincent and the Grenadines -tld ve # Venezuela -tld vg # Virgin Islands, British -tld vi # Virgin Islands, U.S. -tld vn # Vietnam -tld vu # Vanuatu -tld wf # Wallis and Futuna Islands -tld ws # Western Samoa -tld ye # Yemen -tld yt # Mayotte -tld yu # Yugoslavia -tld za # South Africa -tld zm # Zambia -tld zw # Zimbabwe +ac # Ascension Island +ad # Andorra +ae # United Arab Emirates +af # Afghanistan +ag # Antigua and Barbuda +ai # Anguilla +al # Albania +am # Armenia +an # Netherlands Antilles +ao # Angola +aq # Antarctica +ar # Argentina +as # American Samoa +at # Austria +au # Australia +aw # Aruba +ax # Aland Islands +az # Azerbaijan +ba # Bosnia and Herzegovina +bb # Barbados +bd # Bangladesh +be # Belgium +bf # Burkina Faso +bg # Bulgaria +bh # Bahrain +bi # Burundi +bj # Benin +bm # Bermuda +bn # Brunei Darussalam +bo # Bolivia +br # Brazil +bs # Bahamas +bt # Bhutan +bv # Bouvet Island +bw # Botswana +by # Belarus +bz # Belize +ca # Canada +cc # Cocos (Keeling) Islands +cd # Congo, The Democratic Republic of the +cf # Central African Republic +cg # Congo, Republic of +ch # Switzerland +ci # Cote d'Ivoire +ck # Cook Islands +cl # Chile +cm # Cameroon +cn # China +co # Colombia +cr # Costa Rica +cs # Serbia and Montenegro +cu # Cuba +cv # Cape Verde +cx # Christmas Island +cy # Cyprus +cz # Czech Republic +de # Germany +dj # Djibouti +dk # Denmark +dm # Dominica +do # 
Dominican Republic +dz # Algeria +ec # Ecuador +ee # Estonia +eg # Egypt +eh # Western Sahara +er # Eritrea +es # Spain +et # Ethiopia +fi # Finland +fj # Fiji +fk # Falkland Islands (Malvinas) +fm # Micronesia, Federal State of +fo # Faroe Islands +fr # France +ga # Gabon +gb # United Kingdom +gd # Grenada +ge # Georgia +gf # French Guiana +gg # Guernsey +gh # Ghana +gi # Gibraltar +gl # Greenland +gm # Gambia +gn # Guinea +gp # Guadeloupe +gq # Equatorial Guinea +gr # Greece +gs # South Georgia and the South Sandwich Islands +gt # Guatemala +gu # Guam +gw # Guinea-Bissau +gy # Guyana +hk # Hong Kong +hm # Heard and McDonald Islands +hn # Honduras +hr # Croatia/Hrvatska +ht # Haiti +hu # Hungary +id # Indonesia +ie # Ireland +il # Israel +im # Isle of Man +in # India +io # British Indian Ocean Territory +iq # Iraq +ir # Iran, Islamic Republic of +is # Iceland +it # Italy +je # Jersey +jm # Jamaica +jo # Jordan +jp # Japan +ke # Kenya +kg # Kyrgyzstan +kh # Cambodia +ki # Kiribati +km # Comoros +kn # Saint Kitts and Nevis +kp # Korea, Democratic People's Republic +kr # Korea, Republic of +kw # Kuwait +ky # Cayman Islands +kz # Kazakhstan +la # Lao People's Democratic Republic +lb # Lebanon +lc # Saint Lucia +li # Liechtenstein +lk # Sri Lanka +lr # Liberia +ls # Lesotho +lt # Lithuania +lu # Luxembourg +lv # Latvia +ly # Libyan Arab Jamahiriya +ma # Morocco +mc # Monaco +md # Moldova, Republic of +mg # Madagascar +mh # Marshall Islands +mk # Macedonia, The Former Yugoslav Republic of +ml # Mali +mm # Myanmar +mn # Mongolia +mo # Macau +mp # Northern Mariana Islands +mq # Martinique +mr # Mauritania +ms # Montserrat +mt # Malta +mu # Mauritius +mv # Maldives +mw # Malawi +mx # Mexico +my # Malaysia +mz # Mozambique +na # Namibia +nc # New Caledonia +ne # Niger +nf # Norfolk Island +ng # Nigeria +ni # Nicaragua +nl # Netherlands +no # Norway +np # Nepal +nr # Nauru +nu # Niue +nz # New Zealand +om # Oman +pa # Panama +pe # Peru +pf # French Polynesia +pg # Papua New 
Guinea +ph # Philippines +pk # Pakistan +pl # Poland +pm # Saint Pierre and Miquelon +pn # Pitcairn Island +pr # Puerto Rico +ps # Palestinian Territories +pt # Portugal +pw # Palau +py # Paraguay +qa # Qatar +re # Reunion Island +ro # Romania +ru # Russian Federation +rw # Rwanda +sa # Saudi Arabia +sb # Solomon Islands +sc # Seychelles +sd # Sudan +se # Sweden +sg # Singapore +sh # Saint Helena +si # Slovenia +sj # Svalbard and Jan Mayen Islands +sk # Slovak Republic +sl # Sierra Leone +sm # San Marino +sn # Senegal +so # Somalia +sr # Suriname +st # Sao Tome and Principe +sv # El Salvador +sy # Syrian Arab Republic +sz # Swaziland +tc # Turks and Caicos Islands +td # Chad +tf # French Southern Territories +tg # Togo +th # Thailand +tj # Tajikistan +tk # Tokelau +tl # Timor-Leste +tm # Turkmenistan +tn # Tunisia +to # Tonga +tp # East Timor +tr # Turkey +tt # Trinidad and Tobago +tv # Tuvalu +tw # Taiwan +tz # Tanzania +ua # Ukraine +ug # Uganda +uk # United Kingdom +um # United States Minor Outlying Islands +us # United States +uy # Uruguay +uz # Uzbekistan +va # Holy See (Vatican City State) +vc # Saint Vincent and the Grenadines +ve # Venezuela +vg # Virgin Islands, British +vi # Virgin Islands, U.S. +vn # Vietnam +vu # Vanuatu +wf # Wallis and Futuna Islands +ws # Western Samoa +ye # Yemen +yt # Mayotte +yu # Yugoslavia +za # South Africa +zm # Zambia +zw # Zimbabwe