Mercurial > dnsbl
changeset 41:d95af8129dfa
updates for 3.2, changing file layout, add queueid to messages
author | carl |
---|---|
date | Mon, 05 Jul 2004 10:52:02 -0700 |
parents | dc3d8d1aa2d2 |
children | afcf403709ef |
files | ChangeLog dnsbl.conf dnsbl.rc dnsbl.spec.in install.bash package.bash src/dnsbl.cpp src/package src/scanner.cpp |
diffstat | 9 files changed, 271 insertions(+), 77 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ChangeLog Mon Jul 05 10:52:02 2004 -0700 @@ -0,0 +1,17 @@ + $Id$ + +3.2 2004-07-05 + + Add .spec file for building rpms, contributed by John Gunkel + <jgunkel@palliser.ca> + + Changes to file layout suggested by John to conform to RedHat/LSB + standards. + + Change parser to handle &#xnnn; obfuscated urls with characters + specified in hex. + + Make bad_html tags more sensitive to binary tags, to reduce false + positives in .zip or .tar.gz file attachments. + + Add sendmail queueid to the dnsbl syslog messages.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dnsbl.conf Mon Jul 05 10:52:02 2004 -0700 @@ -0,0 +1,35 @@ +############################################## +# content scanning parameters +# +content sbl-xbl.spamhaus.org 'Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s' +host_limit 20 'Mail containing too many host names rejected' +html_limit 20 'Mail containing excessive bad html tags rejected' +include html-tags.conf +include tld.conf + + +############################################## +# define the dnsbls to use +# +dnsbl LOCAL blackholes.five-ten-sg.com 'Mail from %s rejected - local; see http://www.five-ten-sg.com/blackhole.php?%s' +#dnsbl SPEWS blackholes.spews.org 'Mail from %s rejected - spews; see http://www.spews.org/ask.cgi?x=%s' +dnsbl SBL sbl-xbl.spamhaus.org 'Mail from %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s' + + +############################################## +# define the (default and other) lists of dnsbls to use +# +dnsbl_list DEFAULT LOCAL SBL + + +############################################## +# define the (default and other) env_from maps +# + +############################################## +# specify dnsbl_lists and env_from maps to use for specific recipients +# + +############################################## +# specify dnsbl_lists and env_from maps to use for clients domains +#
--- a/dnsbl.rc Mon Jun 07 21:07:40 2004 -0700 +++ b/dnsbl.rc Mon Jul 05 10:52:02 2004 -0700 @@ -20,12 +20,26 @@ start) # Start daemons. echo -n "Starting dnsbl-milter: " - cd /var/dnsbl # conf file is here - ./dnsbl -d -p local:/var/run/dnsbl.sock + if [ ! -f /var/lock/subsys/dnsbl ]; then + cd /etc/dnsbl # conf file is here + su -l dnsbl -s /bin/sh -c "/usr/sbin/dnsbl -d -p local:/var/run/dnsbl/dnsbl.sock " RETVAL=$? - sleep 3 + pid=`pidof -s /usr/sbin/dnsbl` + if [ $pid ] + then + success "Starting dnsbl milter:" + touch /var/lock/subsys/dnsbl echo - [ $RETVAL -eq 0 ] && touch /var/lock/subsys/dnsbl + else + failure "Starting dnsbl milter:" + echo + fi + else + echo -n "already running! " + failure "dnsbl milter already running!" + echo + fi + ;; stop) # Stop daemons. @@ -49,3 +63,5 @@ exit 1 esac exit $RETVAL + +# $Id$
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dnsbl.spec.in Mon Jul 05 10:52:02 2004 -0700 @@ -0,0 +1,95 @@ +Summary: DNSBL Sendmail Milter +Name: dnsbl +Version: 3.1 +Release: 1 +Copyright: GPL +Group: System Environment/Daemons +Source: http://www.five-ten-sg.com/util/dnsbl.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version} + +BuildRequires: sendmail-devel >= 8.12.1 +Requires(pre): /usr/sbin/groupadd +Requires(pre): /usr/sbin/useradd +Requires(pre): /usr/bin/getent +Requires(postun): /usr/sbin/userdel +Requires(postun): /usr/sbin/groupdel +Requires(post,preun): /sbin/chkconfig +Requires: sendmail + + +%description +We present here a mechanism whereby the backup mail server can use the correct set of DNSBLs for each recipient for each message. As a side-effect, it gives us the ability to customize the set of DNSBLs on a per-recipient basis, so that fred@example.com could use SPEWS and the SBL, where all other users @example.com use only the SBL. + +This milter will also decode (base64, mime, html entity, url encodings) and scan for HTTP and HTTPS URLs and bare hostnames in the body of the mail. If any of those host names have A or NS records on the SBL (or a single configurable DNSBL), the mail will be rejected unless previously whitelisted. This milter also counts the number of invalid HTML tags, and can reject mail if that count exceeds your specified limit. + +The DNSBL milter reads a text configuration file (dnsbl.conf) on startup, and whenever the config file (or any of the referenced include files) is changed. The entire configuration file is case insensitive. 
+ +%prep +# %setup -q -n Zope-%{zope_version} +# %patch0 -p1 +# %patch1 -p1 + +%setup + + +%build +pwd +g++ -c $CXXFLAGS -pthread dnsbl.cpp +g++ -o dnsbl dnsbl.o /usr/lib/libresolv.a -lmilter -pthread + +%install +# hoisted from install.bash +#DST=/var/dnsbl +DST=%{buildroot} +if [ "%{buildroot}" = "/" -o -z "%{buildroot}" ] ; then + echo sorry, you probably do not want me to delete the old buildroot at %{buildroot} + exit 1 +fi + +rm -rf %{buildroot} +mkdir -p %{buildroot}/etc/dnsbl + +install -m 644 dnsbl.conf %{buildroot}/etc/dnsbl/dnsbl.conf +install -m 644 html-tags.conf %{buildroot}/etc/dnsbl/html-tags.conf +install -m 644 tld.conf %{buildroot}/etc/dnsbl/tld.conf + +mkdir -p %{buildroot}/usr/sbin +install -m 755 dnsbl %{buildroot}/usr/sbin/dnsbl + +mkdir -p %{buildroot}/etc/init.d +install -m 755 dnsbl.rc %{buildroot}/etc/init.d/dnsbl + +mkdir -p %{buildroot}/var/run/dnsbl + +%pre +/usr/bin/getent passwd dnsbl || + useradd -r -d /etc/dnsbl -M -c "dnsbl pseudo-user" -s /sbin/nologin dnsbl +%post +/sbin/chkconfig --add dnsbl +/sbin/chkconfig dnsbl on +/sbin/service dnsbl start + +# [JOG] TODO: spew out a message indicating what should be added to sendmail.mc + +%preun +if [ $1 -eq 0 ]; then + /sbin/service dnsbl stop || : + /sbin/chkconfig --del dnsbl + userdel dnsbl || : +fi + +%postun + +%clean + +%files +%defattr(-,root,root) +%config /etc/dnsbl/ +/etc/init.d/dnsbl +/usr/sbin/dnsbl +%dir %attr(0750,dnsbl,root) /var/run/dnsbl + +%changelog +Revision 1.1 2004/06/30 10:08:48 jgunkel@palliser.ca +Initial revision of spec file. Need to add a better description, docs and a sendmail.mc message +
--- a/install.bash Mon Jun 07 21:07:40 2004 -0700 +++ b/install.bash Mon Jul 05 10:52:02 2004 -0700 @@ -24,9 +24,17 @@ ##################### +# ensure the user is created +/usr/bin/getent passwd dnsbl || useradd -r -d /etc/dnsbl -M -c "dnsbl pseudo-user" -s /sbin/nologin dnsbl # install the milter -DST=/var/dnsbl +DST=/etc/dnsbl mkdir -p $DST +if [ -f /var/dnsbl/dnsbl.conf ]; then + # move the conf files to the new location + mv /var/dnsbl/*conf $DST + rm /var/dnsbl/dnsbl # remove the old binary + rmdir /var/dnsbl +fi if [ ! -f $DST/dnsbl.conf ]; then cp dnsbl.conf $DST fi @@ -36,7 +44,7 @@ if [ ! -f $DST/tld.conf ]; then cp tld.conf $DST fi -mv -f dnsbl $DST +mv -f dnsbl /usr/sbin/dnsbl cp dnsbl.rc /etc/rc.d/init.d/dnsbl chmod 755 /etc/rc.d/init.d/dnsbl /sbin/chkconfig --add dnsbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/package.bash Mon Jul 05 10:52:02 2004 -0700 @@ -0,0 +1,27 @@ +#!/bin/bash + +VER=dnsbl-3.2 +mkdir $VER + target1=/home/httpd/html/510sg/util/dnsbl.tar.gz + target2=/home/httpd/html/510sg/dnsbl.conf + target3=/home/httpd/html/510sg/dnsbl.html + + cp sample1.conf $VER/dnsbl.conf + cp sample.conf $VER + cp html-tags.conf $VER + cp tld.conf $VER + cp *cpp $VER + cp *rc $VER + cp install.bash $VER + cp *spec $VER + cp LICENSE $VER + cp RELEASE_NOTES $VER + tar cfvz $target1 $VER + + cp sample.conf $target2 + cp dnsbl.html $target3 + + scp $target1 ns1:$target1 + scp $target2 ns1:$target2 + scp $target3 ns1:$target3 +rm -rf $VER
--- a/src/dnsbl.cpp Mon Jun 07 21:07:40 2004 -0700 +++ b/src/dnsbl.cpp Mon Jul 05 10:52:02 2004 -0700 @@ -179,6 +179,8 @@ static pthread_mutex_t syslog_mutex; static pthread_mutex_t resolve_mutex; +struct mlfiPriv; + //////////////////////////////////////////////// // helper to discard the strings and objects held by an ns_map @@ -229,14 +231,7 @@ //////////////////////////////////////////////// // syslog a message // -static void my_syslog(char *text); -static void my_syslog(char *text) { - pthread_mutex_lock(&syslog_mutex); - openlog("dnsbl", LOG_PID, LOG_MAIL); - syslog(LOG_NOTICE, "%s", text); - closelog(); - pthread_mutex_unlock(&syslog_mutex); -} +static void my_syslog(mlfiPriv *priv, char *text); // include the content scanner @@ -254,6 +249,7 @@ map<DNSBLP, status> checked; // status from those lists // message specific data char *mailaddr; // envelope from value + char *queueid; // sendmail queue id bool authenticated; // client authenticated? if so, suppress all dnsbl checks bool have_whites; // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails bool only_whites; // every recipient is whitelisted? 
@@ -271,10 +267,11 @@ pthread_mutex_unlock(&config_mutex); ip = 0; mailaddr = NULL; + queueid = NULL; authenticated = false; have_whites = false; only_whites = true; - memory = new recorder(&pc->html_tags, &pc->tlds); + memory = new recorder(this, &pc->html_tags, &pc->tlds); scanner = new url_scanner(memory); } mlfiPriv::~mlfiPriv() { @@ -285,15 +282,17 @@ } void mlfiPriv::reset(bool final) { if (mailaddr) free(mailaddr); + if (queueid) free(queueid); discard(non_whites); delete memory; delete scanner; if (!final) { mailaddr = NULL; + queueid = NULL; authenticated = false; have_whites = false; only_whites = true; - memory = new recorder(&pc->html_tags, &pc->tlds); + memory = new recorder(this, &pc->html_tags, &pc->tlds); scanner = new url_scanner(memory); } } @@ -302,6 +301,27 @@ //////////////////////////////////////////////// +// syslog a message +// +static void my_syslog(mlfiPriv *priv, char *text) { + char buf[1000]; + if (priv) { + snprintf(buf, sizeof(buf), "%s %s", priv->queueid, text); + text = buf; + } + pthread_mutex_lock(&syslog_mutex); + openlog("dnsbl", LOG_PID, LOG_MAIL); + syslog(LOG_NOTICE, "%s", text); + closelog(); + pthread_mutex_unlock(&syslog_mutex); +} + +static void my_syslog(char *text); +static void my_syslog(char *text) { + my_syslog(NULL, text); +} + +//////////////////////////////////////////////// // register a global string // static char* register_string(char *name); @@ -584,7 +604,7 @@ else { snprintf(buf, sizeof(buf), "host %s not found", host); } - my_syslog(buf); + my_syslog(&priv, buf); } if (ip) { status st = check_single(ip, dc.content_suffix); @@ -615,7 +635,7 @@ else { snprintf(buf, sizeof(buf), "ns %s not found", host); } - my_syslog(buf); + my_syslog(&priv, buf); } if (ip) { status st = check_single(ip, dc.content_suffix); @@ -631,7 +651,7 @@ int bin = priv.memory->binary_tags; int bad = priv.memory->bad_html_tags; lim = priv.pc->tag_limit; - if (bin > bad) return oksofar; // probably .zip or .tar.gz with random content + 
if (3*bin > bad) return oksofar; // probably .zip or .tar.gz with random content if ((bad > lim) && (lim > 0)) return reject_tag; return oksofar; } @@ -669,6 +689,7 @@ status st = oksofar; mlfiPriv &priv = *MLFIPRIV; CONFIG &dc = *priv.pc; + if (!priv.queueid) priv.queueid = strdup(smfi_getsymval(ctx, "i"); char *rcptaddr = rcpt[0]; char *dnsname = lookup(rcptaddr, dc.env_to_dnsbll); char *fromname = lookup(rcptaddr, dc.env_to_chkfrom);
--- a/src/package Mon Jun 07 21:07:40 2004 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -#!/bin/bash - -VER=dnsbl-3.1 -mkdir $VER - target1=/home/httpd/html/510sg/util/dnsbl.tar.gz - target2=/home/httpd/html/510sg/dnsbl.conf - target3=/home/httpd/html/510sg/dnsbl.html - - cp sample.conf $VER/dnsbl.conf - cp html-tags.conf $VER - cp tld.conf $VER - cp *cpp $VER - cp *rc $VER - cp install.bash $VER - cp LICENSE $VER - tar cfvz $target1 $VER - - cp sample.conf $target2 - cp dnsbl.html $target3 - - echo scp $target1 ns1:$target1 - echo scp $target2 ns1:$target2 - echo scp $target3 ns1:$target3 - bt1=`basename $target1` - bt2=`basename $target2` - bt3=`basename $target3` - scp $target1 ams:/tmp/$bt1 - scp $target2 ams:/tmp/$bt2 - scp $target3 ams:/tmp/$bt3 - ssh -t ams "scp /tmp/$bt1 ns1.five-ten-sg.com:$target1" - ssh -t ams "scp /tmp/$bt2 ns1.five-ten-sg.com:$target2" - ssh -t ams "scp /tmp/$bt3 ns1.five-ten-sg.com:$target3" -rm -rf $VER
--- a/src/scanner.cpp Mon Jun 07 21:07:40 2004 -0700 +++ b/src/scanner.cpp Mon Jul 05 10:52:02 2004 -0700 @@ -14,6 +14,7 @@ // object to record things we see in the body content struct recorder { + mlfiPriv *priv; // needed for syslog string_set *html_tags; // valid tags string_set *tlds; // valid tlds string_set hosts; @@ -26,7 +27,8 @@ void new_tag(char *tag); void binary(); }; -recorder::recorder(string_set *html_tags_, string_set *tlds_) { +recorder::recorder(mlfiPriv *priv_, string_set *html_tags_, string_set *tlds_) { + priv = priv_; html_tags = html_tags_; tlds = tlds_; bad_html_tags = 0; @@ -54,7 +56,7 @@ // only log the first 10 bad tags char buf[200]; snprintf(buf, sizeof(buf), "bad html tag %s", tag); - my_syslog(buf); + my_syslog(priv, buf); } } } @@ -189,12 +191,12 @@ {h_init, h_end, t_init, t_end, t_end, t_end, t_end, t_com3, t_com3, t_end, t_end, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3E > {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x3F ? 
{h_init, h_host, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x40 @ - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x41 A - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x42 B - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x43 C - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x44 D - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x45 E - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x46 F + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x41 A + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x42 B + 
{h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x43 C + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x44 D + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x45 E + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x46 F {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x47 G {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x48 H {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x49 I @@ -212,7 +214,7 @@ {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x55 U {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, 
m_init, b_init, b_64, b_64, b_64, }, // 0x56 V {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x57 W - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x58 X + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_num, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x58 X {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x59 Y {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x5A Z {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5B [ @@ -221,12 +223,12 @@ {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5E ^ {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x5F _ {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, 
t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x60 ` - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x61 a - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x62 b - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x63 c - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x64 d - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x65 e - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_init, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x66 f + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x61 a + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x62 b + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, 
t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x63 c + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x64 d + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x65 e + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_1, d_2, e_init, e_init, e_num, m_init, m_1, m_2, b_init, b_64, b_64, b_64, }, // 0x66 f {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x67 g {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x68 h {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x69 i @@ -244,7 +246,7 @@ {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x75 u {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x76 v {h_host, h_host, 
t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x77 w - {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x78 x + {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_num, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x78 x {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x79 y {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url, u_url, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x7A z {h_init, h_end, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init, }, // 0x7B { @@ -1036,10 +1038,16 @@ ////////////////////////////// - // html entity decoder &#nnn; + // html entity decoder &#nnn; &#xnnn; case e_semi: { pending[--count] = '\0'; // null terminate the digit string by overwriting the semicolon + u_char f = pending[2]; + if ((f == 'x') || (f == 'X')) { + pending[0] = strtol((const char *)pending+2, (char **)NULL, 16); + } + else { pending[0] = atoi((const char *)pending+2); + } count = 1; st = e_init; } // fall thru