changeset 41:d95af8129dfa

updates for 3.2, changing file layout, add queueid to messages
author carl
date Mon, 05 Jul 2004 10:52:02 -0700
parents dc3d8d1aa2d2
children afcf403709ef
files ChangeLog dnsbl.conf dnsbl.rc dnsbl.spec.in install.bash package.bash src/dnsbl.cpp src/package src/scanner.cpp
diffstat 9 files changed, 271 insertions(+), 77 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ChangeLog	Mon Jul 05 10:52:02 2004 -0700
@@ -0,0 +1,17 @@
+    $Id$
+
+3.2 2004-07-05
+
+    Add .spec file for building rpms, contributed by John Gunkel
+    <jgunkel@palliser.ca>
+
+    Changes to file layout suggested by John to conform to RedHat/LSB
+    standards.
+
+    Change parser to handle &#xnnn; obfuscated urls with characters
+    specified in hex.
+
+    Make bad_html tags more sensitive to binary tags, to reduce false
+    positives in .zip or .tar.gz file attachments.
+
+    Add sendmail queueid to the dnsbl syslog messages.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dnsbl.conf	Mon Jul 05 10:52:02 2004 -0700
@@ -0,0 +1,35 @@
+##############################################
+# content scanning parameters
+#
+content         sbl-xbl.spamhaus.org        'Mail containing %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s'
+host_limit      20                          'Mail containing too many host names rejected'
+html_limit      20                          'Mail containing excessive bad html tags rejected'
+include html-tags.conf
+include tld.conf
+
+
+##############################################
+# define the dnsbls to use
+#
+dnsbl   LOCAL   blackholes.five-ten-sg.com  'Mail from %s rejected - local; see http://www.five-ten-sg.com/blackhole.php?%s'
+#dnsbl   SPEWS   blackholes.spews.org        'Mail from %s rejected - spews; see http://www.spews.org/ask.cgi?x=%s'
+dnsbl   SBL     sbl-xbl.spamhaus.org        'Mail from %s rejected - sbl; see http://www.spamhaus.org/query/bl?ip=%s'
+
+
+##############################################
+# define the (default and other) lists of dnsbls to use
+#
+dnsbl_list  DEFAULT LOCAL SBL
+
+
+##############################################
+# define the (default and other) env_from maps
+#
+
+##############################################
+# specify dnsbl_lists and env_from maps to use for specific recipients
+#
+
+##############################################
+# specify dnsbl_lists and env_from maps to use for clients domains
+#
--- a/dnsbl.rc	Mon Jun 07 21:07:40 2004 -0700
+++ b/dnsbl.rc	Mon Jul 05 10:52:02 2004 -0700
@@ -20,12 +20,26 @@
   start)
         # Start daemons.
         echo -n "Starting dnsbl-milter: "
-        cd /var/dnsbl   # conf file is here
-        ./dnsbl -d -p local:/var/run/dnsbl.sock
+        if [ ! -f /var/lock/subsys/dnsbl ]; then
+            cd /etc/dnsbl   # conf file is here
+            su -l dnsbl -s /bin/sh -c "/usr/sbin/dnsbl -d -p local:/var/run/dnsbl/dnsbl.sock "
         RETVAL=$?
-        sleep 3
+            pid=`pidof -s /usr/sbin/dnsbl`
+            if [ $pid ]
+            then
+                success "Starting dnsbl milter:"
+                touch /var/lock/subsys/dnsbl
         echo
-        [ $RETVAL -eq 0 ] && touch /var/lock/subsys/dnsbl
+            else
+                failure "Starting dnsbl milter:"
+                echo
+            fi
+        else
+            echo -n "already running! "
+            failure "dnsbl milter already running!"
+            echo
+        fi
+
         ;;
   stop)
         # Stop daemons.
@@ -49,3 +63,5 @@
         exit 1
 esac
 exit $RETVAL
+
+# $Id$
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dnsbl.spec.in	Mon Jul 05 10:52:02 2004 -0700
@@ -0,0 +1,95 @@
+Summary: DNSBL Sendmail Milter
+Name: dnsbl
+Version: 3.2
+Release: 1
+Copyright: GPL
+Group: System Environment/Daemons
+Source: http://www.five-ten-sg.com/util/dnsbl.tar.gz
+BuildRoot: %{_tmppath}/%{name}-%{version}
+
+BuildRequires:  sendmail-devel >= 8.12.1
+Requires(pre):  /usr/sbin/groupadd
+Requires(pre):  /usr/sbin/useradd
+Requires(pre):  /usr/bin/getent
+Requires(postun):       /usr/sbin/userdel
+Requires(postun):       /usr/sbin/groupdel
+Requires(post,preun):   /sbin/chkconfig
+Requires:       sendmail
+
+
+%description
+We present here a mechanism whereby the backup mail server can use the correct set of DNSBLs for each recipient for each message. As a side-effect, it gives us the ability to customize the set of DNSBLs on a per-recipient basis, so that fred@example.com could use SPEWS and the SBL, where all other users @example.com use only the SBL.
+
+This milter will also decode (base64, mime, html entity, url encodings) and scan for HTTP and HTTPS URLs and bare hostnames in the body of the mail. If any of those host names have A or NS records on the SBL (or a single configurable DNSBL), the mail will be rejected unless previously whitelisted. This milter also counts the number of invalid HTML tags, and can reject mail if that count exceeds your specified limit.
+
+The DNSBL milter reads a text configuration file (dnsbl.conf) on startup, and whenever the config file (or any of the referenced include files) is changed. The entire configuration file is case insensitive.
+
+%prep
+# %setup -q -n Zope-%{zope_version}
+# %patch0 -p1
+# %patch1 -p1
+
+%setup
+
+
+%build
+pwd
+g++ -c $CXXFLAGS -pthread dnsbl.cpp
+g++ -o dnsbl dnsbl.o /usr/lib/libresolv.a -lmilter -pthread
+
+%install
+# hoisted from install.bash
+#DST=/var/dnsbl
+DST=%{buildroot}
+if [ "%{buildroot}" = "/" -o -z "%{buildroot}" ] ; then
+	echo sorry, you probably do not want me to delete the old buildroot at %{buildroot}
+	exit 1
+fi
+
+rm -rf %{buildroot}
+mkdir -p %{buildroot}/etc/dnsbl
+
+install -m 644 dnsbl.conf %{buildroot}/etc/dnsbl/dnsbl.conf
+install -m 644 html-tags.conf %{buildroot}/etc/dnsbl/html-tags.conf
+install -m 644 tld.conf %{buildroot}/etc/dnsbl/tld.conf
+
+mkdir -p %{buildroot}/usr/sbin
+install -m 755 dnsbl %{buildroot}/usr/sbin/dnsbl
+
+mkdir -p %{buildroot}/etc/init.d
+install -m 755 dnsbl.rc %{buildroot}/etc/init.d/dnsbl
+
+mkdir -p %{buildroot}/var/run/dnsbl
+
+%pre
+/usr/bin/getent passwd dnsbl ||
+  useradd -r -d /etc/dnsbl -M -c "dnsbl pseudo-user" -s /sbin/nologin dnsbl
+%post
+/sbin/chkconfig --add dnsbl
+/sbin/chkconfig dnsbl on
+/sbin/service dnsbl start
+
+# [JOG] TODO: spew out a message indicating what should be added to sendmail.mc
+
+%preun
+if [ $1 -eq 0 ]; then
+   /sbin/service dnsbl stop || :
+   /sbin/chkconfig --del dnsbl
+   userdel dnsbl || :
+fi
+
+%postun
+
+%clean
+
+%files
+%defattr(-,root,root)
+%config /etc/dnsbl/
+/etc/init.d/dnsbl
+/usr/sbin/dnsbl
+%dir %attr(0750,dnsbl,root) /var/run/dnsbl
+
+%changelog
+* Wed Jun 30 2004 John Gunkel <jgunkel@palliser.ca>
+- Revision 1.1 (2004/06/30 10:08:48): initial revision of spec file. Need to add a better description, docs and a sendmail.mc message
+
--- a/install.bash	Mon Jun 07 21:07:40 2004 -0700
+++ b/install.bash	Mon Jul 05 10:52:02 2004 -0700
@@ -24,9 +24,17 @@
 
 
 #####################
+# ensure the user is created
+/usr/bin/getent passwd dnsbl || useradd -r -d /etc/dnsbl -M -c "dnsbl pseudo-user" -s /sbin/nologin dnsbl
 # install the milter
-DST=/var/dnsbl
+DST=/etc/dnsbl
 mkdir -p $DST
+if [ -f /var/dnsbl/dnsbl.conf ]; then
+    # move the conf files to the new location
+    mv /var/dnsbl/*conf $DST
+    rm /var/dnsbl/dnsbl # remove the old binary
+    rmdir /var/dnsbl
+fi
 if [ ! -f $DST/dnsbl.conf ]; then
     cp dnsbl.conf $DST
 fi
@@ -36,7 +44,7 @@
 if [ ! -f $DST/tld.conf ]; then
     cp tld.conf $DST
 fi
-mv -f dnsbl $DST
+mv -f dnsbl /usr/sbin/dnsbl
 cp dnsbl.rc /etc/rc.d/init.d/dnsbl
 chmod 755 /etc/rc.d/init.d/dnsbl
 /sbin/chkconfig --add dnsbl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/package.bash	Mon Jul 05 10:52:02 2004 -0700
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+VER=dnsbl-3.2  # staging directory name; it is also the top-level directory stored in the tarball
+mkdir $VER
+    target1=/home/httpd/html/510sg/util/dnsbl.tar.gz  # release tarball
+    target2=/home/httpd/html/510sg/dnsbl.conf  # receives a copy of sample.conf
+    target3=/home/httpd/html/510sg/dnsbl.html  # receives a copy of dnsbl.html
+
+    cp sample1.conf   $VER/dnsbl.conf  # sample1.conf is shipped under the name dnsbl.conf
+    cp sample.conf    $VER
+    cp html-tags.conf $VER
+    cp tld.conf       $VER
+    cp *cpp           $VER
+    cp *rc            $VER
+    cp install.bash   $VER
+    cp *spec          $VER
+    cp LICENSE        $VER
+    cp RELEASE_NOTES  $VER
+    tar cfvz $target1 $VER  # gzip-compressed tar of the staging directory, written to target1
+
+    cp sample.conf $target2
+    cp dnsbl.html  $target3
+
+    scp $target1 ns1:$target1  # copy each artifact to the same path on host ns1
+    scp $target2 ns1:$target2
+    scp $target3 ns1:$target3
+rm -rf $VER  # discard the staging directory
--- a/src/dnsbl.cpp	Mon Jun 07 21:07:40 2004 -0700
+++ b/src/dnsbl.cpp	Mon Jul 05 10:52:02 2004 -0700
@@ -179,6 +179,8 @@
 static pthread_mutex_t  syslog_mutex;
 static pthread_mutex_t  resolve_mutex;
 
+struct mlfiPriv;
+
 
 ////////////////////////////////////////////////
 // helper to discard the strings and objects held by an ns_map
@@ -229,14 +231,7 @@
 ////////////////////////////////////////////////
 // syslog a message
 //
-static void my_syslog(char *text);
-static void my_syslog(char *text) {
-    pthread_mutex_lock(&syslog_mutex);
-        openlog("dnsbl", LOG_PID, LOG_MAIL);
-        syslog(LOG_NOTICE, "%s", text);
-        closelog();
-    pthread_mutex_unlock(&syslog_mutex);
-}
+static void my_syslog(mlfiPriv *priv, char *text);
 
 
 // include the content scanner
@@ -254,6 +249,7 @@
     map<DNSBLP, status> checked;    // status from those lists
     // message specific data
     char    *mailaddr;      // envelope from value
+    char    *queueid;       // sendmail queue id
     bool    authenticated;  // client authenticated? if so, suppress all dnsbl checks
     bool    have_whites;    // have at least one whitelisted recipient? need to accept content and remove all non-whitelisted recipients if it fails
     bool    only_whites;    // every recipient is whitelisted?
@@ -271,10 +267,11 @@
     pthread_mutex_unlock(&config_mutex);
     ip            = 0;
     mailaddr      = NULL;
+    queueid       = NULL;
     authenticated = false;
     have_whites   = false;
     only_whites   = true;
-    memory        = new recorder(&pc->html_tags, &pc->tlds);
+    memory        = new recorder(this, &pc->html_tags, &pc->tlds);
     scanner       = new url_scanner(memory);
 }
 mlfiPriv::~mlfiPriv() {
@@ -285,15 +282,17 @@
 }
 void mlfiPriv::reset(bool final) {
     if (mailaddr) free(mailaddr);
+    if (queueid)  free(queueid);
     discard(non_whites);
     delete memory;
     delete scanner;
     if (!final) {
         mailaddr      = NULL;
+        queueid       = NULL;
         authenticated = false;
         have_whites   = false;
         only_whites   = true;
-        memory        = new recorder(&pc->html_tags, &pc->tlds);
+        memory        = new recorder(this, &pc->html_tags, &pc->tlds);
         scanner       = new url_scanner(memory);
     }
 }
@@ -302,6 +301,27 @@
 
 
 ////////////////////////////////////////////////
+// syslog a message
+//
+static void my_syslog(mlfiPriv *priv, char *text) {    // log text via syslog; prefix it with the sendmail queue id when priv has one
+    char buf[1000];                                     // holds the prefixed message; snprintf truncates anything longer
+    if (priv && priv->queueid) {                        // queueid is NULL until it has been fetched, and NULL must not reach %s
+        snprintf(buf, sizeof(buf), "%s %s", priv->queueid, text);
+        text = buf;
+    }
+    pthread_mutex_lock(&syslog_mutex);                  // serialize the openlog/syslog/closelog sequence
+        openlog("dnsbl", LOG_PID, LOG_MAIL);
+        syslog(LOG_NOTICE, "%s", text);
+        closelog();
+    pthread_mutex_unlock(&syslog_mutex);
+}
+
+static void my_syslog(char *text);
+static void my_syslog(char *text) {                     // overload for callers that have no per-message context
+    my_syslog(NULL, text);
+}
+
+////////////////////////////////////////////////
 // register a global string
 //
 static char* register_string(char *name);
@@ -584,7 +604,7 @@
             else {
                 snprintf(buf, sizeof(buf), "host %s not found", host);
             }
-            my_syslog(buf);
+            my_syslog(&priv, buf);
         }
         if (ip) {
             status st = check_single(ip, dc.content_suffix);
@@ -615,7 +635,7 @@
             else {
                 snprintf(buf, sizeof(buf), "ns %s not found", host);
             }
-            my_syslog(buf);
+            my_syslog(&priv, buf);
         }
         if (ip) {
             status st = check_single(ip, dc.content_suffix);
@@ -631,7 +651,7 @@
     int bin = priv.memory->binary_tags;
     int bad = priv.memory->bad_html_tags;
     lim = priv.pc->tag_limit;
-    if (bin > bad) return oksofar;  // probably .zip or .tar.gz with random content
+    if (3*bin > bad) return oksofar;    // probably .zip or .tar.gz with random content
     if ((bad > lim) && (lim > 0)) return reject_tag;
     return oksofar;
 }
@@ -669,6 +689,7 @@
     status st = oksofar;
     mlfiPriv &priv = *MLFIPRIV;
     CONFIG &dc = *priv.pc;
+    if (!priv.queueid) { char *qid = smfi_getsymval(ctx, "i"); if (qid) priv.queueid = strdup(qid); }  // fetch the "i" macro once per message; the lookup may return NULL
     char *rcptaddr = rcpt[0];
     char *dnsname  = lookup(rcptaddr, dc.env_to_dnsbll);
     char *fromname = lookup(rcptaddr, dc.env_to_chkfrom);
--- a/src/package	Mon Jun 07 21:07:40 2004 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-VER=dnsbl-3.1
-mkdir $VER
-    target1=/home/httpd/html/510sg/util/dnsbl.tar.gz
-    target2=/home/httpd/html/510sg/dnsbl.conf
-    target3=/home/httpd/html/510sg/dnsbl.html
-
-    cp sample.conf    $VER/dnsbl.conf
-    cp html-tags.conf $VER
-    cp tld.conf       $VER
-    cp *cpp           $VER
-    cp *rc            $VER
-    cp install.bash   $VER
-    cp LICENSE        $VER
-    tar cfvz $target1 $VER
-
-    cp sample.conf $target2
-    cp dnsbl.html  $target3
-
-    echo scp $target1 ns1:$target1
-    echo scp $target2 ns1:$target2
-    echo scp $target3 ns1:$target3
-    bt1=`basename $target1`
-    bt2=`basename $target2`
-    bt3=`basename $target3`
-    scp $target1 ams:/tmp/$bt1
-    scp $target2 ams:/tmp/$bt2
-    scp $target3 ams:/tmp/$bt3
-    ssh -t ams "scp /tmp/$bt1 ns1.five-ten-sg.com:$target1"
-    ssh -t ams "scp /tmp/$bt2 ns1.five-ten-sg.com:$target2"
-    ssh -t ams "scp /tmp/$bt3 ns1.five-ten-sg.com:$target3"
-rm -rf $VER
--- a/src/scanner.cpp	Mon Jun 07 21:07:40 2004 -0700
+++ b/src/scanner.cpp	Mon Jul 05 10:52:02 2004 -0700
@@ -14,6 +14,7 @@
 // object to record things we see in the body content
 struct recorder
 {
+    mlfiPriv    *priv;      // needed for syslog
     string_set  *html_tags; // valid tags
     string_set  *tlds;      // valid tlds
     string_set  hosts;
@@ -26,7 +27,8 @@
     void new_tag(char *tag);
     void binary();
 };
-recorder::recorder(string_set *html_tags_, string_set *tlds_) {
+recorder::recorder(mlfiPriv *priv_, string_set *html_tags_, string_set *tlds_) {
+    priv          = priv_;
     html_tags     = html_tags_;
     tlds          = tlds_;
     bad_html_tags = 0;
@@ -54,7 +56,7 @@
             // only log the first 10 bad tags
             char buf[200];
             snprintf(buf, sizeof(buf), "bad html tag %s", tag);
-            my_syslog(buf);
+            my_syslog(priv, buf);
         }
     }
 }
@@ -189,12 +191,12 @@
     {h_init, h_end,  t_init, t_end,  t_end,  t_end,  t_end,  t_com3, t_com3, t_end,  t_end,  u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x3E >
     {h_init, h_end,  t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x3F ?
     {h_init, h_host, t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x40 @
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x41 A
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x42 B
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x43 C
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x44 D
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x45 E
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x46 F
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x41 A
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x42 B
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x43 C
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x44 D
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x45 E
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x46 F
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x47 G
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x48 H
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x49 I
@@ -212,7 +214,7 @@
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x55 U
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x56 V
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x57 W
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x58 X
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_num,  m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x58 X
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x59 Y
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x5A Z
     {h_init, h_end,  t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x5B [
@@ -221,12 +223,12 @@
     {h_init, h_end,  t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x5E ^
     {h_init, h_end,  t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x5F _
     {h_init, h_end,  t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x60 `
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x61 a
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x62 b
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x63 c
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x64 d
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x65 e
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_init, m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x66 f
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x61 a
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x62 b
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x63 c
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x64 d
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x65 e
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_1,    d_2,    e_init, e_init, e_num,  m_init, m_1,    m_2,    b_init, b_64,   b_64,   b_64,    },  // 0x66 f
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x67 g
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_http, u_http, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x68 h
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x69 i
@@ -244,7 +246,7 @@
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x75 u
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x76 v
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x77 w
-    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x78 x
+    {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_num,  m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x78 x
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x79 y
     {h_host, h_host, t_init, t_tag2, t_tag2, t_tag2, t_tag2, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_url,  u_url,  d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x7A z
     {h_init, h_end,  t_init, t_disc, t_disc, t_disc, t_disc, t_com3, t_com3, t_com3, t_disc, u_init, u_init, u_init, u_reco, d_init, d_init, d_init, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_init, b_init, b_init,  },  // 0x7B {
@@ -1036,10 +1038,16 @@
 
 
             //////////////////////////////
-            //  html entity decoder &#nnn;
+            //  html entity decoder &#nnn;  &#xnnn;
             case e_semi: {
                 pending[--count] = '\0';  // null terminate the digit string by overwriting the semicolon
+                u_char f = pending[2];    // first character after "&#"; 'x' or 'X' marks a hex entity
+                if ((f == 'x') || (f == 'X')) {
+                    pending[0] = strtol((const char *)pending+3, (char **)NULL, 16);  // start after "&#x": strtol skips only a full "0x" prefix, not a bare "x"
+                }
+                else {
                 pending[0] = atoi((const char *)pending+2);
+                }
                 count = 1;
                 st    = e_init;
                 } // fall thru