diff src/scanner.cpp @ 19:b8f5fa3dd5b8

fix problems in the state transitions causing impossible states
author carl
date Fri, 30 Apr 2004 22:44:56 -0700
parents 041ea016b684
children 06de5ab6a232
line wrap: on
line diff
--- a/src/scanner.cpp	Fri Apr 30 00:32:44 2004 -0700
+++ b/src/scanner.cpp	Fri Apr 30 22:44:56 2004 -0700
@@ -35,7 +35,7 @@
             // counter for number of columns in the table
             end_state,
 
-            // temporary mime states
+            // temporary states
             h_end,
             t_end,
             u_reco,
@@ -134,7 +134,7 @@
     {h_host, h_host, t_init, t_disc, u_http, u_http, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x50 P
     {h_host, h_host, t_init, t_disc, u_init, u_init, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x51 Q
     {h_host, h_host, t_init, t_disc, u_init, u_init, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x52 R
-    {h_host, h_host, t_init, t_disc, u_init, u_init, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x53 S
+    {h_host, h_host, t_init, t_disc, u_http, u_http, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x53 S
     {h_host, h_host, t_init, t_disc, u_http, u_http, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x54 T
     {h_host, h_host, t_init, t_disc, u_init, u_init, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x55 U
     {h_host, h_host, t_init, t_disc, u_init, u_init, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x56 V
@@ -166,7 +166,7 @@
     {h_host, h_host, t_init, t_disc, u_http, u_http, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x70 p
     {h_host, h_host, t_init, t_disc, u_init, u_init, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x71 q
     {h_host, h_host, t_init, t_disc, u_init, u_init, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x72 r
-    {h_host, h_host, t_init, t_disc, u_init, u_init, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x73 s
+    {h_host, h_host, t_init, t_disc, u_http, u_http, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x73 s
     {h_host, h_host, t_init, t_disc, u_http, u_http, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x74 t
     {h_host, h_host, t_init, t_disc, u_init, u_init, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x75 u
     {h_host, h_host, t_init, t_disc, u_init, u_init, u_url,  u_url,  e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64,   b_64,   b_64,    },  // 0x76 v
@@ -849,6 +849,7 @@
     fsa(state init, fsa* next1_, fsa* next2_, string_set *hosts_);
     void push(u_char *buf, int len);
     void pusher();
+    void error(char *err);
 };
 
 fsa::fsa(state init_, fsa *next1_, fsa *next2_, string_set *hosts_) {
@@ -860,6 +861,12 @@
     hosts = hosts_;
 }
 
+void fsa::error(char *err) {   // reset this machine after a fault; log err if one is given
+    count = 0;                 // drop whatever has been accumulated in pending
+    st    = init;              // return to the state held in init
+    if (err) my_syslog(err);   // NULL err resets silently (push() passes NULL when pending is nearly full)
+}
+
 void fsa::pusher() {
     if (next1) next1->push(pending, count);
     if (next2) next2->push(pending, count);
@@ -868,12 +875,9 @@
 
 void fsa::push(u_char *buf, int len) {
     for (int i=0; i<len; i++) {
+        if (count == (PENDING_LIMIT-1)) error(NULL);
+        if (st >= end_state)            error("finite state machine impossible state");
         u_char c = buf[i];
-        // guard against buffer overflow
-        if (count == (PENDING_LIMIT-1)) {
-            pusher();
-            st = init;
-        }
         pending[count++] = c;
         st = parse_table[c][st];
         switch (st) {
@@ -892,6 +896,7 @@
                         }
                     }
                 }
+                st = h_init;
                 } // fall thru
 
             case h_init: {
@@ -915,20 +920,25 @@
             //////////////////////////////
             //  url recognizer
             case u_sla: {
-                if ((count < 6) || (7 < count)) {
+                if ((count < 6) || (8 < count)) {   // allow http:// or https://
                     count = 0;
                     st    = u_init;
                 }
                 } break;
 
             case u_reco: {
-                if (count > 12) {
-                    pending[count-1] = 0;
+                if (count > 13) {   // need some minimal length host name after the protocol
+                    pending[--count] = '\0';  // null terminate host name by overwriting the terminator
+                    char *p = NULL;
                     if (strncasecmp((const char *)pending, "http://", 7) == 0) {
-                        char *p = (char *)pending + 7;
-                        if (strchr(p, '.')) register_string(*hosts, p); // require at least one . in a dns name
+                        p = (char *)pending + 7;
                     }
+                    if (strncasecmp((const char *)pending, "https://", 8) == 0) {
+                        p = (char *)pending + 8;
                 }
+                    if (p && strchr(p, '.')) register_string(*hosts, p); // require at least one . in a dns name
+                }
+                st = u_init;
                 } // fall thru
 
             case u_init: {