Mercurial > dnsbl
comparison src/scanner.cpp @ 19:b8f5fa3dd5b8
fix problems in the state transitions causing impossible states
author | carl |
---|---|
date | Fri, 30 Apr 2004 22:44:56 -0700 |
parents | 041ea016b684 |
children | 06de5ab6a232 |
comparison
equal
deleted
inserted
replaced
18:041ea016b684 | 19:b8f5fa3dd5b8 |
---|---|
33 b_64, | 33 b_64, |
34 | 34 |
35 // counter for number of columns in the table | 35 // counter for number of columns in the table |
36 end_state, | 36 end_state, |
37 | 37 |
38 // temporary mime states | 38 // temporary states |
39 h_end, | 39 h_end, |
40 t_end, | 40 t_end, |
41 u_reco, | 41 u_reco, |
42 e_semi, | 42 e_semi, |
43 m_2, | 43 m_2, |
132 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4E N | 132 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4E N |
133 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4F O | 133 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x4F O |
134 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x50 P | 134 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x50 P |
135 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x51 Q | 135 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x51 Q |
136 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x52 R | 136 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x52 R |
137 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x53 S | 137 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x53 S |
138 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x54 T | 138 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x54 T |
139 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x55 U | 139 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x55 U |
140 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x56 V | 140 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x56 V |
141 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x57 W | 141 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x57 W |
142 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x58 X | 142 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x58 X |
164 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6E n | 164 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6E n |
165 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6F o | 165 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x6F o |
166 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x70 p | 166 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x70 p |
167 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x71 q | 167 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x71 q |
168 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x72 r | 168 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x72 r |
169 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x73 s | 169 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x73 s |
170 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x74 t | 170 {h_host, h_host, t_init, t_disc, u_http, u_http, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x74 t |
171 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x75 u | 171 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x75 u |
172 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x76 v | 172 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x76 v |
173 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x77 w | 173 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x77 w |
174 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x78 x | 174 {h_host, h_host, t_init, t_disc, u_init, u_init, u_url, u_url, e_init, e_init, e_init, m_init, m_init, m_init, b_init, b_64, b_64, b_64, }, // 0x78 x |
847 string_set *hosts; | 847 string_set *hosts; |
848 | 848 |
849 fsa(state init, fsa* next1_, fsa* next2_, string_set *hosts_); | 849 fsa(state init, fsa* next1_, fsa* next2_, string_set *hosts_); |
850 void push(u_char *buf, int len); | 850 void push(u_char *buf, int len); |
851 void pusher(); | 851 void pusher(); |
852 void error(char *err); | |
852 }; | 853 }; |
853 | 854 |
854 fsa::fsa(state init_, fsa *next1_, fsa *next2_, string_set *hosts_) { | 855 fsa::fsa(state init_, fsa *next1_, fsa *next2_, string_set *hosts_) { |
855 count = 0; | 856 count = 0; |
856 st = init_; | 857 st = init_; |
858 next1 = next1_; | 859 next1 = next1_; |
859 next2 = next2_; | 860 next2 = next2_; |
860 hosts = hosts_; | 861 hosts = hosts_; |
861 } | 862 } |
862 | 863 |
864 void fsa::error(char *err) { | |
865 count = 0; | |
866 st = init; | |
867 if (err) my_syslog(err); | |
868 } | |
869 | |
863 void fsa::pusher() { | 870 void fsa::pusher() { |
864 if (next1) next1->push(pending, count); | 871 if (next1) next1->push(pending, count); |
865 if (next2) next2->push(pending, count); | 872 if (next2) next2->push(pending, count); |
866 count = 0; | 873 count = 0; |
867 } | 874 } |
868 | 875 |
869 void fsa::push(u_char *buf, int len) { | 876 void fsa::push(u_char *buf, int len) { |
870 for (int i=0; i<len; i++) { | 877 for (int i=0; i<len; i++) { |
878 if (count == (PENDING_LIMIT-1)) error(NULL); | |
879 if (st >= end_state) error("finite state machine impossible state"); | |
871 u_char c = buf[i]; | 880 u_char c = buf[i]; |
872 // guard against buffer overflow | |
873 if (count == (PENDING_LIMIT-1)) { | |
874 pusher(); | |
875 st = init; | |
876 } | |
877 pending[count++] = c; | 881 pending[count++] = c; |
878 st = parse_table[c][st]; | 882 st = parse_table[c][st]; |
879 switch (st) { | 883 switch (st) { |
880 | 884 |
881 ////////////////////////////// | 885 ////////////////////////////// |
890 register_string(*hosts, (char*)pending); | 894 register_string(*hosts, (char*)pending); |
891 break; | 895 break; |
892 } | 896 } |
893 } | 897 } |
894 } | 898 } |
899 st = h_init; | |
895 } // fall thru | 900 } // fall thru |
896 | 901 |
897 case h_init: { | 902 case h_init: { |
898 count = 0; | 903 count = 0; |
899 } break; | 904 } break; |
913 } break; | 918 } break; |
914 | 919 |
915 ////////////////////////////// | 920 ////////////////////////////// |
916 // url recognizer | 921 // url recognizer |
917 case u_sla: { | 922 case u_sla: { |
918 if ((count < 6) || (7 < count)) { | 923 if ((count < 6) || (8 < count)) { // allow http:// or https:// |
919 count = 0; | 924 count = 0; |
920 st = u_init; | 925 st = u_init; |
921 } | 926 } |
922 } break; | 927 } break; |
923 | 928 |
924 case u_reco: { | 929 case u_reco: { |
925 if (count > 12) { | 930 if (count > 13) { // need some minimal length host name after the protocol |
926 pending[count-1] = 0; | 931 pending[--count] = '\0'; // null terminate host name by overwriting the terminator |
932 char *p = NULL; | |
927 if (strncasecmp((const char *)pending, "http://", 7) == 0) { | 933 if (strncasecmp((const char *)pending, "http://", 7) == 0) { |
928 char *p = (char *)pending + 7; | 934 p = (char *)pending + 7; |
929 if (strchr(p, '.')) register_string(*hosts, p); // require at least one . in a dns name | |
930 } | 935 } |
936 if (strncasecmp((const char *)pending, "https://", 8) == 0) { | |
937 p = (char *)pending + 8; | |
938 } | |
939 if (p && strchr(p, '.')) register_string(*hosts, p); // require at least one . in a dns name | |
931 } | 940 } |
941 st = u_init; | |
932 } // fall thru | 942 } // fall thru |
933 | 943 |
934 case u_init: { | 944 case u_init: { |
935 count = 0; // discard all characters | 945 count = 0; // discard all characters |
936 } break; | 946 } break; |