Mercurial > dnsbl
comparison src/scanner.cpp @ 8:dbe18921f741
integration work on url scanner
author | carl |
---|---|
date | Thu, 22 Apr 2004 11:25:45 -0700 |
parents | 93ff6d1ef647 |
children | 8c65411cd7ab |
comparison
equal
deleted
inserted
replaced
7:93ff6d1ef647 | 8:dbe18921f741 |
---|---|
1 // normal stuff | 1 static char* scanner_version="$Id$"; |
2 #include <stdio.h> | |
3 #include <stdlib.h> | |
4 | |
5 // needed for std c++ collections | |
6 #include <set> | |
7 #include <map> | |
8 #include <list> | |
9 | |
10 // for the dns resolver | |
11 #include <netinet/in.h> | |
12 #include <arpa/nameser.h> | |
13 #include <resolv.h> | |
14 | |
15 // misc stuff needed here | |
16 #include <ctype.h> | |
17 #include <fstream> | |
18 | |
19 static char* version="$Id$"; | |
20 | 2 |
21 using namespace std; | 3 using namespace std; |
22 | |
23 enum status {oksofar, // not rejected yet | |
24 white, // whitelisted by envelope from | |
25 black, // blacklisted by envelope from or to | |
26 reject}; // rejected by a dns list | |
27 | 4 |
28 enum state {// url decoder states | 5 enum state {// url decoder states |
29 u_init, | 6 u_init, |
30 u_http, | 7 u_http, |
31 u_sla, | 8 u_sla, |
839 0, // 0xfd | 816 0, // 0xfd |
840 0, // 0xfe | 817 0, // 0xfe |
841 0, // 0xff | 818 0, // 0xff |
842 }; | 819 }; |
843 | 820 |
844 #define PENDING_LIMIT 1000 | 821 #define PENDING_LIMIT 100 |
845 struct fsa { | 822 struct fsa { |
846 u_char pending[PENDING_LIMIT]; | 823 u_char pending[PENDING_LIMIT]; |
847 int count; | 824 int count; |
848 state st; | 825 state st; |
849 state init; | 826 state init; |
850 fsa* next; | 827 fsa* next; |
851 | 828 string_set *urls; |
852 fsa(state init, fsa* next_); | 829 |
830 fsa(state init, fsa* next_, string_set *urls_); | |
853 void push(u_char *buf, int len); | 831 void push(u_char *buf, int len); |
854 }; | 832 }; |
855 | 833 |
856 fsa::fsa(state init_, fsa* next_) { | 834 fsa::fsa(state init_, fsa *next_, string_set *urls_) { |
857 count = 0; | 835 count = 0; |
858 st = init_; | 836 st = init_; |
859 init = init_; | 837 init = init_; |
860 next = next_; | 838 next = next_; |
839 urls = urls_; | |
861 } | 840 } |
862 | 841 |
863 void fsa::push(u_char *buf, int len) { | 842 void fsa::push(u_char *buf, int len) { |
864 for (int i=0; i<len; i++) { | 843 for (int i=0; i<len; i++) { |
865 u_char c = buf[i]; | 844 u_char c = buf[i]; |
882 } | 861 } |
883 } break; | 862 } break; |
884 | 863 |
885 case u_reco: { | 864 case u_reco: { |
886 pending[count-1] = 0; | 865 pending[count-1] = 0; |
887 if (strncasecmp((const char *)pending, "http://", 7) == 0) fprintf(stdout, "%s\n", pending); | 866 if (strncasecmp((const char *)pending, "http://", 7) == 0) { |
867 urls->insert(strdup((const char *)pending+7)); | |
868 } | |
888 } // fall thru | 869 } // fall thru |
889 | 870 |
890 case u_init: { | 871 case u_init: { |
891 count = 0; // discard all characters | 872 count = 0; // discard all characters |
892 } break; | 873 } break; |
977 } break; | 958 } break; |
978 } | 959 } |
979 } | 960 } |
980 } | 961 } |
981 | 962 |
982 | 963 struct url_scanner { |
983 | 964 fsa *urls_parser; |
984 //////////////////////////////////////////////// | 965 fsa *html_parser; |
985 // ask a dns question and get an A record answer | 966 fsa *mime_parser; |
986 // | 967 fsa *b64_parser; |
987 static unsigned long dns_interface(char *question); | 968 |
988 static unsigned long dns_interface(char *question) { | 969 url_scanner(string_set *urls); |
989 u_char answer[NS_PACKETSZ]; | 970 ~url_scanner(); |
990 int length = res_search(question, ns_c_in, ns_t_a, answer, sizeof(answer)); | 971 void scan(u_char *buffer, size_t length); |
991 if (length < 0) return oksofar; // error in getting answer | 972 }; |
992 // parse the answer | 973 |
993 ns_msg handle; | 974 url_scanner::url_scanner(string_set *urls) { |
994 ns_rr rr; | 975 urls_parser = new fsa(u_init, NULL, urls); |
995 if (ns_initparse(answer, length, &handle) != 0) return oksofar; | 976 html_parser = new fsa(e_init, urls_parser, NULL); |
996 int rrnum = 0; | 977 mime_parser = new fsa(m_init, html_parser, NULL); |
997 while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) { | 978 b64_parser = new fsa(b_init, mime_parser, NULL); |
998 if (ns_rr_type(rr) == ns_t_a) { | |
999 unsigned long address; | |
1000 memcpy(&address, ns_rr_rdata(rr), sizeof(address)); | |
1001 return reject; | |
1002 } | |
1003 } | |
1004 return 0; | |
1005 } | 979 } |
1006 | 980 |
1007 //////////////////////////////////////////////// | 981 url_scanner::~url_scanner() { |
1008 // check a single dnsbl - we don't try very hard, just | 982 delete urls_parser; |
1009 // using the default resolver retry settings. If we cannot | 983 delete html_parser; |
1010 // get an answer, we just accept the mail. The caller | 984 delete mime_parser; |
1011 // must ensure thread safety. | 985 delete b64_parser; |
1012 // | |
1013 static status check_single(int ip, char *suffix); | |
1014 static status check_single(int ip, char *suffix) { | |
1015 // make a dns question | |
1016 const u_char *src = (const u_char *)&ip; | |
1017 if (src[0] == 127) return oksofar; // don't do dns lookups on localhost | |
1018 char question[NS_MAXDNAME]; | |
1019 snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", src[3], src[2], src[1], src[0], suffix); | |
1020 // ask the question, if we get an A record it implies a blacklisted ip address | |
1021 unsigned long ans = dns_interface(question); | |
1022 return (ans) ? reject : oksofar; | |
1023 } | 986 } |
1024 | 987 |
1025 | 988 void url_scanner::scan(u_char *buffer, size_t length) { |
1026 //////////////////////////////////////////////// | 989 b64_parser->push(buffer, length); |
1027 // scan a file for URLs | |
1028 // | |
1029 static void scan_file(char *fn, fsa& parser); | |
1030 static void scan_file(char *fn, fsa& parser) { | |
1031 const int LINE_SIZE = 2000; | |
1032 char line[LINE_SIZE]; | |
1033 ifstream is(fn); | |
1034 while (!is.eof()) { | |
1035 is.getline(line, LINE_SIZE-1); | |
1036 int n = strlen(line); | |
1037 line[n++] = '\n'; | |
1038 parser.push((u_char*)line, n); | |
1039 } | |
1040 is.close(); | |
1041 } | 990 } |
1042 | 991 |
1043 | |
1044 int main(int argc, char**argv) | |
1045 { | |
1046 char *fn = argv[1]; | |
1047 fsa *urls_parser = new fsa(u_init, NULL); | |
1048 fsa *html_parser = new fsa(e_init, urls_parser); | |
1049 fsa *mime_parser = new fsa(m_init, html_parser); | |
1050 fsa *b64_parser = new fsa(b_init, mime_parser); | |
1051 if (fn) scan_file(fn, *b64_parser); | |
1052 return 0; | |
1053 } |