Mercurial > dnsbl
diff src/scanner.cpp @ 8:dbe18921f741
integration work on url scanner
author | carl |
---|---|
date | Thu, 22 Apr 2004 11:25:45 -0700 |
parents | 93ff6d1ef647 |
children | 8c65411cd7ab |
line wrap: on
line diff
--- a/src/scanner.cpp Thu Apr 22 08:38:07 2004 -0700 +++ b/src/scanner.cpp Thu Apr 22 11:25:45 2004 -0700 @@ -1,30 +1,7 @@ -// normal stuff -#include <stdio.h> -#include <stdlib.h> - -// needed for std c++ collections -#include <set> -#include <map> -#include <list> - -// for the dns resolver -#include <netinet/in.h> -#include <arpa/nameser.h> -#include <resolv.h> - -// misc stuff needed here -#include <ctype.h> -#include <fstream> - -static char* version="$Id$"; +static char* scanner_version="$Id$"; using namespace std; -enum status {oksofar, // not rejected yet - white, // whitelisted by envelope from - black, // blacklisted by envelope from or to - reject}; // rejected by a dns list - enum state {// url decoder states u_init, u_http, @@ -841,23 +818,25 @@ 0, // 0xff }; -#define PENDING_LIMIT 1000 +#define PENDING_LIMIT 100 struct fsa { u_char pending[PENDING_LIMIT]; int count; state st; state init; fsa* next; + string_set *urls; - fsa(state init, fsa* next_); + fsa(state init, fsa* next_, string_set *urls_); void push(u_char *buf, int len); }; -fsa::fsa(state init_, fsa* next_) { +fsa::fsa(state init_, fsa *next_, string_set *urls_) { count = 0; st = init_; init = init_; next = next_; + urls = urls_; } void fsa::push(u_char *buf, int len) { @@ -884,7 +863,9 @@ case u_reco: { pending[count-1] = 0; - if (strncasecmp((const char *)pending, "http://", 7) == 0) fprintf(stdout, "%s\n", pending); + if (strncasecmp((const char *)pending, "http://", 7) == 0) { + urls->insert(strdup((const char *)pending+7)); + } } // fall thru case u_init: { @@ -979,75 +960,32 @@ } } - +struct url_scanner { + fsa *urls_parser; + fsa *html_parser; + fsa *mime_parser; + fsa *b64_parser; -//////////////////////////////////////////////// -// ask a dns question and get an A record answer -// -static unsigned long dns_interface(char *question); -static unsigned long dns_interface(char *question) { - u_char answer[NS_PACKETSZ]; - int length = res_search(question, ns_c_in, ns_t_a, answer, sizeof(answer)); - if (length < 0) return oksofar; // error in getting answer - // parse the answer - ns_msg handle; - ns_rr rr; - if (ns_initparse(answer, length, &handle) != 0) return oksofar; - int rrnum = 0; - while (ns_parserr(&handle, ns_s_an, rrnum++, &rr) == 0) { - if (ns_rr_type(rr) == ns_t_a) { - unsigned long address; - memcpy(&address, ns_rr_rdata(rr), sizeof(address)); - return reject; - } - } - return 0; + url_scanner(string_set *urls); + ~url_scanner(); + void scan(u_char *buffer, size_t length); +}; + +url_scanner::url_scanner(string_set *urls) { + urls_parser = new fsa(u_init, NULL, urls); + html_parser = new fsa(e_init, urls_parser, NULL); + mime_parser = new fsa(m_init, html_parser, NULL); + b64_parser = new fsa(b_init, mime_parser, NULL); } -//////////////////////////////////////////////// -// check a single dnsbl - we don't try very hard, just -// using the default resolver retry settings. If we cannot -// get an answer, we just accept the mail. The caller -// must ensure thread safety. -// -static status check_single(int ip, char *suffix); -static status check_single(int ip, char *suffix) { - // make a dns question - const u_char *src = (const u_char *)&ip; - if (src[0] == 127) return oksofar; // don't do dns lookups on localhost - char question[NS_MAXDNAME]; - snprintf(question, sizeof(question), "%u.%u.%u.%u.%s.", src[3], src[2], src[1], src[0], suffix); - // ask the question, if we get an A record it implies a blacklisted ip address - unsigned long ans = dns_interface(question); - return (ans) ? reject : oksofar; +url_scanner::~url_scanner() { + delete urls_parser; + delete html_parser; + delete mime_parser; + delete b64_parser; } - -//////////////////////////////////////////////// -// scan a file for URLs -// -static void scan_file(char *fn, fsa& parser); -static void scan_file(char *fn, fsa& parser) { - const int LINE_SIZE = 2000; - char line[LINE_SIZE]; - ifstream is(fn); - while (!is.eof()) { - is.getline(line, LINE_SIZE-1); - int n = strlen(line); - line[n++] = '\n'; - parser.push((u_char*)line, n); - } - is.close(); +void url_scanner::scan(u_char *buffer, size_t length) { + b64_parser->push(buffer, length); } - -int main(int argc, char**argv) -{ - char *fn = argv[1]; - fsa *urls_parser = new fsa(u_init, NULL); - fsa *html_parser = new fsa(e_init, urls_parser); - fsa *mime_parser = new fsa(m_init, html_parser); - fsa *b64_parser = new fsa(b_init, mime_parser); - if (fn) scan_file(fn, *b64_parser); - return 0; -}