view src/scanner.h @ 143:ecb40aa3eaa5 stable-5-23

require two periods for ip addresses
author carl
date Tue, 10 Oct 2006 19:12:16 -0700
parents aa07452e641b
children 812c80305f26
line wrap: on
line source

/*

Copyright (c) 2006 Carl Byington - 510 Software Group, released under
the GPL version 2 or any later version at your choice available at
http://www.fsf.org/licenses/gpl.txt

*/

#ifndef scanner_include
#define scanner_include

#include "dnsbl.h"

////////////////////////////////////////////////
// memory for the content scanner
//
// Holds the state shared with the content scanner: pointers to the
// lookup sets, the set of hosts, and two tag tallies that the
// excessive_*() predicates test against caller supplied limits.
class recorder
{
	mlfiPriv	*priv;			// needed for syslog
	string_set	*html_tags; 	// valid tags
	string_set	*tlds;			// valid tlds
	string_set	*cctlds;		// valid cctlds
	string_set	hosts;			// exposed by get_hosts(), sized by excessive_hosts()
	int 		bad_html_tags;	// tested by excessive_bad_tags()
	int 		binary_tags;	// scaled by 3 in excessive_bad_tags()

public:
	// NOTE(review): the sets arrive by reference but are held as pointers,
	// so presumably their addresses are stored - confirm in the .cpp
	recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &cctlds_);

	// destruction only calls empty()
	~recorder()
	{
		empty();
	}

	// defined out of line
	void empty();
	void new_url(char *host);
	void new_tag(char *tag);
	void binary();

	// plain accessors for the private members
	mlfiPriv *get_priv()
	{
		return priv;
	}

	string_set *get_cctlds()
	{
		return cctlds;
	}

	string_set *get_tlds()
	{
		return tlds;
	}

	string_set &get_hosts()
	{
		return hosts;
	}

	// true only if limit is positive, bad_html_tags is above it, and
	// bad_html_tags is also more than three times binary_tags
	bool excessive_bad_tags(int limit)
	{
		if (limit <= 0) return false;				// non-positive limit disables the check
		if (bad_html_tags <= limit) return false;
		return bad_html_tags > 3*binary_tags;
	}

	// true only if limit is positive and hosts holds more entries than that
	bool excessive_hosts(int limit)
	{
		if (limit <= 0) return false;				// non-positive limit disables the check
		return hosts.size() > limit;
	}
};


////////////////////////////////////////////////
// the content scanner
//
// fsa is only used through pointers here, so a forward declaration suffices
class fsa;

// Scanner front end: owns a group of fsa pointers and exposes a single
// scan() entry point over a raw byte buffer.
class url_scanner
{
	// one fsa per parser
	// NOTE(review): roles are inferred from the member names only - confirm in the .cpp
	fsa *host_parser;
	fsa *tags_parser;
	fsa *urls_parser;
	fsa *urld_parser;
	fsa *html_parser;
	fsa *mime_parser;
	fsa *b64_parser;
	fsa *uu_parser;

public:
	// memory is the recorder handed to the scanner at construction
	url_scanner(recorder *memory);
	~url_scanner();

	// process length bytes starting at buffer
	void scan(u_char *buffer, size_t length);
};

#endif