view src/scanner.h @ 75:1142e46be550

start coding on new config syntax
author carl
date Wed, 13 Jul 2005 23:04:14 -0700
parents b7449114ebb0
children 81f1e400e8ab
line wrap: on
line source

#ifndef scanner_include
#define scanner_include

#include "dnsbl.h"

////////////////////////////////////////////////
// memory for the content scanner
//
// Keeps a set of hosts plus two tag counters, and answers the two
// "excessive" threshold queries below.  The constructor, destructor,
// empty(), new_url(), new_tag() and binary() are defined outside this
// header.
//
class recorder
{
    mlfiPriv    *priv;          // needed for syslog
    string_set  *html_tags;     // valid tags
    string_set  *tlds;          // valid tlds
    string_set  hosts;          // host set exposed through get_hosts()
    int         bad_html_tags;  // compared against the limit in excessive_bad_tags()
    int         binary_tags;    // weighed (times 3) against bad_html_tags

public:
    // NOTE(review): arguments are taken by reference while the members are
    // pointers - presumably the addresses are stored and the referenced
    // sets must outlive this object; confirm against the definition.
    recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_);
    ~recorder();

    // NOTE(review): the following four are defined elsewhere; judging by
    // their names they reset the state, record a host, record a tag and
    // note binary content - confirm against the implementation.
    void empty();
    void new_url(char *host);
    void new_tag(char *tag);
    void binary();

    // plain accessors
    mlfiPriv   *get_priv() const    { return priv;  }
    string_set *get_tlds() const    { return tlds;  }
    string_set &get_hosts()         { return hosts; }

    // True when limit is positive, bad_html_tags exceeds it, and
    // bad_html_tags is also more than three times binary_tags.
    // A limit <= 0 disables the check.
    bool excessive_bad_tags(int limit) const
    {
        return (limit > 0)
            && (bad_html_tags > limit)
            && (bad_html_tags > 3*binary_tags);
    }

    // True when limit is positive and hosts holds more than limit entries.
    // A limit <= 0 disables the check.
    bool excessive_hosts(int limit) const
    {
        // limit is known to be positive past the first test, so the
        // conversion to an unsigned type is value preserving and the
        // comparison no longer mixes signed and unsigned operands.
        return (limit > 0) && (hosts.size() > static_cast<size_t>(limit));
    }
};


////////////////////////////////////////////////
// the content scanner
//
class fsa;      // only used through pointers here, so a forward declaration suffices

class url_scanner
{
private:
    // One fsa per parser; the objects are declared here only as pointers.
    // NOTE(review): presumably created in the constructor and released in
    // the destructor - confirm against the implementation.
    fsa     *host_parser;
    fsa     *tags_parser;
    fsa     *urls_parser;
    fsa     *urld_parser;
    fsa     *html_parser;
    fsa     *mime_parser;
    fsa     *b64_parser;
    fsa     *uu_parser;

public:
    // memory is the recorder handed to the scanner at construction.
    url_scanner(recorder *memory);
    ~url_scanner();

    // Scan length bytes starting at buffer.
    void scan(u_char *buffer, size_t length);
};

#endif