view src/scanner.h @ 267:db12f6028f8b

Added tag stable-6-0-33 for changeset 582cfb9c4031
author Carl Byington <carl@five-ten-sg.com>
date Sat, 21 Jul 2012 13:13:07 -0700
parents c0d2e99c0a1d
children f92f24950bd3
line wrap: on
line source

/*

Copyright (c) 2007 Carl Byington - 510 Software Group, released under
the GPL version 3 or any later version at your choice available at
http://www.gnu.org/licenses/gpl-3.0.txt

*/

#ifndef scanner_include
#define scanner_include

#include "dnsbl.h"

////////////////////////////////////////////////
// memory for the content scanner
//
// Per-message state shared with the content scanner: the lookup sets it
// consults (html tags, tlds, cctlds), the set of hosts collected so far,
// and two counters used by the "excessive" threshold checks below.
class recorder
{
    mlfiPriv    *priv;          // needed for syslog
    string_set  *html_tags;     // valid tags
    string_set  *tlds;          // valid tlds
    string_set  *cctlds;        // valid cctlds
    string_set   hosts;         // held by value, exposed through get_hosts()
    size_t       bad_html_tags; // compared against limit in excessive_bad_tags()
    size_t       binary_tags;   // presumably maintained by binary(); definition not visible here

public:
    // The three sets arrive by reference while the members are pointers;
    // the constructor body lives elsewhere.
    recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &cctlds_);

    // Destruction calls empty() before the members go away.
    ~recorder()
    {
        empty();
    }

    // Defined out of line.
    void empty();
    void new_url(const char *host);
    void new_tag(const char *tag);
    void binary();

    // Hand buf to my_syslog() together with the stored priv pointer.
    void syslog(const char *buf)
    {
        my_syslog(priv, buf);
    }

    // Plain accessors for the stored pointers.
    mlfiPriv *get_priv()
    {
        return priv;
    }

    string_set *get_cctlds()
    {
        return cctlds;
    }

    string_set *get_tlds()
    {
        return tlds;
    }

    // Returns the member itself (by reference), so callers can modify it.
    string_set &get_hosts()
    {
        return hosts;
    }

    // True only when limit is nonzero, bad_html_tags is above limit, and
    // bad_html_tags is also above three times binary_tags.
    // A limit of zero therefore always yields false.
    bool excessive_bad_tags(size_t limit)
    {
        if (limit == 0) {
            return false;
        }
        if (bad_html_tags <= limit) {
            return false;
        }
        return bad_html_tags > 3*binary_tags;
    }

    // True only when limit is nonzero and more than limit hosts are stored.
    bool excessive_hosts(size_t limit)
    {
        if (limit == 0) {
            return false;
        }
        return hosts.size() > limit;
    }
};


////////////////////////////////////////////////
// the content scanner
//
class fsa;  // forward declaration only; this header uses fsa solely through pointers

// Front end of the content scanner: constructed around a recorder and fed
// raw buffers through scan().  It holds eight fsa pointers, one per parser.
//
// NOTE(review): the class has raw pointer members and a user-declared
// destructor but no copy constructor or assignment operator.  If the
// destructor frees these pointers (its body is not visible here), copying a
// url_scanner would be unsafe -- confirm and consider disabling copies.
class url_scanner
{
    // Declaration order is kept as-is; it fixes the member initialisation order.
    fsa *host_parser;
    fsa *tags_parser;
    fsa *urls_parser;
    fsa *urld_parser;
    fsa *html_parser;
    fsa *mime_parser;
    fsa *b64_parser;
    fsa *uu_parser;

public:
    // memory is the recorder passed in at construction; how it is used is
    // determined by the out-of-line constructor.
    url_scanner(recorder *memory);
    ~url_scanner();

    // Process length bytes starting at buffer.
    void scan(u_char *buffer, size_t length);
};

#endif