comparison src/scanner.cpp @ 270:f92f24950bd3 stable-6-0-35

Use mozilla prefix list for tld checking, Enable surbl/uribl/dbl rhs lists
author Carl Byington <carl@five-ten-sg.com>
date Mon, 09 Sep 2013 15:15:53 -0700
parents ef97c7cd4a6e
children a99b6c1f5f67
comparison
equal deleted inserted replaced
269:6d2a11f0ae41 270:f92f24950bd3
1187 1187
1188 1188
1189 //////////////////////////////////////////////// 1189 ////////////////////////////////////////////////
1190 // 1190 //
1191 // 1191 //
1192 recorder::recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &cctlds_) { 1192 recorder::recorder(mlfiPriv *priv_, string_set &html_tags_, string_set &tlds_, string_set &tldwilds_, string_set &tldnots_) {
1193 priv = priv_; 1193 priv = priv_;
1194 html_tags = &html_tags_; 1194 html_tags = &html_tags_;
1195 tlds = &tlds_; 1195 tlds = &tlds_;
1196 cctlds = &cctlds_; 1196 tldwilds = &tldwilds_;
1197 tldnots = &tldnots_;
1197 bad_html_tags = 0; 1198 bad_html_tags = 0;
1198 binary_tags = 0; 1199 binary_tags = 0;
1199 } 1200 }
1200 void recorder::empty() { 1201 void recorder::empty() {
1201 bad_html_tags = 0; 1202 bad_html_tags = 0;
1266 else { 1267 else {
1267 for (int i=0; i<count; i++) pending[i] = tolower(pending[i]); 1268 for (int i=0; i<count; i++) pending[i] = tolower(pending[i]);
1268 // is last component a tld? 1269 // is last component a tld?
1269 string_set::iterator i = memory->get_tlds()->find(p2+1); 1270 string_set::iterator i = memory->get_tlds()->find(p2+1);
1270 if (i != memory->get_tlds()->end()) memory->new_url((char*)pending); 1271 if (i != memory->get_tlds()->end()) memory->new_url((char*)pending);
1272 else {
1273 i = memory->get_tldwilds()->find(p2+1);
1274 if (i != memory->get_tldwilds()->end()) memory->new_url((char*)pending);
1275 }
1271 } 1276 }
1272 } 1277 }
1273 } 1278 }
1274 } 1279 }
1275 1280