comparison src/scanner.cpp @ 44:6b79046b18c2

changes for 3.2
author carl
date Fri, 09 Jul 2004 07:53:02 -0700
parents afcf403709ef
children 05957b18b2e6
comparison
equal deleted inserted replaced
43:acbe44bbba22 44:6b79046b18c2
957 // not an email address or message id 957 // not an email address or message id
958 char *p1 = strchr((const char *)pending, '.'); 958 char *p1 = strchr((const char *)pending, '.');
959 char *p2 = strrchr((const char *)pending, '.'); 959 char *p2 = strrchr((const char *)pending, '.');
960 if (p1 && (p1 != p2)) { 960 if (p1 && (p1 != p2)) {
961 // have two periods, so three components 961 // have two periods, so three components
962 for (int i=1; i<count; i++) pending[i] = tolower(pending[i]); 962 for (int i=0; i<count; i++) pending[i] = tolower(pending[i]);
963 // is last component a tld? 963 // is last component a tld?
964 string_set::iterator i = memory->tlds->find(p2+1); 964 string_set::iterator i = memory->tlds->find(p2+1);
965 if (i != memory->tlds->end()) memory->new_url((char*)pending); 965 if (i != memory->tlds->end()) memory->new_url((char*)pending);
966 } 966 }
967 } 967 }
1011 pending[--count] = '\0'; // null terminate host name by overwriting the terminator 1011 pending[--count] = '\0'; // null terminate host name by overwriting the terminator
1012 char *p = strrchr((const char *)pending, '/'); 1012 char *p = strrchr((const char *)pending, '/');
1013 if (p && // have a leading / 1013 if (p && // have a leading /
1014 strchr(p, '.') && // require at least one . in a dns name 1014 strchr(p, '.') && // require at least one . in a dns name
1015 (strncasecmp((const char *)pending, "http", 4) == 0)) { // must start with protocol 1015 (strncasecmp((const char *)pending, "http", 4) == 0)) { // must start with protocol
1016 memory->new_url(++p); // we seem to have a host name, skip the last / 1016 // we seem to have a host name
1017 p++; // skip the last /
1018 int c = strlen(p);
1019 for (int i=0; i<c; i++) p[i] = tolower(p[i]);
1020 memory->new_url(p); // record it
1017 } 1021 }
1018 } 1022 }
1019 st = u_init; 1023 st = u_init;
1020 } // fall thru 1024 } // fall thru
1021 1025