comparison src/scanner.cpp @ 52:a84752107aca

Host names cannot have consecutive periods; reject candidates that contain ".." (an empty DNS component).
author carl
date Thu, 15 Jul 2004 23:01:41 -0700
parents 05957b18b2e6
children a39c813e8f7b
comparison
equal deleted inserted replaced
51:9f2971c692d0 52:a84752107aca
955 pending[--count] = '\0'; // null terminate host name by overwriting the terminator 955 pending[--count] = '\0'; // null terminate host name by overwriting the terminator
956 if (!strchr((const char *)pending, '@')) { 956 if (!strchr((const char *)pending, '@')) {
957 // not an email address or message id 957 // not an email address or message id
958 char *p1 = strchr((const char *)pending, '.'); 958 char *p1 = strchr((const char *)pending, '.');
959 char *p2 = strrchr((const char *)pending, '.'); 959 char *p2 = strrchr((const char *)pending, '.');
960 if (p1 && (p1 != p2)) { 960 char *p3 = strstr((const char *)pending, "..");
961 // have two periods, so three components 961 if (p1 && (p1 != p2) & !p3) {
962 // have two periods, so at least three components, and no empty components
962 for (int i=0; i<count; i++) pending[i] = tolower(pending[i]); 963 for (int i=0; i<count; i++) pending[i] = tolower(pending[i]);
963 // is last component a tld? 964 // is last component a tld?
964 string_set::iterator i = memory->tlds->find(p2+1); 965 string_set::iterator i = memory->tlds->find(p2+1);
965 if (i != memory->tlds->end()) memory->new_url((char*)pending); 966 if (i != memory->tlds->end()) memory->new_url((char*)pending);
966 } 967 }
1008 // url recognizer 1009 // url recognizer
1009 case u_reco: { 1010 case u_reco: {
1010 if (count > 13) { // need some minimal length host name after the protocol 1011 if (count > 13) { // need some minimal length host name after the protocol
1011 pending[--count] = '\0'; // null terminate host name by overwriting the terminator 1012 pending[--count] = '\0'; // null terminate host name by overwriting the terminator
1012 char *p = strrchr((const char *)pending, '/'); 1013 char *p = strrchr((const char *)pending, '/');
1013 if (p && // have a leading / 1014 if (p && // have a leading /
1014 strchr(p, '.') && // require at least one . in a dns name 1015 strchr(p, '.') && // require at least one . in a dns name
1016 !strstr(p, "..") && // no empty components in the dns name
1015 (strncasecmp((const char *)pending, "http", 4) == 0)) { // must start with protocol 1017 (strncasecmp((const char *)pending, "http", 4) == 0)) { // must start with protocol
1016 // we seem to have a host name 1018 // we seem to have a host name
1017 p++; // skip the last / 1019 p++; // skip the last /
1018 int c = strlen(p); 1020 int c = strlen(p);
1019 for (int i=0; i<c; i++) p[i] = tolower(p[i]); 1021 for (int i=0; i<c; i++) p[i] = tolower(p[i]);