diff src/readpst.c @ 363:3a1d25c579c6 stable-0-6-68

allow folders containing multiple item types; better detection of valid internet headers
author Carl Byington <carl@five-ten-sg.com>
date Mon, 29 Aug 2016 09:50:24 -0700
parents 6abc3054cba2
children e4c414ff8fa2
line wrap: on
line diff
--- a/src/readpst.c	Wed Jul 06 12:14:55 2016 -0700
+++ b/src/readpst.c	Mon Aug 29 09:50:24 2016 -0700
@@ -18,13 +18,12 @@
 #define C_TIME_SIZE 500
 
 struct file_ll {
-    char *name;
+    char *name[PST_TYPE_MAX];      // output file name, one slot per item type; NULL when the slot is unused
     char *dname;
-    FILE * output;
+    FILE * output[PST_TYPE_MAX];   // open output stream belonging to the same-numbered name[] slot, or NULL
     int32_t stored_count;
     int32_t item_count;
     int32_t skip_count;
-    int32_t type;
 };
 
 int       grim_reaper();
@@ -36,11 +35,11 @@
 void      version();
 char*     mk_kmail_dir(char* fname);
 int       close_kmail_dir();
-char*     mk_recurse_dir(char* dir, int32_t folder_type);
+void      mk_recurse_dir(char* dir);
 int       close_recurse_dir();
-char*     mk_separate_dir(char *dir);
+void      mk_separate_dir(char *dir);
 int       close_separate_dir();
-void      mk_separate_file(struct file_ll *f, char *extension, int openit);
+void      mk_separate_file(struct file_ll *f, int32_t t, char *extension, int openit);
 void      close_separate_file(struct file_ll *f);
 char*     my_stristr(char *haystack, char *needle);
 void      check_filename(char *fname);
@@ -244,7 +243,6 @@
     pst_item *item = NULL;
 
     DEBUG_ENT("process");
-    memset(&ff, 0, sizeof(ff));
     create_enter_dir(&ff, outeritem);
 
     for (; d_ptr; d_ptr = d_ptr->next) {
@@ -308,25 +306,18 @@
                 DEBUG_INFO(("skipping contact: not in output type list\n"));
             }
             else {
-                if (!ff.type) ff.type = item->type;
-                if ((ff.type != PST_TYPE_CONTACT) && (mode != MODE_SEPARATE)) {
-                    ff.skip_count++;
-                    DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. Skipping it\n", ff.type));
+                ff.item_count++;
+                if (mode == MODE_SEPARATE) mk_separate_file(&ff, PST_TYPE_CONTACT, (mode_EX) ? ".vcf" : "", 1);
+                if (contact_mode == CMODE_VCARD) {
+                    pst_convert_utf8_null(item, &item->comment);
+                    write_vcard(ff.output[PST_TYPE_CONTACT], item, item->contact, item->comment.str);
                 }
                 else {
-                    ff.item_count++;
-                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".vcf" : "", 1);
-                    if (contact_mode == CMODE_VCARD) {
-                        pst_convert_utf8_null(item, &item->comment);
-                        write_vcard(ff.output, item, item->contact, item->comment.str);
-                    }
-                    else {
-                        pst_convert_utf8(item, &item->contact->fullname);
-                        pst_convert_utf8(item, &item->contact->address1);
-                        fprintf(ff.output, "%s <%s>\n", item->contact->fullname.str, item->contact->address1.str);
-                    }
-                    if (mode == MODE_SEPARATE) close_separate_file(&ff);
+                    pst_convert_utf8(item, &item->contact->fullname);
+                    pst_convert_utf8(item, &item->contact->address1);
+                    fprintf(ff.output[PST_TYPE_CONTACT], "%s <%s>\n", item->contact->fullname.str, item->contact->address1.str);
                 }
+                if (mode == MODE_SEPARATE) close_separate_file(&ff);
             }
 
         } else if (item->email && ((item->type == PST_TYPE_NOTE) || (item->type == PST_TYPE_SCHEDULE) || (item->type == PST_TYPE_REPORT))) {
@@ -336,46 +327,39 @@
                 DEBUG_INFO(("skipping email: not in output type list\n"));
             }
             else {
-                if (!ff.type) ff.type = item->type;
-                if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT) && (mode != MODE_SEPARATE)) {
-                    ff.skip_count++;
-                    DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type));
+                char *extra_mime_headers = NULL;
+                ff.item_count++;
+                if (mode == MODE_SEPARATE) {
+                    // process this single email message, possibly forking
+                    pid_t parent = getpid();
+                    pid_t child = try_fork(item->file_as.str);
+                    if (child == 0) {
+                        // we are the child process, or the original parent if no children were available
+                        pid_t me = getpid();
+                        mk_separate_file(&ff, PST_TYPE_NOTE, (mode_EX) ? ".eml" : "", 1);
+                        write_normal_email(ff.output[PST_TYPE_NOTE], ff.name[PST_TYPE_NOTE], item, mode, mode_MH, &pstfile, save_rtf_body, PST_TYPE_NOTE, &extra_mime_headers);
+                        close_separate_file(&ff);
+                        if (mode_MSG) {
+                            mk_separate_file(&ff, PST_TYPE_NOTE, ".msg", 0);
+                            write_msg_email(ff.name[PST_TYPE_NOTE], item, &pstfile);
+                        }
+#ifdef HAVE_FORK
+#ifdef HAVE_SEMAPHORE_H
+                        if (me != parent) {
+                            // we really were a child, forked for the sole purpose of processing this message
+                            // free my child count slot before really exiting, since
+                            // all I am doing here is waiting for my children to exit
+                            sem_post(global_children);
+                            grim_reaper(1); // wait for all my child processes to exit - there should not be any
+                            exit(0);        // really exit
+                        }
+#endif
+#endif
+                    }
                 }
                 else {
-                    char *extra_mime_headers = NULL;
-                    ff.item_count++;
-                    if (mode == MODE_SEPARATE) {
-                        // process this single email message, possibly forking
-                        pid_t parent = getpid();
-                        pid_t child = try_fork(item->file_as.str);
-                        if (child == 0) {
-                            // we are the child process, or the original parent if no children were available
-                            pid_t me = getpid();
-                            mk_separate_file(&ff, (mode_EX) ? ".eml" : "", 1);
-                            write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, 0, &extra_mime_headers);
-                            close_separate_file(&ff);
-                            if (mode_MSG) {
-                                mk_separate_file(&ff, ".msg", 0);
-                                write_msg_email(ff.name, item, &pstfile);
-                            }
-#ifdef HAVE_FORK
-#ifdef HAVE_SEMAPHORE_H
-                            if (me != parent) {
-                                // we really were a child, forked for the sole purpose of processing this message
-                                // free my child count slot before really exiting, since
-                                // all I am doing here is waiting for my children to exit
-                                sem_post(global_children);
-                                grim_reaper(1); // wait for all my child processes to exit - there should not be any
-                                exit(0);        // really exit
-                            }
-#endif
-#endif
-                        }
-                    }
-                    else {
-                        // process this single email message, cannot fork since not separate mode
-                        write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, 0, &extra_mime_headers);
-                    }
+                    // process this single email message, cannot fork since not separate mode
+                    write_normal_email(ff.output[PST_TYPE_NOTE], ff.name[PST_TYPE_NOTE], item, mode, mode_MH, &pstfile, save_rtf_body, 0, &extra_mime_headers);
                 }
             }
 
@@ -386,18 +370,11 @@
                 DEBUG_INFO(("skipping journal entry: not in output type list\n"));
             }
             else {
-                if (!ff.type) ff.type = item->type;
-                if ((ff.type != PST_TYPE_JOURNAL) && (mode != MODE_SEPARATE)) {
-                    ff.skip_count++;
-                    DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type));
-                }
-                else {
-                    ff.item_count++;
-                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : "", 1);
-                    write_journal(ff.output, item);
-                    fprintf(ff.output, "\n");
-                    if (mode == MODE_SEPARATE) close_separate_file(&ff);
-                }
+                ff.item_count++;
+                if (mode == MODE_SEPARATE) mk_separate_file(&ff, PST_TYPE_JOURNAL, (mode_EX) ? ".ics" : "", 1);
+                write_journal(ff.output[PST_TYPE_JOURNAL], item);
+                fprintf(ff.output[PST_TYPE_JOURNAL], "\n");
+                if (mode == MODE_SEPARATE) close_separate_file(&ff);
             }
 
         } else if (item->appointment && (item->type == PST_TYPE_APPOINTMENT)) {
@@ -407,24 +384,17 @@
                 DEBUG_INFO(("skipping appointment: not in output type list\n"));
             }
             else {
-                if (!ff.type) ff.type = item->type;
-                if ((ff.type != PST_TYPE_APPOINTMENT) && (mode != MODE_SEPARATE)) {
-                    ff.skip_count++;
-                    DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. Skipping it\n", ff.type));
-                }
-                else {
-                    ff.item_count++;
-                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : "", 1);
-                    write_schedule_part_data(ff.output, item, NULL, NULL);
-                    fprintf(ff.output, "\n");
-                    if (mode == MODE_SEPARATE) close_separate_file(&ff);
-                }
+                ff.item_count++;
+                if (mode == MODE_SEPARATE) mk_separate_file(&ff, PST_TYPE_APPOINTMENT, (mode_EX) ? ".ics" : "", 1);
+                write_schedule_part_data(ff.output[PST_TYPE_APPOINTMENT], item, NULL, NULL);
+                fprintf(ff.output[PST_TYPE_APPOINTMENT], "\n");
+                if (mode == MODE_SEPARATE) close_separate_file(&ff);
             }
 
         } else if (item->message_store) {
             // there should only be one message_store, and we have already done it
             ff.skip_count++;
-            DEBUG_WARN(("item with message store content, type %i %s folder type %i, skipping it\n", item->type, item->ascii_type, ff.type));
+            DEBUG_WARN(("item with message store content, type %i %s, skipping it\n", item->type, item->ascii_type));
 
         } else {
             ff.skip_count++;
@@ -847,11 +817,55 @@
 }
 
 
-// this will create a directory by that name,
-// then make an mbox file inside that directory.
-char *mk_recurse_dir(char *dir, int32_t folder_type) {
+char *item_type_to_name(int32_t item_type) {
+    // Map a PST item type to the base name of the output file used for
+    // items of that type: "calendar", "contacts", "journal" or "mbox".
+    switch (item_type) {
+        case PST_TYPE_APPOINTMENT:
+            return "calendar";
+
+        case PST_TYPE_CONTACT:
+            return "contacts";
+
+        case PST_TYPE_JOURNAL:
+            return "journal";
+
+        case PST_TYPE_STICKYNOTE:
+        case PST_TYPE_TASK:
+        case PST_TYPE_NOTE:
+        case PST_TYPE_OTHER:
+        case PST_TYPE_REPORT:
+        default:
+            // every remaining type falls back to the mbox file name
+            return "mbox";
+    }
+}
+
+
+int32_t reduced_item_type(int32_t item_type) {
+    // Collapse an item type onto the type whose slot it shares:
+    // appointments, contacts and journal entries keep their own type,
+    // every other value is reduced to PST_TYPE_NOTE.
+    switch (item_type) {
+        case PST_TYPE_APPOINTMENT:
+        case PST_TYPE_CONTACT:
+        case PST_TYPE_JOURNAL:
+            return item_type;
+
+        case PST_TYPE_STICKYNOTE:
+        case PST_TYPE_TASK:
+        case PST_TYPE_NOTE:
+        case PST_TYPE_OTHER:
+        case PST_TYPE_REPORT:
+        default:
+            return PST_TYPE_NOTE;
+    }
+}
+
+
+// this will create a directory by that name
+void mk_recurse_dir(char *dir) {
     int x;
-    char *out_name;
     DEBUG_ENT("mk_recurse_dir");
     check_filename(dir);
     if (D_MKDIR (dir)) {
@@ -864,27 +878,7 @@
         x = errno;
         DIE(("mk_recurse_dir: Cannot change to directory %s: %s\n", dir, strerror(x)));
     }
-    switch (folder_type) {
-        case PST_TYPE_APPOINTMENT:
-            out_name = strdup("calendar");
-            break;
-        case PST_TYPE_CONTACT:
-            out_name = strdup("contacts");
-            break;
-        case PST_TYPE_JOURNAL:
-            out_name = strdup("journal");
-            break;
-        case PST_TYPE_STICKYNOTE:
-        case PST_TYPE_TASK:
-        case PST_TYPE_NOTE:
-        case PST_TYPE_OTHER:
-        case PST_TYPE_REPORT:
-        default:
-            out_name = strdup("mbox");
-            break;
-    }
     DEBUG_RET();
-    return out_name;
 }
 
 
@@ -900,7 +894,7 @@
 }
 
 
-char *mk_separate_dir(char *dir) {
+void mk_separate_dir(char *dir) {
     size_t dirsize = strlen(dir) + 10;
     char dir_name[dirsize];
     int x = 0, y = 0;
@@ -953,9 +947,7 @@
 #endif
     }
 
-    // we don't return a filename here cause it isn't necessary.
     DEBUG_RET();
-    return NULL;
 }
 
 
@@ -971,17 +963,17 @@
 }
 
 
-void mk_separate_file(struct file_ll *f, char *extension, int openit) {
+void mk_separate_file(struct file_ll *f, int32_t t, char *extension, int openit) {  // t: index of the name[]/output[] slot to fill
     DEBUG_ENT("mk_separate_file");
     DEBUG_INFO(("opening next file to save email\n"));
     if (f->item_count > 999999999) { // bigger than nine 9's
         DIE(("mk_separate_file: The number of emails in this folder has become too high to handle\n"));
     }
-    sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count, extension);
-    check_filename(f->name);
+    sprintf(f->name[t], SEP_MAIL_FILE_TEMPLATE, f->item_count, extension);  // formats in place: f->name[t] must already point at a buffer big enough for the name
+    check_filename(f->name[t]);
     if (openit) {
-        if (!(f->output = fopen(f->name, "w"))) {
-            DIE(("mk_separate_file: Cannot open file to save email \"%s\"\n", f->name));
+        if (!(f->output[t] = fopen(f->name[t], "w"))) {  // "w" creates the file or truncates an existing one
+            DIE(("mk_separate_file: Cannot open file to save email \"%s\"\n", f->name[t]));
         }
     }
     DEBUG_RET();
@@ -989,16 +981,19 @@
 
 
 void close_separate_file(struct file_ll *f) {
+    int32_t t;  // Close every open per-type output file of f and delete any that ended up empty.
     DEBUG_ENT("close_separate_file");
-    if (f->output) {
-        struct stat st;
-        fclose(f->output);
-        stat(f->name, &st);
-        if (!st.st_size) {
-            DEBUG_WARN(("removing empty output file %s\n", f->name));
-            remove(f->name);
+    for (t=0; t<PST_TYPE_MAX; t++) {
+        if (f->output[t]) {
+            struct stat st;
+            fclose(f->output[t]);
+            // st is only filled in when stat() succeeds, so check its result before reading st_size
+            if ((stat(f->name[t], &st) == 0) && !st.st_size) {
+                DEBUG_WARN(("removing empty output file %s\n", f->name[t]));
+                remove(f->name[t]);
+            }
+            f->output[t] = NULL;  // mark the slot closed so it is not closed twice
         }
-        f->output = NULL;
     }
     DEBUG_RET();
 }
@@ -1227,16 +1222,19 @@
     // there are surely others. the problem is - given an arbitrary character
     // string, is it a valid (or even reasonable) set of rfc822 headers?
     if (header) {
-        if ((strncasecmp(header, "X-Barracuda-URL: ", 17) == 0) ||
-            (strncasecmp(header, "X-ASG-Debug-ID: ",  16) == 0) ||
-            (strncasecmp(header, "Return-Path: ",     13) == 0) ||
-            (strncasecmp(header, "Received: ",        10) == 0) ||
-            (strncasecmp(header, "Subject: ",          9) == 0) ||
-            (strncasecmp(header, "Date: ",             6) == 0) ||
-            (strncasecmp(header, "From: ",             6) == 0) ||
-            (strncasecmp(header, "X-x: ",              5) == 0) ||
-            (strncasecmp(header, "Microsoft Mail Internet Headers", 31) == 0)) {
-            return 1;
+        if ((strncasecmp(header, "Content-Type: ",                  14) == 0) ||
+            (strncasecmp(header, "Date: ",                           6) == 0) ||
+            (strncasecmp(header, "From: ",                           6) == 0) ||
+            (strncasecmp(header, "MIME-Version: ",                  14) == 0) ||
+            (strncasecmp(header, "Microsoft Mail Internet Headers", 31) == 0) ||
+            (strncasecmp(header, "Received: ",                      10) == 0) ||
+            (strncasecmp(header, "Return-Path: ",                   13) == 0) ||
+            (strncasecmp(header, "Subject: ",                        9) == 0) ||
+            (strncasecmp(header, "To: ",                             4) == 0) ||
+            (strncasecmp(header, "X-ASG-Debug-ID: ",                16) == 0) ||
+            (strncasecmp(header, "X-Barracuda-URL: ",               17) == 0) ||
+            (strncasecmp(header, "X-x: ",                            5) == 0)) {
+            return 1;                                 
         }
         else {
             if (strlen(header) > 2) {
@@ -2176,15 +2174,23 @@
 
 void create_enter_dir(struct file_ll* f, pst_item *item)
 {
+    memset(f, 0, sizeof(*f));  // reset f for this folder: every name[]/output[] slot is NULL until set below
+    f->stored_count = (item->folder) ? item->folder->item_count : 0;
     pst_convert_utf8(item, &item->file_as);
-    f->type         = item->type;
-    f->stored_count = (item->folder) ? item->folder->item_count : 0;
+    f->dname = (char*) pst_malloc(strlen(item->file_as.str)+1);  // private copy of the folder name, freed in close_enter_dir()
+    strcpy(f->dname, item->file_as.str);
 
     DEBUG_ENT("create_enter_dir");
     if (mode == MODE_KMAIL)
-        f->name = mk_kmail_dir(item->file_as.str);
+        f->name[0] = mk_kmail_dir(item->file_as.str);  // NOTE(review): only slot 0 is named here, yet process() writes through output[PST_TYPE_*] - confirm
     else if (mode == MODE_RECURSE) {
-        f->name = mk_recurse_dir(item->file_as.str, f->type);
+        int32_t t;
+        mk_recurse_dir(item->file_as.str);
+        for (t=0; t<PST_TYPE_MAX; t++) {
+            if (t == reduced_item_type(t)) {  // one output file per reduced item type
+                f->name[t] = strdup(item_type_to_name(t));
+            }
+        }
         if (mode_thunder) {
             FILE *type_file = fopen(".type", "w");
             fprintf(type_file, "%d\n", item->type);
@@ -2193,46 +2199,47 @@
     } else if (mode == MODE_SEPARATE) {
         // do similar stuff to recurse here.
         mk_separate_dir(item->file_as.str);
-        f->name = (char*) pst_malloc(file_name_len);
-        memset(f->name, 0, file_name_len);
+        for (int32_t t=0; t<PST_TYPE_MAX; t++)  // mk_separate_file() formats into f->name[t], so each reduced type needs its own buffer
+            if (t == reduced_item_type(t)) { f->name[t] = (char*) pst_malloc(file_name_len); memset(f->name[t], 0, file_name_len); }
     } else {
-        f->name = (char*) pst_malloc(strlen(item->file_as.str)+strlen(OUTPUT_TEMPLATE)+1);
-        sprintf(f->name, OUTPUT_TEMPLATE, item->file_as.str);
+        f->name[0] = (char*) pst_malloc(strlen(item->file_as.str)+strlen(OUTPUT_TEMPLATE)+1);  // NOTE(review): slot 0 only, same concern as the kmail branch - confirm
+        sprintf(f->name[0], OUTPUT_TEMPLATE, item->file_as.str);
     }
 
-    f->dname = (char*) pst_malloc(strlen(item->file_as.str)+1);
-    strcpy(f->dname, item->file_as.str);
-
-    if (overwrite != 1) {
-        int x = 0;
-        char *temp = (char*) pst_malloc (strlen(f->name)+10); //enough room for 10 digits
+    if (mode != MODE_SEPARATE) {  // separate mode opens its files one item at a time in mk_separate_file()
+        int32_t t;
+        for (t=0; t<PST_TYPE_MAX; t++) {
+            if (f->name[t]) {
+                if (!overwrite) {  // never clobber an existing file: probe for an unused numbered name
+                    int x = 0;
+                    char *temp = (char*) pst_malloc (strlen(f->name[t])+10); //enough room for 10 digits
 
-        sprintf(temp, "%s", f->name);
-        check_filename(temp);
-        while ((f->output = fopen(temp, "r"))) {
-            DEBUG_INFO(("need to increase filename because one already exists with that name\n"));
-            DEBUG_INFO(("- increasing it to %s%d\n", f->name, x));
-            x++;
-            sprintf(temp, "%s%08d", f->name, x);
-            DEBUG_INFO(("- trying \"%s\"\n", f->name));
-            if (x == 99999999) {
-                DIE(("create_enter_dir: Why can I not create a folder %s? I have tried %i extensions...\n", f->name, x));
+                    sprintf(temp, "%s", f->name[t]);
+                    check_filename(temp);
+                    while ((f->output[t] = fopen(temp, "r"))) {  // a successful read-open means the candidate name is taken
+                        DEBUG_INFO(("need to increase filename because one already exists with that name\n"));
+                        DEBUG_INFO(("- increasing it to %s%d\n", f->name[t], x));
+                        x++;
+                        sprintf(temp, "%s%08d", f->name[t], x);  // f->name is an array now; %s needs the element f->name[t]
+                        DEBUG_INFO(("- trying \"%s\"\n", temp));
+                        if (x == 99999999) {
+                            DIE(("create_enter_dir: Why can I not create a folder %s? I have tried %i extensions...\n", f->name[t], x));
+                        }
+                        fclose(f->output[t]);
+                    }
+                    if (x > 0) { //then the f->name should change
+                        free (f->name[t]);
+                        f->name[t] = temp;
+                    } else {
+                        free(temp);
+                    }
+                }
+                check_filename(f->name[t]);
+                if (!(f->output[t] = fopen(f->name[t], "w"))) {
+                    DIE(("create_enter_dir: Could not open file \"%s\" for write\n", f->name[t]));
+                }
+                DEBUG_INFO(("f->name = %s\nitem->folder_name = %s\n", f->name[t], item->file_as.str));
             }
-            fclose(f->output);
-        }
-        if (x > 0) { //then the f->name should change
-            free (f->name);
-            f->name = temp;
-        } else {
-            free(temp);
-        }
-    }
-
-    DEBUG_INFO(("f->name = %s\nitem->folder_name = %s\n", f->name, item->file_as.str));
-    if (mode != MODE_SEPARATE) {
-        check_filename(f->name);
-        if (!(f->output = fopen(f->name, "w"))) {
-            DIE(("create_enter_dir: Could not open file \"%s\" for write\n", f->name));
         }
     }
     DEBUG_RET();
@@ -2241,6 +2248,7 @@
 
 void close_enter_dir(struct file_ll *f)
 {
+    int32_t t;
     DEBUG_INFO(("processed item count for folder %s is %i, skipped %i, total %i \n",
                 f->dname, f->item_count, f->skip_count, f->stored_count));
     if (output_mode != OUTPUT_QUIET) {
@@ -2249,18 +2257,21 @@
             fflush(stdout);
         pst_debug_unlock();
     }
-    if (f->output) {
-        if (mode == MODE_SEPARATE) DEBUG_WARN(("close_enter_dir finds open separate file\n"));
-        struct stat st;
-        fclose(f->output);
-        stat(f->name, &st);
-        if (!st.st_size) {
-            DEBUG_WARN(("removing empty output file %s\n", f->name));
-            remove(f->name);
+    for (t=0; t<PST_TYPE_MAX; t++) {
+        if (f->output[t]) {
+            if (mode == MODE_SEPARATE) DEBUG_WARN(("close_enter_dir finds open separate file\n"));
+            struct stat st;
+            fclose(f->output[t]);
+            stat(f->name[t], &st);
+            if (!st.st_size) {
+                DEBUG_WARN(("removing empty output file %s\n", f->name[t]));
+                remove(f->name[t]);
+            }
+            f->output[t] = NULL;
         }
-        f->output = NULL;
+        free(f->name[t]);
+        f->name[t] = NULL;
     }
-    free(f->name);
     free(f->dname);
 
     if (mode == MODE_KMAIL)