changeset 233:1d50ff3c5091

better rfc822 embedded message decoding
author Carl Byington <carl@five-ten-sg.com>
date Thu, 10 Sep 2009 22:49:24 -0700
parents f8dc1b7201ad
children ed0cb66b23d4
files ChangeLog NEWS libpst.spec.in regression/regression-tests.bash src/libpst.c src/libpst.h src/readpst.c
diffstat 7 files changed, 88 insertions(+), 53 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Thu Sep 10 15:29:13 2009 -0700
+++ b/ChangeLog	Thu Sep 10 22:49:24 2009 -0700
@@ -9,6 +9,8 @@
     * patch from Chris White to avoid segfault with embedded appointments.
     * patch from Roberto Polli to add creation of some Thunderbird specific meta files.
     * patch from Justin Greer to ignore b5 tables at offset zero.
+    * output type filtering can now be used to handle folders with multiple item types.
+    * better decoding of rfc822 embedded message attachments.
 
 LibPST 0.6.42 (2009-09-03)
 ===============================
--- a/NEWS	Thu Sep 10 15:29:13 2009 -0700
+++ b/NEWS	Thu Sep 10 22:49:24 2009 -0700
@@ -1,4 +1,4 @@
-0.6.43  2009-09-10 patches from Justin Greer, Chris White, Roberto Polli
+0.6.43  2009-09-10 patches from Justin Greer, Chris White, Roberto Polli; better rfc822 embedded message decoding
 0.6.42  2009-09-03 patch from Fridrich Strba to build with DJGPP DOS cross-compiler
 0.6.41  2009-06-23 fix ax_python detection - should not use locate command
 0.6.40  2009-06-23 fedora 11 has python2.6, remove pdf version of the man pages
--- a/libpst.spec.in	Thu Sep 10 15:29:13 2009 -0700
+++ b/libpst.spec.in	Thu Sep 10 22:49:24 2009 -0700
@@ -149,7 +149,8 @@
 * Thu Sep 10 2009 Carl Byington <carl@five-ten-sg.com> - 0.6.43-1
 - decode more of the pst format, some minor bug fixes
 - add support for code pages 1200 and 1201.
-- add readpst -t option to select output item types
+- add readpst -t option to select output item types, which can
+  now be used to process folders containing mixed item types.
 - fix segfault with embedded appointments
 - add readpst -u option for Thunderbird mode .size and .type files
 
--- a/regression/regression-tests.bash	Thu Sep 10 15:29:13 2009 -0700
+++ b/regression/regression-tests.bash	Thu Sep 10 22:49:24 2009 -0700
@@ -52,7 +52,8 @@
     rm -rf output$n
     mkdir output$n
     #val ../src/readpst $jobs -r -D -cv -o output$n            $fn
-    $val ../src/readpst $jobs -r -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
+    #$val ../src/readpst $jobs -r -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
+    $val ../src/readpst $jobs -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
     #../src/getidblock -p $fn 0 >$ba.fulldump
 }
 
--- a/src/libpst.c	Thu Sep 10 15:29:13 2009 -0700
+++ b/src/libpst.c	Thu Sep 10 22:49:24 2009 -0700
@@ -3286,7 +3286,7 @@
             tail = i2_ptr;
             if (id2_rec.child_id) {
                 if ((i_ptr = pst_getID(pf, id2_rec.child_id)) == NULL) {
-                    DEBUG_WARN(("child id [%#"PRIi64"] not found\n", id2_rec.child_id));
+                    DEBUG_WARN(("child id [%#"PRIx64"] not found\n", id2_rec.child_id));
                 }
                 else {
                     i2_ptr->child = pst_build_id2(pf, i_ptr);
@@ -3885,7 +3885,7 @@
     size_t r;
     int noenc = (int)(i_id & 2);   // disable encryption
     DEBUG_ENT("pst_ff_getIDblock_dec");
-    DEBUG_INFO(("for id %#"PRIi64"\n", i_id));
+    DEBUG_INFO(("for id %#"PRIx64"\n", i_id));
     r = pst_ff_getIDblock(pf, i_id, buf);
     if ((pf->encryption) && !(noenc)) {
         (void)pst_decrypt(i_id, *buf, r, pf->encryption);
@@ -3929,7 +3929,7 @@
     ptr = pst_getID2(id2_head, id2);
 
     if (!ptr) {
-        DEBUG_WARN(("Cannot find id2 value %#"PRIi64"\n", id2));
+        DEBUG_WARN(("Cannot find id2 value %#"PRIx64"\n", id2));
         DEBUG_RET();
         return 0;
     }
--- a/src/libpst.h	Thu Sep 10 15:29:13 2009 -0700
+++ b/src/libpst.h	Thu Sep 10 22:49:24 2009 -0700
@@ -33,7 +33,6 @@
 #define PST_TYPE_OTHER      13
 #define PST_TYPE_REPORT     14
 
-
 // defines types of possible encryption
 #define PST_NO_ENCRYPT   0
 #define PST_COMP_ENCRYPT 1
@@ -72,6 +71,15 @@
 #define PST_APP_RECUR_MONTHLY     3
 #define PST_APP_RECUR_YEARLY      4
 
+// define attachment types
+#define PST_ATTACH_NONE             0
+#define PST_ATTACH_BY_VALUE         1
+#define PST_ATTACH_BY_REF           2
+#define PST_ATTACH_BY_REF_RESOLV    3
+#define PST_ATTACH_BY_REF_ONLY      4
+#define PST_ATTACH_EMBEDDED         5
+#define PST_ATTACH_OLE              6
+
 
 typedef struct pst_entryid {
     int32_t u1;
--- a/src/readpst.c	Thu Sep 10 15:29:13 2009 -0700
+++ b/src/readpst.c	Thu Sep 10 22:49:24 2009 -0700
@@ -274,82 +274,90 @@
             }
 
         } else if (item->contact && (item->type == PST_TYPE_CONTACT)) {
-            if (!ff.type) ff.type = item->type;
             DEBUG_INFO(("Processing Contact\n"));
-            if (ff.type != PST_TYPE_CONTACT) {
-                ff.skip_count++;
-                DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. Skipping it\n", ff.type));
-            }
-            else if (!(output_type_mode & OTMODE_CONTACT)) {
+            if (!(output_type_mode & OTMODE_CONTACT)) {
                 ff.skip_count++;
                 DEBUG_INFO(("skipping contact: not in output type list\n"));
             }
             else {
-                ff.item_count++;
-                if (mode == MODE_SEPARATE) mk_separate_file(&ff);
-                if (contact_mode == CMODE_VCARD) {
-                    pst_convert_utf8_null(item, &item->comment);
-                    write_vcard(ff.output, item, item->contact, item->comment.str);
+                if (!ff.type) ff.type = item->type;
+                if (ff.type != PST_TYPE_CONTACT) {
+                    ff.skip_count++;
+                    DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. Skipping it\n", ff.type));
                 }
                 else {
-                    pst_convert_utf8(item, &item->contact->fullname);
-                    pst_convert_utf8(item, &item->contact->address1);
-                    fprintf(ff.output, "%s <%s>\n", item->contact->fullname.str, item->contact->address1.str);
+                    ff.item_count++;
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff);
+                    if (contact_mode == CMODE_VCARD) {
+                        pst_convert_utf8_null(item, &item->comment);
+                        write_vcard(ff.output, item, item->contact, item->comment.str);
+                    }
+                    else {
+                        pst_convert_utf8(item, &item->contact->fullname);
+                        pst_convert_utf8(item, &item->contact->address1);
+                        fprintf(ff.output, "%s <%s>\n", item->contact->fullname.str, item->contact->address1.str);
+                    }
                 }
             }
 
         } else if (item->email && ((item->type == PST_TYPE_NOTE) || (item->type == PST_TYPE_SCHEDULE) || (item->type == PST_TYPE_REPORT))) {
-            if (!ff.type) ff.type = item->type;
             DEBUG_INFO(("Processing Email\n"));
-            if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT)) {
-                ff.skip_count++;
-                DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type));
-            }
-            else if (!(output_type_mode & OTMODE_EMAIL)) {
+            if (!(output_type_mode & OTMODE_EMAIL)) {
                 ff.skip_count++;
                 DEBUG_INFO(("skipping email: not in output type list\n"));
             }
             else {
-                char *extra_mime_headers = NULL;
-                ff.item_count++;
-                if (mode == MODE_SEPARATE) mk_separate_file(&ff);
-                write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers);
+                if (!ff.type) ff.type = item->type;
+                if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT)) {
+                    ff.skip_count++;
+                    DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type));
+                }
+                else {
+                    char *extra_mime_headers = NULL;
+                    ff.item_count++;
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff);
+                    write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers);
+                }
             }
 
         } else if (item->journal && (item->type == PST_TYPE_JOURNAL)) {
-            if (!ff.type) ff.type = item->type;
             DEBUG_INFO(("Processing Journal Entry\n"));
-            if (ff.type != PST_TYPE_JOURNAL) {
-                ff.skip_count++;
-                DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type));
-            }
-            else if (!(output_type_mode & OTMODE_JOURNAL)) {
+            if (!(output_type_mode & OTMODE_JOURNAL)) {
                 ff.skip_count++;
                 DEBUG_INFO(("skipping journal entry: not in output type list\n"));
             }
             else {
-                ff.item_count++;
-                if (mode == MODE_SEPARATE) mk_separate_file(&ff);
-                write_journal(ff.output, item);
-                fprintf(ff.output, "\n");
+                if (!ff.type) ff.type = item->type;
+                if (ff.type != PST_TYPE_JOURNAL) {
+                    ff.skip_count++;
+                    DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type));
+                }
+                else {
+                    ff.item_count++;
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff);
+                    write_journal(ff.output, item);
+                    fprintf(ff.output, "\n");
+                }
             }
 
         } else if (item->appointment && (item->type == PST_TYPE_APPOINTMENT)) {
-            if (!ff.type) ff.type = item->type;
             DEBUG_INFO(("Processing Appointment Entry\n"));
-            if (ff.type != PST_TYPE_APPOINTMENT) {
-                ff.skip_count++;
-                DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. Skipping it\n", ff.type));
-            }
-            else if (!(output_type_mode & OTMODE_APPOINTMENT)) {
+            if (!(output_type_mode & OTMODE_APPOINTMENT)) {
                 ff.skip_count++;
                 DEBUG_INFO(("skipping appointment: not in output type list\n"));
             }
             else {
-                ff.item_count++;
-                if (mode == MODE_SEPARATE) mk_separate_file(&ff);
-                write_appointment(ff.output, item, 0);
-                fprintf(ff.output, "\n");
+                if (!ff.type) ff.type = item->type;
+                if (ff.type != PST_TYPE_APPOINTMENT) {
+                    ff.skip_count++;
+                    DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. Skipping it\n", ff.type));
+                }
+                else {
+                    ff.item_count++;
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff);
+                    write_appointment(ff.output, item, 0);
+                    fprintf(ff.output, "\n");
+                }
             }
 
         } else if (item->message_store) {
@@ -1313,6 +1321,9 @@
     strncpy(body_charset, pst_default_charset(item, sizeof(buffer_charset), buffer_charset), sizeof(body_charset));
     body_charset[sizeof(body_charset)-1] = '\0';
     body_report[0] = '\0';
+    if (item->email->report_text.str && !item->body.str) {
+        strncpy(body_report, "delivery-status", sizeof(body_report));
+    }
 
     // setup default sender
     pst_convert_utf8(item, &item->email->sender_address);
@@ -1464,7 +1475,8 @@
     // needed or used by mail clients
     pst_convert_utf8_null(item, &item->email->sender_address);
     if (item->email->sender_address.str && !strchr(item->email->sender_address.str, '@')
-                                        && strcmp(item->email->sender_address.str, ".")) {
+                                        && strcmp(item->email->sender_address.str, ".")
+                                        && (strlen(item->email->sender_address.str) > 0)) {
         fprintf(f_output, "X-libpst-forensic-sender: %s\n", item->email->sender_address.str);
     }
 
@@ -1564,8 +1576,19 @@
             pst_convert_utf8_null(item, &attach->filename2);
             pst_convert_utf8_null(item, &attach->mimetype);
             DEBUG_INFO(("Attempting Attachment encoding\n"));
-            if (!attach->data.data && attach->mimetype.str && !strcmp(attach->mimetype.str, RFC822)) {
+            if (attach->method == PST_ATTACH_EMBEDDED) {
                 DEBUG_INFO(("seem to have special embedded message attachment\n"));
+                char *m = NULL;
+                if (attach->mimetype.str) {
+                    DEBUG_INFO(("already has a mime-type of %s\n", attach->mimetype.str));
+                    free(attach->mimetype.str);
+                }
+                attach->mimetype.str = strdup(RFC822);
+                attach->mimetype.is_utf8 = 1;
+                write_embedded_message(f_output, attach, boundary, pst, &m);
+            }
+            else if (!attach->data.data && attach->mimetype.str && !strcmp(attach->mimetype.str, RFC822)) {
+                DEBUG_INFO(("seem to have embedded message attachment\n"));
                 find_rfc822_headers(extra_mime_headers);
                 write_embedded_message(f_output, attach, boundary, pst, extra_mime_headers);
             }