changeset 100:1e4a7610d525

fixes from Justin Greer to add -D option to include deleted items, to add missing email headers, to fix bug in my_stristr()
author Carl Byington <carl@five-ten-sg.com>
date Thu, 02 Oct 2008 15:29:36 -0700
parents b7f456946c5b
children 1fc33da23175
files ChangeLog NEWS configure.in libpst.spec.in regression/regression-tests.bash src/libpst.c src/readpst.c xml/libpst.in
diffstat 8 files changed, 151 insertions(+), 80 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sun Sep 28 17:08:52 2008 -0700
+++ b/ChangeLog	Thu Oct 02 15:29:36 2008 -0700
@@ -1,7 +1,10 @@
-LibPST 0.6.20 (2008-09-28)
+LibPST 0.6.20 (2008-10-02)
 ===============================
     	* add configure option --enable-dii=no to remove dependency on libgd.
 	* many fixes in pst2ldif by Robert Harris.
+	* add -D option to include deleted items, from Justin Greer
+	* fix from Justin Greer to add missing email headers
+	* fix from Justin Greer for my_stristr()
 
 LibPST 0.6.19 (2008-09-14)
 ===============================
--- a/NEWS	Sun Sep 28 17:08:52 2008 -0700
+++ b/NEWS	Thu Oct 02 15:29:36 2008 -0700
@@ -1,4 +1,4 @@
-0.6.20  2008-09-28 add configure option --enable-dii=no, fixes from Robert Harris for pst2ldif.
+0.6.20  2008-10-02 add configure option --enable-dii=no, fixes from Robert Harris for pst2ldif.
 0.6.19  2008-09-14 Initial work on a .so shared library from Bharath Acharya.
 0.6.18  2008-08-28 Fixes for iconv on Mac from Justin Greer.
 0.6.17  2008-08-05 More fixes for 32/64 bit portability on big endian ppc.
--- a/configure.in	Sun Sep 28 17:08:52 2008 -0700
+++ b/configure.in	Thu Oct 02 15:29:36 2008 -0700
@@ -83,8 +83,9 @@
 SAVELIBS="${LIBS}"
 AC_SEARCH_LIBS([iconv_open], [iconv])
 if test "x${SAVELIBS}" != "x${LIBS}"; then
-    all_libraries="/usr/lib $all_libraries"
+    all_libraries="-L/usr/lib $all_libraries"
     AC_SUBST(all_libraries)
+    AC_MSG_WARN([This should be building on Mac OSX, adding /usr/lib to the library path])
 fi
 
 # The following lines adds the --enable-pst-debug option to configure:
--- a/libpst.spec.in	Sun Sep 28 17:08:52 2008 -0700
+++ b/libpst.spec.in	Thu Oct 02 15:29:36 2008 -0700
@@ -47,9 +47,12 @@
 
 
 %changelog
-* Sun Sep 28 2008 Carl Byington <carl@five-ten-sg.com> - 0.6.20-1
+* Thu Oct 02 2008 Carl Byington <carl@five-ten-sg.com> - 0.6.20-1
 - add configure option --enable-dii=no to remove dependency on libgd.
 - many fixes in pst2ldif by Robert Harris.
+- add -D option to include deleted items, from Justin Greer
+- fix from Justin Greer to add missing email headers
+- fix from Justin Greer for my_stristr()
 
 * Sun Sep 14 2008 Carl Byington <carl@five-ten-sg.com> - 0.6.19-1
 - Fix base64 encoding that could create long lines.
--- a/regression/regression-tests.bash	Sun Sep 28 17:08:52 2008 -0700
+++ b/regression/regression-tests.bash	Thu Oct 02 15:29:36 2008 -0700
@@ -1,72 +1,54 @@
 #!/bin/bash
 
+
+function dodii()
+{
+    n="$1"
+    fn="$2"
+    rm -rf output$n
+    mkdir output$n
+    $val ../src/pst2dii -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "bates-" -o output$n -O mydii$n -d dumper $fn >$fn.dii.err 2>&1
+         ../src/readpstlog -f I dumper >$fn.log
+    rm -f dumper
+}
+
+
+function dopst()
+{
+    n="$1"
+    fn="$2"
+    rm -rf output$n
+    mkdir output$n
+    $val ../src/readpst -cv -o output$n -d dumper $fn >$fn.pst.err 2>&1
+         ../src/readpstlog -f I dumper >$fn.log
+    $val ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' $fn >$fn.ldif.err 2>&1
+    rm -f dumper
+}
+
+
+
 val="valgrind --leak-check=full"
-#val=''
+val=''
 
 pushd ..
 make || exit
 popd
 
-for i in {1..13}; do
-    rm -rf output$i
-    mkdir output$i
-done
-
 if [ "$1" == "dii" ]; then
-    hash=$(md5sum ams.pst)
-    pre="$hash
-    bates-"
-    $val  ../src/pst2dii  -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "$pre" -o output1 -O mydii -d dumper ams.pst
-          ../src/readpstlog -f I dumper >ams.log
-    $val  ../src/pst2dii  -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "bates-" -o output2 -O mydii2 -d dumper sample_64.pst
-          ../src/readpstlog -f I dumper >sample_64.log
-    $val  ../src/pst2dii  -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "bates-" -o output3 -O mydii3 -d dumper test.pst
-          ../src/readpstlog -f I dumper >test.log
-          ../src/pst2dii  -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "bates-" -o output4 -O mydii4 -d dumper big_mail.pst
-          ../src/readpstlog -f I dumper >big_mail.log
-    exit
+    dodii 1 ams.pst
+    dodii 2 sample_64.pst
+    dodii 3 test.pst
+    dodii 4 big_mail.pst
+else
+    dopst  1 ams.pst
+    dopst  2 sample_64.pst
+    dopst  3 test.pst
+    dopst  4 big_mail.pst
+    dopst  5 mbmg.archive.pst
+    dopst  6 Single2003-read.pst
+    dopst  7 Single2003-unread.pst
+    dopst  8 ol2k3high.pst
+    dopst  9 ol97high.pst
+    dopst 10 returned_message.pst
 fi
 
-$val  ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' ams.pst >ams.err  2>&1
-exit
-
-$val  ../src/readpst -cv    -o output1 -d dumper ams.pst             >out1.err 2>&1
-      ../src/readpstlog -f I dumper >ams1.log
-
-$val  ../src/readpst -cl -r -o output2 ams.pst                       >out2.err 2>&1
-$val  ../src/readpst -S     -o output3 ams.pst                       >out3.err 2>&1
-$val  ../src/readpst -M     -o output4 -d dumper ams.pst             >out4.err 2>&1
-      ../src/readpstlog -f I dumper >ams.log
-
-$val  ../src/readpst        -o output5 -d dumper mbmg.archive.pst    >out5.err 2>&1
-      ../src/readpstlog -f I dumper >mbmg.archive.log
-
-$val  ../src/readpst        -o output6 -d dumper test.pst            >out6.err 2>&1
-      ../src/readpstlog -f I dumper >test.log
-
-$val  ../src/readpst -cv    -o output7 -d dumper sample_64.pst       >out7.err 2>&1
-      ../src/readpstlog -f I dumper >sample_64.log
-
-#$val  ../src/readpst -cv    -o output8 -d dumper big_mail.pst        >out8.err 2>&1
-#      ../src/readpstlog -f I dumper >big_mail.log
-
-$val  ../src/readpst -cv    -o output9 -d dumper Single2003-read.pst >out9.err 2>&1
-      ../src/readpstlog -f I dumper >Single2003-read.log
-
-$val  ../src/readpst -cv    -o output10 -d dumper Single2003-unread.pst >out10.err 2>&1
-      ../src/readpstlog -f I dumper >Single2003-unread.log
-
-$val  ../src/readpst -cv    -o output11 -d dumper ol2k3high.pst      >out11.err 2>&1
-      ../src/readpstlog -f I dumper >ol2k3high.log
-
-$val  ../src/readpst -cv    -o output12 -d dumper ol97high.pst       >out12.err 2>&1
-      ../src/readpstlog -f I dumper >ol97high.log
-
-$val  ../src/readpst -cv    -o output13 -d dumper returned_message.pst >out13.err 2>&1
-      ../src/readpstlog -f I dumper >returned_message.log
-
-$val  ../src/lspst -d dumper ams.pst                                 >out14.err 2>&1
-      ../src/readpstlog -f I dumper >ams.log
-
-rm -f dumper
-
--- a/src/libpst.c	Sun Sep 28 17:08:52 2008 -0700
+++ b/src/libpst.c	Thu Oct 02 15:29:36 2008 -0700
@@ -874,7 +874,7 @@
                 lostfound_ptr = lostfound_ptr->next;
             }
             if (!lostfound_ptr) {
-                DEBUG_WARN(("ERROR -- cannot find parent with id %#"PRIx64". Adding to lost/found\n", parent_id));
+                DEBUG_WARN(("ERROR -- cannot find parent with id %#"PRIx64". Adding id %#"PRIx64" to lost/found\n", parent_id, d_ptr->id));
                 lostfound_ptr = (struct cache_list_node*) xmalloc(sizeof(struct cache_list_node));
                 lostfound_ptr->prev   = NULL;
                 lostfound_ptr->next   = lostfound_head;
@@ -883,7 +883,7 @@
                 lostfound_head = lostfound_ptr;
             } else {
                 parent = lostfound_ptr->ptr;
-                DEBUG_INDEX(("Found parent (%#"PRIx64") in Lost and Found\n", parent->id));
+                DEBUG_INDEX(("Found parent (%#"PRIx64") in lost/found\n", parent->id));
             }
         }
 
@@ -1062,7 +1062,7 @@
             lostfound_shd = NULL;
             while (lostfound_ptr) {
                 if (lostfound_ptr->parent == d_ptr->id) {
-                    DEBUG_INDEX(("Found a child  (%#"PRIx64") of the current record. Joining to main structure.\n", lostfound_ptr->ptr->id));
+                    DEBUG_INDEX(("Found a lost/found child (%#"PRIx64") of the current record. Joining to main structure.\n", lostfound_ptr->ptr->id));
                     parent = d_ptr;
                     d_ptr = lostfound_ptr->ptr;
                     parent->no_child++;
@@ -1132,7 +1132,7 @@
         // free the lost and found
         while (lostfound_head) {
             lostfound_ptr = lostfound_head->next;
-            WARN(("unused lost/found item with parent %#"PRIx64"))", lostfound_head->parent));
+            WARN(("unused lost/found item %#"PRIx64" with parent %#"PRIx64, lostfound_head->ptr->id, lostfound_head->parent));
             free(lostfound_head);
             lostfound_head = lostfound_ptr;
         }
--- a/src/readpst.c	Sun Sep 28 17:08:52 2008 -0700
+++ b/src/readpst.c	Thu Oct 02 15:29:36 2008 -0700
@@ -91,6 +91,10 @@
 #define CMODE_VCARD 0
 #define CMODE_LIST  1
 
+// output mode for deleted items
+#define DMODE_EXCLUDE 0
+#define DMODE_INCLUDE 1
+
 // output settings for RTF bodies
 // filename for the attachment
 #define RTF_ATTACH_NAME "rtf-body.rtf"
@@ -102,6 +106,7 @@
 int mode_MH = 0;
 int output_mode = OUTPUT_NORMAL;
 int contact_mode = CMODE_VCARD;
+int deleted_mode = DMODE_EXCLUDE;
 int overwrite = 0;
 int save_rtf_body = 1;
 pst_file pstfile;
@@ -133,7 +138,7 @@
                 DEBUG_EMAIL(("item->email->subject->subj = %p\n", item->email->subject->subj));
             }
             if (item) {
-                if (item->folder && d_ptr->child && strcasecmp(item->file_as, "Deleted Items")) {
+                if (item->folder && d_ptr->child && (deleted_mode == DMODE_INCLUDE || strcasecmp(item->file_as, "Deleted Items"))) {
                     //if this is a non-empty folder other than deleted items, we want to recurse into it
                     if (output_mode != OUTPUT_QUIET) printf("Processing Folder \"%s\"\n", item->file_as);
                     process(item, d_ptr->child);
@@ -223,7 +228,7 @@
     prog_name = argv[0];
 
     // command-line option handling
-    while ((c = getopt(argc, argv, "bCc:d:hko:qrSMVw"))!= -1) {
+    while ((c = getopt(argc, argv, "bCc:Dd:hko:qrSMVw"))!= -1) {
         switch (c) {
         case 'b':
             save_rtf_body = 0;
@@ -241,6 +246,9 @@
                 exit(0);
             }
             break;
+        case 'D':
+            deleted_mode = DMODE_INCLUDE;
+            break;
         case 'd':
             d_log = optarg;
             break;
@@ -413,6 +421,7 @@
     printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
     printf("OPTIONS:\n");
     printf("\t-C\t- Decrypt (compressible encryption) the entire file and output on stdout (not typically useful)\n");
+    printf("\t-D\t- Include deleted items in output\n");
     printf("\t-M\t- MH. Write emails in the MH format\n");
     printf("\t-S\t- Separate. Write emails in the separate format\n");
     printf("\t-V\t- Version. Display program version\n");
@@ -653,6 +662,8 @@
         x++; // advance the search in the haystack
     }
     DEBUG_RET();
+    // If the haystack ended before our search finished, it's not a match.
+    if (*y != '\0') return NULL;
     return z;
 }
 
@@ -790,6 +801,8 @@
     time_t em_time;
     char *c_time;
     pst_item_attach* current_attach;
+    int has_from, has_subject, has_to, has_cc, has_bcc, has_date;
+    has_from = has_subject = has_to = has_cc = has_bcc = has_date = 0;
     DEBUG_ENT("write_normal_email");
 
     // convert the sent date if it exists, or set it to a fixed date
@@ -860,18 +873,44 @@
                 DEBUG_WARN(("found a ':' during the my_stristr, but not after that..\n"));
             }
         }
+
+        // Check if the header block has all the necessary headers.
+        if (my_stristr(item->email->header, "\nFrom:") || (strncasecmp(item->email->header, "From: ", 6) == 0) || my_stristr(item->email->header, "\nX-From:")) {
+            DEBUG_EMAIL(("header block has From header\n"));
+            has_from = 1;
+        }
+        if (my_stristr(item->email->header, "\nTo:") || (strncasecmp(item->email->header, "To: ", 4) == 0)) {
+            DEBUG_EMAIL(("header block has To header\n"));
+            has_to = 1;
+        }
+        if (my_stristr(item->email->header, "\nSubject:") || (strncasecmp(item->email->header, "Subject: ", 9) == 0)) {
+            DEBUG_EMAIL(("header block has Subject header\n"));
+            has_subject = 1;
+        }
+        if (my_stristr(item->email->header, "\nDate:") || (strncasecmp(item->email->header, "Date: ", 6) == 0)) {
+            DEBUG_EMAIL(("header block has Date header\n"));
+            has_date = 1;
+        }
+        if (my_stristr(item->email->header, "\nCC:") || (strncasecmp(item->email->header, "CC: ", 4) == 0)) {
+            DEBUG_EMAIL(("header block has CC header\n"));
+            has_cc = 1;
+        }
+        if (my_stristr(item->email->header, "\nBCC:") || (strncasecmp(item->email->header, "BCC: ", 5) == 0)) {
+            DEBUG_EMAIL(("header block has BCC header\n"));
+            has_bcc = 1;
+        }
     }
 
     if (!boundary && (item->attach || (item->email->body && item->email->htmlbody)
                  || item->email->rtf_compressed || item->email->encrypted_body
                  || item->email->encrypted_htmlbody)) {
-      // we need to create a boundary here.
-      DEBUG_EMAIL(("must create own boundary. oh dear.\n"));
-      boundary = malloc(50 * sizeof(char)); // allow 50 chars for boundary
-      boundary[0] = '\0';
-      sprintf(boundary, "--boundary-LibPST-iamunique-%i_-_-", rand());
-      DEBUG_EMAIL(("created boundary is %s\n", boundary));
-      boundary_created = 1;
+        // we need to create a boundary here.
+        DEBUG_EMAIL(("must create own boundary. oh dear.\n"));
+        boundary = malloc(50 * sizeof(char)); // allow 50 chars for boundary
+        boundary[0] = '\0';
+        sprintf(boundary, "--boundary-LibPST-iamunique-%i_-_-", rand());
+        DEBUG_EMAIL(("created boundary is %s\n", boundary));
+        boundary_created = 1;
     }
 
     DEBUG_EMAIL(("About to print Header\n"));
@@ -895,6 +934,39 @@
             temp[1] = '\0'; // stop after first \n
         }
 
+        // Write out any fields that weren't included in the header.
+        if (!has_from) {
+            temp = item->email->outlook_sender;
+            if (!temp) temp = "";
+            fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, temp);
+        }
+
+        if (!has_subject) {
+            if (item->email->subject && item->email->subject->subj) {
+                fprintf(f_output, "Subject: %s\n", item->email->subject->subj);
+            } else {
+                fprintf(f_output, "Subject: \n");
+            }
+        }
+
+        if (!has_to && item->email->sentto_address) {
+            fprintf(f_output, "To: %s\n", item->email->sentto_address);
+        }
+
+        if (!has_cc && item->email->cc_address) {
+            fprintf(f_output, "Cc: %s\n", item->email->cc_address);
+        }
+
+        if (!has_bcc && item->email->bcc_address) {
+            fprintf(f_output, "Bcc: %s\n", item->email->bcc_address);
+        }
+
+        if (!has_date && item->email->sent_date) {
+            char c_time[C_TIME_SIZE];
+            strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time));
+            fprintf(f_output, "Date: %s\n", c_time);
+        }
+
         // Now, write out the header...
         soh = skip_header_prologue(item->email->header);
         if (mode != MODE_SEPARATE) {
@@ -925,13 +997,16 @@
         if (!temp) temp = "";
         fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, temp);
 
-        if (item->email->subject) {
+        if (item->email->subject && item->email->subject->subj) {
             fprintf(f_output, "Subject: %s\n", item->email->subject->subj);
         } else {
             fprintf(f_output, "Subject: \n");
         }
 
-        fprintf(f_output, "To: %s\n", item->email->sentto_address);
+        if (item->email->sentto_address) {
+            fprintf(f_output, "To: %s\n", item->email->sentto_address);
+        }
+
         if (item->email->cc_address) {
             fprintf(f_output, "Cc: %s\n", item->email->cc_address);
         }
--- a/xml/libpst.in	Sun Sep 28 17:08:52 2008 -0700
+++ b/xml/libpst.in	Thu Oct 02 15:29:36 2008 -0700
@@ -52,6 +52,7 @@
             <cmdsynopsis>
                 <command>readpst</command>
                 <arg><option>-C</option></arg>
+                <arg><option>-D</option></arg>
                 <arg><option>-M</option></arg>
                 <arg><option>-S</option></arg>
                 <arg><option>-V</option></arg>
@@ -86,6 +87,12 @@
                     </para></listitem>
                 </varlistentry>
                 <varlistentry>
+                    <term>-D</term>
+                    <listitem><para>
+                        Include deleted items in the output.
+                    </para></listitem>
+                </varlistentry>
+                <varlistentry>
                     <term>-M</term>
                     <listitem><para>
                         Output messages in MH format as separate files.  This will create