changeset 239:aa50c23a6935

patch from Lee Ayres to add file name extensions in separate mode; allow mixed item types in a folder in separate mode
author Carl Byington <carl@five-ten-sg.com>
date Mon, 14 Sep 2009 10:56:39 -0700 (2009-09-14)
parents 410b6422d65b
children 75a27d2add60
files AUTHORS ChangeLog NEWS configure.in regression/regression-tests.bash src/readpst.c xml/libpst.in
diffstat 7 files changed, 75 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/AUTHORS	Sun Sep 13 10:49:12 2009 -0700
+++ b/AUTHORS	Mon Sep 14 10:56:39 2009 -0700
@@ -30,6 +30,7 @@
     bharder <bharder@methodlogic.net>
     Chris White <chris@soniannetworks.com>
     Roberto Polli <robipolli@gmail.com>
+    Lee Ayres <ayres@interhack.com>
 
 
 Testing team:
--- a/ChangeLog	Sun Sep 13 10:49:12 2009 -0700
+++ b/ChangeLog	Mon Sep 14 10:56:39 2009 -0700
@@ -1,6 +1,8 @@
 LibPST 0.6.44 (2009-xx-xx)
 ===============================
     * fix --help usage; readpstlog is gone, debug files are now ascii text.
+    * patch from Lee Ayres to add file name extensions in separate mode.
+    * allow mixed item types in a folder in separate mode.
 
 LibPST 0.6.43 (2009-09-12)
 ===============================
--- a/NEWS	Sun Sep 13 10:49:12 2009 -0700
+++ b/NEWS	Mon Sep 14 10:56:39 2009 -0700
@@ -1,3 +1,4 @@
+0.6.44  2009-09-14 patch from Lee Ayres to add file name extensions in separate mode
 0.6.43  2009-09-12 patches from Justin Greer, Chris White, Roberto Polli; better rfc822 embedded message decoding
 0.6.42  2009-09-03 patch from Fridrich Strba to build with DJGPP DOS cross-compiler
 0.6.41  2009-06-23 fix ax_python detection - should not use locate command
--- a/configure.in	Sun Sep 13 10:49:12 2009 -0700
+++ b/configure.in	Mon Sep 14 10:56:39 2009 -0700
@@ -1,5 +1,5 @@
 AC_PREREQ(2.59)
-AC_INIT(libpst,0.6.43,carl@five-ten-sg.com)
+AC_INIT(libpst,0.6.44,carl@five-ten-sg.com)
 AC_CONFIG_SRCDIR([src/libpst.c])
 AC_CONFIG_HEADER([config.h])
 AM_INIT_AUTOMAKE
--- a/regression/regression-tests.bash	Sun Sep 13 10:49:12 2009 -0700
+++ b/regression/regression-tests.bash	Mon Sep 14 10:56:39 2009 -0700
@@ -20,11 +20,11 @@
 {
     n="$1"
     fn="$2"
-    echo $fn
     ba=$(basename "$fn" .pst)
     size=$(stat -c %s $fn)
     rm -rf output$n
     if [ -z "$val" ] || [ $size -lt 10000000 ]; then
+        echo $fn
         mkdir output$n
         $val ../src/pst2dii -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "bates-" -o output$n -O $ba.mydii -d $fn.log $fn >$fn.dii.err 2>&1
     fi
@@ -35,11 +35,11 @@
 {
     n="$1"
     fn="$2"
-    echo $fn
     ba=$(basename "$fn" .pst)
     size=$(stat -c %s $fn)
     rm -rf output$n
     if [ -z "$val" ] || [ $size -lt 10000000 ]; then
+        echo $fn
         mkdir output$n
         $val ../src/pst2ldif -d $ba.ldif.log -b 'o=ams-cc.com, c=US' -c 'inetOrgPerson' $fn >$ba.ldif.err 2>&1
     fi
@@ -50,20 +50,30 @@
 {
     n="$1"
     fn="$2"
-    echo $fn
     ba=$(basename "$fn" .pst)
     size=$(stat -c %s $fn)
     jobs=""
     [ -n "$val" ] && jobs="-j 0"
     rm -rf output$n
     if [ -z "$val" ] || [ $size -lt 10000000 ]; then
+        echo $fn
         mkdir output$n
         if [ "$regression" == "yes" ]; then
             $val ../src/readpst $jobs -te -r -cv -o output$n $fn >$ba.err 2>&1
         else
-            #val ../src/readpst $jobs -r -D -cv -o output$n            $fn
-            $val ../src/readpst $jobs -te -r -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
-            #$val ../src/readpst $jobs -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
+            ## only email and include deleted items, have a deleted items folder with multiple item types
+            #$val ../src/readpst $jobs -te -r -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
+
+            ## normal recursive dump
+            #$val ../src/readpst $jobs     -r    -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
+
+             # separate mode with filename extensions
+             $val ../src/readpst $jobs     -r -e -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
+
+            ## separate mode where we decode all attachments to binary files
+            #$val ../src/readpst $jobs     -r -S -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
+
+            ## testing idblock
             #../src/getidblock -p $fn 0 >$ba.fulldump
         fi
     fi
--- a/src/readpst.c	Sun Sep 13 10:49:12 2009 -0700
+++ b/src/readpst.c	Mon Sep 14 10:56:39 2009 -0700
@@ -11,7 +11,7 @@
 #define OUTPUT_TEMPLATE "%s"
 #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory"
 #define KMAIL_INDEX ".%s.index"
-#define SEP_MAIL_FILE_TEMPLATE "%i"
+#define SEP_MAIL_FILE_TEMPLATE "%i%s"
 
 // max size of the c_time char*. It will store the date of the email
 #define C_TIME_SIZE 500
@@ -39,7 +39,7 @@
 int       close_recurse_dir();
 char*     mk_separate_dir(char *dir);
 int       close_separate_dir();
-int       mk_separate_file(struct file_ll *f);
+int       mk_separate_file(struct file_ll *f, char *extension);
 char*     my_stristr(char *haystack, char *needle);
 void      check_filename(char *fname);
 void      write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst);
@@ -81,7 +81,7 @@
 
 // separate mode creates the same directory structure as recurse. The emails are stored in
 // separate files, numbering from 1 upward. Attachments belonging to the emails are
-// saved as email_no-filename (e.g. 1-samplefile.doc or 000001-Attachment2.zip)
+// saved as email_no-filename (e.g. 1-samplefile.doc or 1-Attachment2.zip)
 #define MODE_SEPARATE 3
 
 
@@ -118,6 +118,7 @@
 // global settings
 int         mode         = MODE_NORMAL;
 int         mode_MH      = 0;   // a submode of MODE_SEPARATE
+int         mode_EX      = 0;   // a submode of MODE_SEPARATE
 int         mode_thunder = 0;   // a submode of MODE_RECURSE
 int         output_mode  = OUTPUT_NORMAL;
 int         contact_mode = CMODE_VCARD;
@@ -126,6 +127,7 @@
 int         contact_mode_specified = 0;
 int         overwrite = 0;
 int         save_rtf_body = 1;
+int         file_name_len = 10;     // enough room for MODE_SEPARATE file name
 pst_file    pstfile;
 regex_t     meta_charset_pattern;
 
@@ -281,13 +283,13 @@
             }
             else {
                 if (!ff.type) ff.type = item->type;
-                if (ff.type != PST_TYPE_CONTACT) {
+                if ((ff.type != PST_TYPE_CONTACT) && (mode != MODE_SEPARATE)) {
                     ff.skip_count++;
                     DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. Skipping it\n", ff.type));
                 }
                 else {
                     ff.item_count++;
-                    if (mode == MODE_SEPARATE) mk_separate_file(&ff);
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".vcf" : "");
                     if (contact_mode == CMODE_VCARD) {
                         pst_convert_utf8_null(item, &item->comment);
                         write_vcard(ff.output, item, item->contact, item->comment.str);
@@ -308,14 +310,14 @@
             }
             else {
                 if (!ff.type) ff.type = item->type;
-                if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT)) {
+                if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT) && (mode != MODE_SEPARATE)) {
                     ff.skip_count++;
                     DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type));
                 }
                 else {
                     char *extra_mime_headers = NULL;
                     ff.item_count++;
-                    if (mode == MODE_SEPARATE) mk_separate_file(&ff);
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".eml" : "");
                     write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers);
                 }
             }
@@ -328,13 +330,13 @@
             }
             else {
                 if (!ff.type) ff.type = item->type;
-                if (ff.type != PST_TYPE_JOURNAL) {
+                if ((ff.type != PST_TYPE_JOURNAL) && (mode != MODE_SEPARATE)) {
                     ff.skip_count++;
                     DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type));
                 }
                 else {
                     ff.item_count++;
-                    if (mode == MODE_SEPARATE) mk_separate_file(&ff);
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : "");
                     write_journal(ff.output, item);
                     fprintf(ff.output, "\n");
                 }
@@ -348,14 +350,14 @@
             }
             else {
                 if (!ff.type) ff.type = item->type;
-                if (ff.type != PST_TYPE_APPOINTMENT) {
+                if ((ff.type != PST_TYPE_APPOINTMENT) && (mode != MODE_SEPARATE)) {
                     ff.skip_count++;
                     DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. Skipping it\n", ff.type));
                 }
                 else {
                     ff.item_count++;
-                    if (mode == MODE_SEPARATE) mk_separate_file(&ff);
-                    write_appointment(ff.output, item, 0);
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : "");
+                    write_schedule_part_data(ff.output, item, NULL, NULL);
                     fprintf(ff.output, "\n");
                 }
             }
@@ -396,7 +398,7 @@
     }
 
     // command-line option handling
-    while ((c = getopt(argc, argv, "bc:Dd:hj:kMo:qrSt:uVw"))!= -1) {
+    while ((c = getopt(argc, argv, "bc:Dd:ehj:kMo:qrSt:uVw"))!= -1) {
         switch (c) {
         case 'b':
             save_rtf_body = 0;
@@ -435,6 +437,13 @@
         case 'M':
             mode = MODE_SEPARATE;
             mode_MH = 1;
+            mode_EX = 0;
+            break;
+        case 'e':
+            mode = MODE_SEPARATE;
+            mode_MH = 1;
+            mode_EX = 1;
+            file_name_len = 14;
             break;
         case 'o':
             output_dir = optarg;
@@ -449,6 +458,7 @@
         case 'S':
             mode = MODE_SEPARATE;
             mode_MH = 0;
+            mode_EX = 0;
             break;
         case 't':
             // email, appointment, contact, other
@@ -642,11 +652,12 @@
     printf("OPTIONS:\n");
     printf("\t-V\t- Version. Display program version\n");
     printf("\t-D\t- Include deleted items in output\n");
-    printf("\t-M\t- MH. Write emails in the MH format\n");
+    printf("\t-M\t- Write emails in the MH (rfc822) format\n");
     printf("\t-S\t- Separate. Write emails in the separate format\n");
     printf("\t-b\t- Don't save RTF-Body attachments\n");
     printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n");
     printf("\t-d <filename> \t- Debug to file.\n");
+    printf("\t-e\t- As with -M, but include extensions on output files\n");
     printf("\t-h\t- Help. This screen\n");
     printf("\t-j <integer>\t- Number of parallel jobs to run\n");
     printf("\t-k\t- KMail. Output in kmail format\n");
@@ -798,7 +809,7 @@
         if (y == 0)
             snprintf(dir_name, dirsize, "%s", dir);
         else
-            snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y); // enough for 9 digits allocated above
+      snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y, ""); // enough for 9 digits allocated above
 
         check_filename(dir_name);
         DEBUG_INFO(("about to try creating %s\n", dir_name));
@@ -858,14 +869,14 @@
 }
 
 
-int mk_separate_file(struct file_ll *f) {
+int mk_separate_file(struct file_ll *f, char *extension) {
     const int name_offset = 1;
     DEBUG_ENT("mk_separate_file");
     DEBUG_INFO(("opening next file to save email\n"));
     if (f->item_count > 999999999) { // bigger than nine 9's
         DIE(("mk_separate_file: The number of emails in this folder has become too high to handle\n"));
     }
-    sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count + name_offset);
+    sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count + name_offset, extension);
     if (f->output) fclose(f->output);
     f->output = NULL;
     check_filename(f->name);
@@ -1265,9 +1276,9 @@
     fprintf(f_output, "BEGIN:VCALENDAR\n");
     fprintf(f_output, "VERSION:2.0\n");
     fprintf(f_output, "PRODID:LibPST v%s\n", VERSION);
-    fprintf(f_output, "METHOD:%s\n", method);
+    if (method) fprintf(f_output, "METHOD:%s\n", method);
     fprintf(f_output, "BEGIN:VEVENT\n");
-    fprintf(f_output, "ORGANIZER;CN=\"%s\":MAILTO:%s\n", item->email->outlook_sender_name.str, sender);
+    if (sender) fprintf(f_output, "ORGANIZER;CN=\"%s\":MAILTO:%s\n", item->email->outlook_sender_name.str, sender);
     write_appointment(f_output, item, 1);
     fprintf(f_output, "END:VCALENDAR\n");
 }
@@ -1912,8 +1923,8 @@
     } else if (mode == MODE_SEPARATE) {
         // do similar stuff to recurse here.
         mk_separate_dir(item->file_as.str);
-        f->name = (char*) pst_malloc(10);
-        memset(f->name, 0, 10);
+        f->name = (char*) pst_malloc(file_name_len);
+        memset(f->name, 0, file_name_len);
     } else {
         f->name = (char*) pst_malloc(strlen(item->file_as.str)+strlen(OUTPUT_TEMPLATE)+1);
         sprintf(f->name, OUTPUT_TEMPLATE, item->file_as.str);
--- a/xml/libpst.in	Sun Sep 13 10:49:12 2009 -0700
+++ b/xml/libpst.in	Mon Sep 14 10:56:39 2009 -0700
@@ -35,7 +35,7 @@
 
     <refentry id="readpst.1">
         <refentryinfo>
-            <date>2009-09-12</date>
+            <date>2009-09-14</date>
         </refentryinfo>
 
         <refmeta>
@@ -60,6 +60,7 @@
                 <arg><option>-b</option></arg>
                 <arg><option>-c <replaceable class="parameter">format</replaceable></option></arg>
                 <arg><option>-d <replaceable class="parameter">debug-file</replaceable></option></arg>
+                <arg><option>-e</option></arg>
                 <arg><option>-h</option></arg>
                 <arg><option>-j <replaceable class="parameter">jobs</replaceable></option></arg>
                 <arg><option>-k</option></arg>
@@ -93,7 +94,7 @@
                 <varlistentry>
                     <term>-M</term>
                     <listitem><para>
-                        Output messages in MH format as separate files.  This will create
+                        Output messages in MH (rfc822) format as separate files.  This will create
                         folders as named in the PST file, and will put each email together with
                         any attachments into its own file.  These files will be numbered from 1
                         to n with no leading zeros.
@@ -102,12 +103,14 @@
                 <varlistentry>
                     <term>-S</term>
                     <listitem><para>
-                        Output messages into separate files.  This will create folders as named
-                        in the PST file, and will put each email in its own file.  These files
-                        will be numbered from 1 increasing in intervals of 1 (ie 1, 2, 3, ...).
-                        Any attachments are saved alongside each email as XXXXXXXXX-attach1,
-                        XXXXXXXXX-attach2 and so on, or with the name of the attachment if one
-                        is present.
+                        Output messages into separate files.  This will create folders as
+                        named in the PST file, and will put each email in its own file.  These
+                        files will be numbered from 1 to n with no leading zeros. So the email
+                        contents for message $m are saved in a file named $m.  Attachments
+                        will also be saved in the same folder as the email message. The
+                        attachments for message $m are saved as $m-$name, where $name is
+                        the original name of the attachment, or 'attach$n' if the attachment
+                        had no name; $n is another sequential index with no leading zeros.
                     </para></listitem>
                 </varlistentry>
                 <varlistentry>
@@ -136,6 +139,13 @@
                     </para></listitem>
                 </varlistentry>
                 <varlistentry>
+                    <term>-e</term>
+                    <listitem><para>
+                        Same as the -M option, but each output file name will include one
+                        of the extensions .eml, .ics, or .vcf.
+                    </para></listitem>
+                </varlistentry>
+                <varlistentry>
                     <term>-h</term>
                     <listitem><para>
                         Show summary of options and exit.
@@ -245,7 +255,7 @@
 
     <refentry id="lspst.1">
         <refentryinfo>
-            <date>2009-09-12</date>
+            <date>2009-09-14</date>
         </refentryinfo>
 
         <refmeta>
@@ -340,7 +350,7 @@
 
     <refentry id="pst2ldif.1">
         <refentryinfo>
-            <date>2009-09-12</date>
+            <date>2009-09-14</date>
         </refentryinfo>
 
         <refmeta>
@@ -508,7 +518,7 @@
 
     <refentry id="pst2dii.1">
         <refentryinfo>
-            <date>2009-09-12</date>
+            <date>2009-09-14</date>
         </refentryinfo>
 
         <refmeta>
@@ -641,7 +651,7 @@
 
     <refentry id="pst.5">
         <refentryinfo>
-            <date>2009-09-12</date>
+            <date>2009-09-14</date>
         </refentryinfo>
 
         <refmeta>