changeset 201:3850a3b11745

fixes for parallel readpst
author Carl Byington <carl@five-ten-sg.com>
date Sat, 16 May 2009 10:32:26 -0700
parents d360f96f71f6
children 2f38c4ce606f
files TODO configure.in python/python-libpst.cpp regression/regression-tests.bash src/define.h src/libpst.c src/libpst.h src/readpst.c xml/libpst.in
diffstat 9 files changed, 205 insertions(+), 120 deletions(-) [+]
line wrap: on
line diff
--- a/TODO	Wed May 13 20:06:53 2009 -0700
+++ b/TODO	Sat May 16 10:32:26 2009 -0700
@@ -5,5 +5,8 @@
 pst2diii needs header and mime type updates from readpst.
 
 At the next soname bump (to libpst.so.5) we should
+    move some of readpst into the shared library, in particular write_normal_email()
     remove readpstlog, and produce ascii debug log files
-    move some of readpst into the shared library, in particular write_normal_email()
+    add debug FILE* into the pst_file structure, so all debug printing
+        will be passed the pst_file struct. Also add a semaphore in there
+        to synchronize debug printing.
--- a/configure.in	Wed May 13 20:06:53 2009 -0700
+++ b/configure.in	Sat May 16 10:32:26 2009 -0700
@@ -133,7 +133,7 @@
     )
 AC_HEADER_DIRENT
 AC_HEADER_STDC
-AC_CHECK_HEADERS([ctype.h dirent.h errno.h fcntl.h inttypes.h limits.h regex.h semaphore.h signal.h stdarg.h stdint.h stdio.h stdlib.h string.h sys/param.h sys/stat.h sys/types.h time.h unistd.h wchar.h])
+AC_CHECK_HEADERS([ctype.h dirent.h errno.h fcntl.h inttypes.h limits.h regex.h semaphore.h signal.h stdarg.h stdint.h stdio.h stdlib.h string.h sys/param.h sys/shm.h sys/stat.h sys/types.h time.h unistd.h wchar.h])
 AC_SEARCH_LIBS([sem_init],rt)
 
 
@@ -161,7 +161,7 @@
 fi
 AC_FUNC_STRFTIME
 AC_FUNC_VPRINTF
-AC_CHECK_FUNCS([memchr memmove memset regcomp strcasecmp strncasecmp strchr strdup strerror strpbrk strrchr strstr strtol])
+AC_CHECK_FUNCS([chdir getcwd memchr memmove memset regcomp strcasecmp strncasecmp strchr strdup strerror strpbrk strrchr strstr strtol])
 AM_ICONV
 if test "$am_cv_func_iconv" != "yes"; then
     AC_MSG_ERROR([libpst requires iconv which is missing])
@@ -285,11 +285,13 @@
 	enable_shared="yes"
     # check for boost
     AX_PYTHON
-    AX_BOOST_PYTHON
     if test "$ax_python_bin" = "no"; then
         AC_MSG_ERROR(python binary not found)
     fi
-
+    AX_BOOST_PYTHON
+    if test "$ac_cv_boost_python" = "no"; then
+        AC_MSG_ERROR(boost python not found)
+    fi
     AC_SUBST(PYTHON_VERSION, [$ax_python_bin])
 fi
 
--- a/python/python-libpst.cpp	Wed May 13 20:06:53 2009 -0700
+++ b/python/python-libpst.cpp	Sat May 16 10:32:26 2009 -0700
@@ -588,6 +588,8 @@
         ;
 
     class_<pst_file>("pst_file")
+        .def_readonly("cwd",         &pst_file::cwd)
+        .def_readonly("fname",       &pst_file::fname)
         .add_property("i_head",      make_getter(&pst_file::i_head, return_value_policy<reference_existing_object>()))
         .add_property("i_tail",      make_getter(&pst_file::i_tail, return_value_policy<reference_existing_object>()))
         .add_property("d_head",      make_getter(&pst_file::d_head, return_value_policy<reference_existing_object>()))
--- a/regression/regression-tests.bash	Wed May 13 20:06:53 2009 -0700
+++ b/regression/regression-tests.bash	Sat May 16 10:32:26 2009 -0700
@@ -54,8 +54,8 @@
     #    ../src/readpst -cv -o output$n $fn >$ba.err 2>&1
     #           readpst -cv -o output$n -d dumper $fn >$ba.err 2>&1
     $val ../src/readpst -r -D -cv -o output$n  $fn
-    #$val ../src/readpst -r -D -cv -o output$n -d dumper $fn >$ba.err 2>&1
-    #     ../src/readpstlog -f I dumper >$ba.log
+  # $val ../src/readpst -r -D -cv -o output$n -d dumper $fn >$ba.err 2>&1
+  #      ../src/readpstlog -f I dumper >$ba.log
 
     #../src/getidblock -d -p $fn 0 >$ba.fulldump
     #../src/readpstlog -f I getidblock.log >$ba.fulldump.log
@@ -117,15 +117,16 @@
     #dopst  13 test-text.pst
     #dopst  14 joe.romanowski.pst
     #dopst  15 hourig1.pst
-    ##dopst  16 hourig2.pst
-    dopst  17 hourig3.pst
+    dopst  16 hourig2.pst
+    #dopst  17 hourig3.pst
     #dopst  18 test-mac.pst
-    ##dopst  19 harris.pst
+    #dopst  19 harris.pst
     #dopst  20 spam.pst
     #dopst  21 rendgen.pst       # single email appointment
-    dopst  22 rendgen2.pst      # email appointment with no termination date
+    #dopst  22 rendgen2.pst      # email appointment with no termination date
     #dopst  23 rendgen3.pst      # mime signed email
-    dopst  24 rendgen4.pst      # appointment test cases
+    #dopst  24 rendgen4.pst      # appointment test cases
+    #dopst  25 rendgen5.pst      # appointment test cases
 fi
 
 grep 'lost:' *err | grep -v 'lost: 0 '
--- a/src/define.h	Wed May 13 20:06:53 2009 -0700
+++ b/src/define.h	Sat May 16 10:32:26 2009 -0700
@@ -136,6 +136,10 @@
     #include <sys/types.h>
 #endif
 
+#ifdef HAVE_SYS_SHM_H
+    #include <sys/shm.h>
+#endif
+
 #ifdef HAVE_SYS_WAIT_H
     #include <sys/wait.h>
 #endif
--- a/src/libpst.c	Wed May 13 20:06:53 2009 -0700
+++ b/src/libpst.c	Sat May 16 10:32:26 2009 -0700
@@ -253,7 +253,7 @@
 };
 
 static int              pst_build_desc_ptr(pst_file *pf, int64_t offset, int32_t depth, uint64_t linku1, uint64_t start_val, uint64_t end_val);
-static pst_id2_tree*      pst_build_id2(pst_file *pf, pst_index_ll* list);
+static pst_id2_tree*    pst_build_id2(pst_file *pf, pst_index_ll* list);
 static int              pst_build_id_ptr(pst_file *pf, int64_t offset, int32_t depth, uint64_t linku1, uint64_t start_val, uint64_t end_val);
 static int              pst_chr_count(char *str, char x);
 static size_t           pst_ff_compile_ID(pst_file *pf, uint64_t i_id, pst_holder *h, size_t size);
@@ -268,8 +268,8 @@
 static size_t           pst_getAtPos(pst_file *pf, int64_t pos, void* buf, size_t size);
 static int              pst_getBlockOffsetPointer(pst_file *pf, pst_id2_tree *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p);
 static int              pst_getBlockOffset(char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset *p);
-static pst_id2_tree*      pst_getID2(pst_id2_tree * ptr, uint64_t id);
-static pst_desc_tree*     pst_getDptr(pst_file *pf, uint64_t d_id);
+static pst_id2_tree*    pst_getID2(pst_id2_tree * ptr, uint64_t id);
+static pst_desc_tree*   pst_getDptr(pst_file *pf, uint64_t d_id);
 static uint64_t         pst_getIntAt(pst_file *pf, char *buf);
 static uint64_t         pst_getIntAtPos(pst_file *pf, int64_t pos);
 static pst_mapi_object* pst_parse_block(pst_file *pf, uint64_t block_id, pst_id2_tree *i2_head);
@@ -353,6 +353,20 @@
     DEBUG_INFO(("Pointer1 is %#"PRIx64", back pointer2 is %#"PRIx64"\n", pf->index1, pf->index1_back));
 
     DEBUG_RET();
+
+    pf->cwd = pst_malloc(PATH_MAX+1);
+    getcwd(pf->cwd, PATH_MAX+1);
+    pf->fname = strdup(name);
+    return 0;
+}
+
+/* Reopen pf->fp by name (resolved against pf->cwd); the caller's cwd is restored even if the reopen fails. Returns 0 or -1. */
+int  pst_reopen(pst_file *pf) {
+    char cwd[PATH_MAX];
+    if (!getcwd(cwd, PATH_MAX))            return -1;
+    if (chdir(pf->cwd))                    return -1;
+    FILE *reopened = freopen(pf->fname, "rb", pf->fp);  // NULL on failure
+    if (chdir(cwd) || !reopened)           return -1;   // chdir back first, then report either failure
     return 0;
 }
 
@@ -365,13 +379,14 @@
     }
     if (fclose(pf->fp)) {
         DEBUG_WARN(("fclose returned non-zero value\n"));
-        DEBUG_RET();
-        return -1;
     }
+    // free the paths
+    free(pf->cwd);
+    free(pf->fname);
     // we must free the id linklist and the desc tree
-    pst_free_id (pf->i_head);
-    pst_free_desc (pf->d_head);
-    pst_free_xattrib (pf->x_head);
+    pst_free_id(pf->i_head);
+    pst_free_desc(pf->d_head);
+    pst_free_xattrib(pf->x_head);
     DEBUG_RET();
     return 0;
 }
@@ -2490,6 +2505,8 @@
                     LIST_COPY_CSTR(item->ascii_type);
                     if (pst_strincmp("IPF.Note", item->ascii_type, 8) == 0)
                         item->type = PST_TYPE_NOTE;
+                    else if (pst_strincmp("IPF.Imap", item->ascii_type, 8) == 0)  // keep this in the same chain so a matched IPF.Note is not re-tested by the else-if branches below
+                        item->type = PST_TYPE_NOTE;
                     else if (pst_stricmp("IPF", item->ascii_type) == 0)
                         item->type = PST_TYPE_NOTE;
                     else if (pst_strincmp("IPF.Contact", item->ascii_type, 11) == 0)
@@ -4335,6 +4352,7 @@
         }
         if (i   <= s) { r->termination      = PST_LE_GET_UINT8(p+i) - 0x21;  i += 4; }
         if (i+4 <= s) { r->count            = PST_LE_GET_UINT32(p+i);        i += 4; }
+        if (r->termination == 2) r->count = 0;
         switch (r->type) {
             case 0: // daily
                 if (r->sub_type == 0) {
--- a/src/libpst.h	Wed May 13 20:06:53 2009 -0700
+++ b/src/libpst.h	Sat May 16 10:32:26 2009 -0700
@@ -850,6 +850,12 @@
 
 
 typedef struct pst_file {
+    /** file pointer to opened PST file */
+    FILE*   fp;
+    /** original cwd when the file was opened */
+    char*   cwd;
+    /** original file name when the file was opened */
+    char*   fname;
     /** the head and tail of the linked list of index structures */
     pst_index_ll *i_head, *i_tail;
     /** the head and tail of the top level of the descriptor tree */
@@ -872,8 +878,6 @@
     uint64_t index2;
     /** back pointer value in the first b-tree node in the descriptor tree */
     uint64_t index2_back;
-    /** file pointer to opened PST file */
-    FILE * fp;
     /** size of the pst file */
     uint64_t size;
     /** @li 0 PST_NO_ENCRYPT, none
@@ -895,7 +899,14 @@
  * @param name name of the file, suitable for fopen().
  * @return 0 if ok, -1 if error
  */
-int            pst_open(pst_file *pf, const char *name);
+int             pst_open(pst_file *pf, const char *name);
+
+
+/** Reopen the pst file after a fork
+ * @param pf   pointer to the pst_file structure setup by pst_open().
+ * @return 0 if ok, -1 if error
+ */
+int             pst_reopen(pst_file *pf);
 
 
 /** Load the index entries from the pst file. This loads both the
@@ -903,20 +914,20 @@
  *  first call after pst_open().
  * @param pf pointer to the pst_file structure setup by pst_open().
  */
-int            pst_load_index (pst_file *pf);
+int             pst_load_index (pst_file *pf);
 
 
 /** Load the extended attribute mapping table from the pst file. This
  *  should normally be the second call after pst_open().
  * @param pf pointer to the pst_file structure setup by pst_open().
  */
-int            pst_load_extended_attributes(pst_file *pf);
+int             pst_load_extended_attributes(pst_file *pf);
 
 
 /** Close a pst file.
  * @param pf pointer to the pst_file structure setup by pst_open().
  */
-int            pst_close(pst_file *pf);
+int             pst_close(pst_file *pf);
 
 
 /** Get the top of folders descriptor tree. This is the main descriptor tree
@@ -924,7 +935,7 @@
  * @param pf   pointer to the pst_file structure setup by pst_open().
  * @param root root item, which can be obtained by pst_parse_item(pf, pf->d.head, NULL).
  */
-pst_desc_tree* pst_getTopOfFolders(pst_file *pf, const pst_item *root);
+pst_desc_tree*  pst_getTopOfFolders(pst_file *pf, const pst_item *root);
 
 
 /** Assemble the binary attachment into a single buffer.
@@ -933,7 +944,7 @@
  * @return       structure containing size of and pointer to the buffer.
  *               the caller must free this buffer.
  */
-pst_binary     pst_attach_to_mem(pst_file *pf, pst_item_attach *attach);
+pst_binary      pst_attach_to_mem(pst_file *pf, pst_item_attach *attach);
 
 
 /** Write a binary attachment to a file.
@@ -941,7 +952,7 @@
  * @param attach pointer to the attachment record
  * @param fp     pointer to an open FILE.
  */
-size_t         pst_attach_to_file(pst_file *pf, pst_item_attach *attach, FILE* fp);
+size_t          pst_attach_to_file(pst_file *pf, pst_item_attach *attach, FILE* fp);
 
 
 /** Write a binary attachment base64 encoded to a file.
@@ -949,14 +960,14 @@
  * @param attach pointer to the attachment record
  * @param fp     pointer to an open FILE.
  */
-size_t         pst_attach_to_file_base64(pst_file *pf, pst_item_attach *attach, FILE* fp);
+size_t          pst_attach_to_file_base64(pst_file *pf, pst_item_attach *attach, FILE* fp);
 
 
 /** Walk the descriptor tree.
  * @param d pointer to the current item in the descriptor tree.
  * @return  pointer to the next item in the descriptor tree.
  */
-pst_desc_tree* pst_getNextDptr(pst_desc_tree* d);
+pst_desc_tree*  pst_getNextDptr(pst_desc_tree* d);
 
 
 /** Assemble a mapi object from a descriptor pointer.
@@ -966,13 +977,13 @@
  *               attached rfc822 messages, in which case it is attach->id2_head.
  * @return pointer to the mapi object. Must be free'd by pst_freeItem().
  */
-pst_item*      pst_parse_item (pst_file *pf, pst_desc_tree *d_ptr, pst_id2_tree *m_head);
+pst_item*       pst_parse_item (pst_file *pf, pst_desc_tree *d_ptr, pst_id2_tree *m_head);
 
 
 /** Free the item returned by pst_parse_item().
  * @param item  pointer to item returned from pst_parse_item().
  */
-void           pst_freeItem(pst_item *item);
+void            pst_freeItem(pst_item *item);
 
 
 /** Lookup the i_id in the index linked list, and return a pointer to the element.
@@ -980,7 +991,7 @@
  * @param i_id   key for the index linked list
  * @return pointer to the element, or NULL if not found.
  */
-pst_index_ll*  pst_getID(pst_file* pf, uint64_t i_id);
+pst_index_ll*   pst_getID(pst_file* pf, uint64_t i_id);
 
 
 /** Decrypt a block of data from the pst file.
@@ -993,7 +1004,7 @@
     @li 2 PST_ENCRYPT, german enigma 3 rotor cipher with fixed key
  * @return 0 if ok, -1 if error (NULL buffer or unknown encryption type)
  */
-int            pst_decrypt(uint64_t i_id, char *buf, size_t size, unsigned char type);
+int             pst_decrypt(uint64_t i_id, char *buf, size_t size, unsigned char type);
 
 
 /** Get an ID block from the file using pst_ff_getIDblock() and decrypt if necessary.
@@ -1003,7 +1014,7 @@
  *             If this pointer is non-NULL, it will first be free()d.
  * @return     Size of block read into memory
  */
-size_t         pst_ff_getIDblock_dec(pst_file *pf, uint64_t i_id, char **buf);
+size_t          pst_ff_getIDblock_dec(pst_file *pf, uint64_t i_id, char **buf);
 
 
 /** Read a block of data from the file into memory.
@@ -1013,7 +1024,7 @@
  *             If this pointer is non-NULL, it will first be free()d.
  * @return     size of block read into memory
  */
-size_t         pst_ff_getIDblock(pst_file *pf, uint64_t i_id, char** buf);
+size_t          pst_ff_getIDblock(pst_file *pf, uint64_t i_id, char** buf);
 
 
 /** fwrite with checking for null pointer.
@@ -1023,7 +1034,7 @@
  * @param stream output file
  * @return number of bytes written, zero if ptr==NULL
  */
-size_t         pst_fwrite(const void* ptr, size_t size, size_t nmemb, FILE* stream);
+size_t          pst_fwrite(const void* ptr, size_t size, size_t nmemb, FILE* stream);
 
 
 /** Add any necessary escape characters for rfc2426 vcard format
@@ -1033,7 +1044,7 @@
  *            to a different buffer containing the escaped string. In
  *            either case, you don't need to free this returned pointer.
  */
-char*          pst_rfc2426_escape(char *str);
+char*           pst_rfc2426_escape(char *str);
 
 
 /** Convert a FILETIME into rfc2425 date/time format 1953-10-15T23:10:00Z
@@ -1043,7 +1054,7 @@
  * @param[out] result  pointer to output buffer, must be at least 30 bytes
  * @return   time in rfc2425 format
  */
-char*          pst_rfc2425_datetime_format(const FILETIME* ft, int buflen, char* result);
+char*           pst_rfc2425_datetime_format(const FILETIME* ft, int buflen, char* result);
 
 
 /** Convert a FILETIME into rfc2445 date/time format 19531015T231000Z
@@ -1052,13 +1063,13 @@
  * @param[out] result  pointer to output buffer, must be at least 30 bytes
  * @return   time in rfc2445 format
  */
-char*          pst_rfc2445_datetime_format(const FILETIME* ft, int buflen, char* result);
+char*           pst_rfc2445_datetime_format(const FILETIME* ft, int buflen, char* result);
 
 
 /** Convert the current time rfc2445 date/time format 19531015T231000Z
  * @return   time in rfc2445 format
  */
-char*          pst_rfc2445_datetime_format_now(int buflen, char* result);
+char*           pst_rfc2445_datetime_format_now(int buflen, char* result);
 
 
 /** Get the default character set for this item. This is used to find
@@ -1066,21 +1077,21 @@
  * @param  item   pointer to the mapi item of interest
  * @return default character set as a string useable by iconv()
  */
-const char*    pst_default_charset(pst_item *item);
+const char*     pst_default_charset(pst_item *item);
 
 
 /** Convert str to utf8 if possible; null strings are preserved.
  * @param item  pointer to the containing mapi item
  * @param str   pointer to the mapi string of interest
  */
-void           pst_convert_utf8_null(pst_item *item, pst_string *str);
+void            pst_convert_utf8_null(pst_item *item, pst_string *str);
 
 
 /** Convert str to utf8 if possible; null strings are converted into empty strings.
  * @param item  pointer to the containing mapi item
  * @param str   pointer to the mapi string of interest
  */
-void           pst_convert_utf8(pst_item *item, pst_string *str);
+void            pst_convert_utf8(pst_item *item, pst_string *str);
 
 
 /** Decode raw recurrence data into a better structure.
@@ -1093,7 +1104,7 @@
 /** Free a recurrence structure.
  * @param r input pointer to be freed
  */
-void pst_free_recurrence(pst_recurrence* r);
+void            pst_free_recurrence(pst_recurrence* r);
 
 
 
--- a/src/readpst.c	Wed May 13 20:06:53 2009 -0700
+++ b/src/readpst.c	Sat May 16 10:32:26 2009 -0700
@@ -26,8 +26,8 @@
     int32_t type;
 };
 
-void      grim_reaper();
-pid_t     try_fork();
+int       grim_reaper();
+pid_t     try_fork(char* folder);
 void      process(pst_item *outeritem, pst_desc_tree *d_ptr);
 void      write_email_body(FILE *f, char *body);
 void      removeCR(char *c);
@@ -110,76 +110,84 @@
 #define RTF_ATTACH_TYPE "application/rtf"
 
 // global settings
-int mode         = MODE_NORMAL;
-int mode_MH      = 0;   // a submode of MODE_SEPARATE
-int output_mode  = OUTPUT_NORMAL;
-int contact_mode = CMODE_VCARD;
-int deleted_mode = DMODE_EXCLUDE;
-int contact_mode_specified = 0;
-int overwrite = 0;
-int save_rtf_body = 1;
-pst_file pstfile;
-regex_t  meta_charset_pattern;
+int         mode         = MODE_NORMAL;
+int         mode_MH      = 0;   // a submode of MODE_SEPARATE
+int         output_mode  = OUTPUT_NORMAL;
+int         contact_mode = CMODE_VCARD;
+int         deleted_mode = DMODE_EXCLUDE;
+int         contact_mode_specified = 0;
+int         overwrite = 0;
+int         save_rtf_body = 1;
+pst_file    pstfile;
+regex_t     meta_charset_pattern;
 
-int active_children;    // number of children of this process, cannot be larger than max_children
-int max_children;       // setup by main(), and at the start of new child process
-pid_t *child_processes; // setup by main(), and at the start of new child process
+int         number_processors = 1;  // number of cpus we have
+int         max_children  = 0;      // based on number of cpus and command line args
+int         max_child_specified = 0;// have command line arg -j
+int         active_children;        // number of children of this process, cannot be larger than max_children
+pid_t*      child_processes;        // setup by main(), and at the start of new child process
 
 #ifdef HAVE_SEMAPHORE_H
-sem_t global_children;
+int         shared_memory_id;       // id returned by shmget() in main() for the segment that holds the semaphore
+sem_t*      global_children = NULL; // semaphore placed in that segment, initialized to max_children in main(); NULL when not set up
 #endif
 
 
-void grim_reaper(int waitall)
+int grim_reaper(int waitall)    // reap exited children of this process; returns the current value of global_children (0 if it is not set up)
+{
+    int available = 0;          // stays 0 when fork/semaphore support is compiled out or global_children is NULL
+#ifdef HAVE_FORK
+#ifdef HAVE_SEMAPHORE_H
+    if (global_children) {
+        sem_getvalue(global_children, &available);
+        //printf("grim reaper %s for pid %d (parent %d) with %d children, %d available\n", (waitall) ? "all" : "", getpid(), getppid(), active_children, available);
+        fflush(stdout);
+        int i,j;
+        for (i=0; i<active_children; i++) {
+            pid_t child = child_processes[i];
+            pid_t ch = waitpid(child, NULL, ((waitall) ? 0 : WNOHANG));    // waitall: block until this child exits; otherwise only poll
+            if (ch == child) {
+                // this has terminated, remove it from the list
+                for (j=i; j<active_children-1; j++) {
+                    child_processes[j] = child_processes[j+1];  // shift the remaining entries down one slot
+                }
+                active_children--;
+                i--;            // slot i now holds the next entry, so examine it again
+            }
+        }
+        sem_getvalue(global_children, &available);  // read again after reaping; this second value is the one returned
+        //printf("grim reaper %s for pid %d with %d children, %d available\n", (waitall) ? "all" : "", getpid(), active_children, available);
+        fflush(stdout);
+    }
+#endif
+#endif
+    return available;
+}
+
+
+pid_t try_fork(char *folder)
 {
 #ifdef HAVE_FORK
 #ifdef HAVE_SEMAPHORE_H
-    printf("grim reaper %s for pid %d (parent %d) with %d children\n", (waitall) ? "all" : "", getpid(), getppid(), active_children);
-    fflush(stdout);
-    int i,j;
-    for (i=0; i<active_children; i++) {
-        pid_t child = child_processes[i];
-        pid_t ch = waitpid(child, NULL, ((waitall) ? 0 : WNOHANG));
-        if (ch == child) {
-            // this has terminated, remove it from the list
-            for (j=i; j<active_children-1; j++) {
-                child_processes[j] = child_processes[j+1];
-            }
-            active_children--;
-            i--;
-        }
-    }
-    printf("grim reaper %s for pid %d with %d children\n", (waitall) ? "all" : "", getpid(), active_children);
-    fflush(stdout);
-#endif
-#endif
-}
-
-
-pid_t try_fork()
-{
-#ifdef HAVE_FORK
-#ifdef HAVE_SEMAPHORE_H
-    int available;
-    grim_reaper(0);
-    sem_getvalue(&global_children, &available);
+    int available = grim_reaper(0);
     if (available) {
-        sem_wait(&global_children);
+        sem_wait(global_children);
         pid_t child = fork();
         if (child < 0) {
             // fork failed, pretend it worked and we are the child
             return 0;
         }
         else if (child == 0) {
-            pid_t me = getpid();
-            printf("forked child pid %d \n", me);
-            fflush(stdout);
             // fork worked, and we are the child, reinitialize *our* list of children
             active_children = 0;
             memset(child_processes, 0, sizeof(pid_t) * max_children);
+            pst_reopen(&pstfile);   // close and reopen the pst file to get an independent file position pointer
         }
         else {
             // fork worked, and we are the parent, record this child that we need to wait for
+            pid_t me = getpid();
+            //printf("parent %d forked child pid %d to process folder %s\n", me, child, folder);
+            fflush(stdout);
             child_processes[active_children++] = child;
         }
         return child;
@@ -232,7 +240,7 @@
             if (d_ptr->child && (deleted_mode == DMODE_INCLUDE || strcasecmp(item->file_as.str, "Deleted Items"))) {
                 //if this is a non-empty folder other than deleted items, we want to recurse into it
                 pid_t parent = getpid();
-                pid_t child = try_fork();
+                pid_t child = try_fork(item->file_as.str);
                 if (child == 0) {
                     // we are the child process, or the original parent if no children were available
                     pid_t me = getpid();
@@ -243,7 +251,7 @@
                         // we really were a child, forked for the sole purpose of processing this folder
                         // free my child count slot before really exiting, since
                         // all I am doing here is waiting for my children to exit
-                        sem_post(&global_children);
+                        sem_post(global_children);
                         grim_reaper(1); // wait for all my child processes to exit
                         exit(0);        // really exit
                     }
@@ -351,7 +359,7 @@
     }
 
     // command-line option handling
-    while ((c = getopt(argc, argv, "bc:Dd:hko:qrSMVw"))!= -1) {
+    while ((c = getopt(argc, argv, "bc:Dd:hj:kMo:qrSVw"))!= -1) {
         switch (c) {
         case 'b':
             save_rtf_body = 0;
@@ -380,9 +388,9 @@
             usage();
             exit(0);
             break;
-        case 'V':
-            version();
-            exit(0);
+        case 'j':
+            max_children = atoi(optarg);
+            max_child_specified = 1;
             break;
         case 'k':
             mode = MODE_KMAIL;
@@ -404,6 +412,10 @@
             mode = MODE_SEPARATE;
             mode_MH = 0;
             break;
+        case 'V':
+            version();
+            exit(0);
+            break;
         case 'w':
             overwrite = 1;
             break;
@@ -443,8 +455,6 @@
         DIE(("main: Cannot change to output dir %s: %s\n", output_dir, strerror(x)));
     }
 
-    if (output_mode != OUTPUT_QUIET) printf("About to start processing first record...\n");
-
     d_ptr = pstfile.d_head; // first record is main record
     item  = pst_parse_item(&pstfile, d_ptr, NULL);
     if (!item || !item->message_store) {
@@ -474,16 +484,38 @@
         DIE(("Top of folders record not found. Cannot continue\n"));
     }
 
-    max_children = (d_log) ? 0 : 10;
+#ifdef _SC_NPROCESSORS_ONLN
+    number_processors =  sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+    max_children    = (d_log) ? 0 : (!max_child_specified) ? number_processors * 4 : max_children;
+    active_children = 0;
     child_processes = (pid_t *)pst_malloc(sizeof(pid_t) * max_children);
-    active_children = 0;
     memset(child_processes, 0, sizeof(pid_t) * max_children);
+
 #ifdef HAVE_SEMAPHORE_H
-    sem_init(&global_children, 1, max_children);
+    if (max_children) {
+        shared_memory_id = shmget(IPC_PRIVATE, sizeof(sem_t), 0777);
+        //printf("shared memory id %d\n", shared_memory_id);
+        if (shared_memory_id >= 0) {
+            global_children = (sem_t *)shmat(shared_memory_id, NULL, 0);
+            //printf("shared memory pointer %p\n", (void*)global_children);
+            if (global_children == (sem_t *)-1) global_children = NULL;
+            if (global_children) sem_init(global_children, 1, max_children);
+            shmctl(shared_memory_id, IPC_RMID, NULL);
+        }
+    }
 #endif
+
     process(item, d_ptr->child);    // do the children of TOPF
     grim_reaper(1); // wait for all child processes
 
+#ifdef HAVE_SEMAPHORE_H
+    if (global_children) {
+        sem_destroy(global_children);
+        shmdt(global_children);
+    }
+#endif
+
     pst_freeItem(item);
     pst_close(&pstfile);
     DEBUG_RET();
@@ -531,7 +563,6 @@
     printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
     printf("OPTIONS:\n");
     printf("\t-V\t- Version. Display program version\n");
-    printf("\t-C\t- Decrypt (compressible encryption) the entire file and output on stdout (not typically useful)\n");
     printf("\t-D\t- Include deleted items in output\n");
     printf("\t-M\t- MH. Write emails in the MH format\n");
     printf("\t-S\t- Separate. Write emails in the separate format\n");
@@ -539,11 +570,14 @@
     printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n");
     printf("\t-d <filename> \t- Debug to file. This is a binary log. Use readpstlog to print it\n");
     printf("\t-h\t- Help. This screen\n");
+    printf("\t-j <integer>\t- Number of parallel jobs to run\n");
     printf("\t-k\t- KMail. Output in kmail format\n");
     printf("\t-o <dirname>\t- Output directory to write files to. CWD is changed *after* opening pst file\n");
     printf("\t-q\t- Quiet. Only print error messages\n");
     printf("\t-r\t- Recursive. Output in a recursive format\n");
     printf("\t-w\t- Overwrite any output mbox files\n");
+    printf("\n");
+    printf("Only one of -k -M -r -S should be specified\n");
     DEBUG_RET();
 }
 
@@ -713,7 +747,7 @@
         struct dirent *dirent = NULL;
         struct stat filestat;
         if (!(sdir = opendir("./"))) {
-            WARN(("mk_separate_dir: Cannot open dir \"%s\" for deletion of old contents\n", "./"));
+            DEBUG_WARN(("mk_separate_dir: Cannot open dir \"%s\" for deletion of old contents\n", "./"));
         } else {
             while ((dirent = readdir(sdir))) {
                 if (lstat(dirent->d_name, &filestat) != -1)
@@ -751,7 +785,7 @@
     DEBUG_ENT("mk_separate_file");
     DEBUG_MAIN(("opening next file to save email\n"));
     if (f->item_count > 999999999) { // bigger than nine 9's
-        DIE(("mk_separate_file: The number of emails in this folder has become too high to handle"));
+        DIE(("mk_separate_file: The number of emails in this folder has become too high to handle\n"));
     }
     sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count + name_offset);
     if (f->output) fclose(f->output);
@@ -848,7 +882,7 @@
     }
     DEBUG_EMAIL(("Saving attachment to %s\n", temp));
     if (!(fp = fopen(temp, "w"))) {
-        WARN(("write_separate_attachment: Cannot open attachment save file \"%s\"\n", temp));
+        DEBUG_WARN(("write_separate_attachment: Cannot open attachment save file \"%s\"\n", temp));
     } else {
         (void)pst_attach_to_file(pst, attach, fp);
         fclose(fp);
@@ -1681,7 +1715,8 @@
             pst_recurrence *rdata = pst_convert_recurrence(appointment);
             fprintf(f_output, "RRULE:FREQ=%s", rules[rdata->type]);
             if (rdata->count)       fprintf(f_output, ";COUNT=%u",      rdata->count);
-            if (rdata->interval)    fprintf(f_output, ";INTERVAL=%u",   rdata->interval);
+            if ((rdata->interval != 1) &&
+                (rdata->interval))  fprintf(f_output, ";INTERVAL=%u",   rdata->interval);
             if (rdata->dayofmonth)  fprintf(f_output, ";BYMONTHDAY=%d", rdata->dayofmonth);
             if (rdata->monthofyear) fprintf(f_output, ";BYMONTH=%d",    rdata->monthofyear);
             if (rdata->position)    fprintf(f_output, ";BYSETPOS=%d",   rdata->position);
@@ -1694,11 +1729,12 @@
                     int bit = 1 << i;
                     if (bit & rdata->bydaymask) {
                         char temp[40];
-                        snprintf(temp, sizeof(temp), "%s%s%s", byday, (empty) ? "BYDAY=" : ";", days[i]);
+                        snprintf(temp, sizeof(temp), "%s%s%s", byday, (empty) ? ";BYDAY=" : ",", days[i]);  // iCalendar RRULE: the BYDAY weekday list is comma separated
                         strcpy(byday, temp);
                         empty = 0;
                     }
                 }
+                fprintf(f_output, "%s", byday);
             }
             fprintf(f_output, "\n");
             pst_free_recurrence(rdata);
@@ -1816,7 +1852,7 @@
         fclose(f->output);
         stat(f->name, &st);
         if (!st.st_size) {
-            WARN(("removing empty output file %s ", f->name));
+            DEBUG_WARN(("removing empty output file %s\n", f->name));
             remove(f->name);
         }
     }
--- a/xml/libpst.in	Wed May 13 20:06:53 2009 -0700
+++ b/xml/libpst.in	Sat May 16 10:32:26 2009 -0700
@@ -35,7 +35,7 @@
 
     <refentry id="readpst.1">
         <refentryinfo>
-            <date>2009-04-15</date>
+            <date>2009-05-16</date>
         </refentryinfo>
 
         <refmeta>
@@ -61,6 +61,7 @@
                 <arg><option>-c <replaceable class="parameter">format</replaceable></option></arg>
                 <arg><option>-d <replaceable class="parameter">debug-file</replaceable></option></arg>
                 <arg><option>-h</option></arg>
+                <arg><option>-j <replaceable class="parameter">jobs</replaceable></option></arg>
                 <arg><option>-k</option></arg>
                 <arg><option>-o <replaceable class="parameter">output-directory</replaceable></option></arg>
                 <arg><option>-q</option></arg>
@@ -140,6 +141,13 @@
                     </para></listitem>
                 </varlistentry>
                 <varlistentry>
+                    <term>-j <replaceable class="parameter">jobs</replaceable></term>
+                    <listitem><para>
+                        Specifies the maximum number of parallel jobs. Specify 0 to suppress
+                        running parallel jobs.
+                    </para></listitem>
+                </varlistentry>
+                <varlistentry>
                     <term>-k</term>
                     <listitem><para>
                         Changes the output format to KMail.
@@ -228,7 +236,7 @@
 
     <refentry id="lspst.1">
         <refentryinfo>
-            <date>2009-04-15</date>
+            <date>2009-05-16</date>
         </refentryinfo>
 
         <refmeta>
@@ -331,7 +339,7 @@
 
     <refentry id="readpstlog.1">
         <refentryinfo>
-            <date>2009-04-15</date>
+            <date>2009-05-16</date>
         </refentryinfo>
 
         <refmeta>
@@ -506,7 +514,7 @@
 
     <refentry id="pst2ldif.1">
         <refentryinfo>
-            <date>2009-04-15</date>
+            <date>2009-05-16</date>
         </refentryinfo>
 
         <refmeta>
@@ -675,7 +683,7 @@
 
     <refentry id="pst2dii.1">
         <refentryinfo>
-            <date>2009-04-15</date>
+            <date>2009-05-16</date>
         </refentryinfo>
 
         <refmeta>
@@ -809,7 +817,7 @@
 
     <refentry id="pst.5">
         <refentryinfo>
-            <date>2009-04-15</date>
+            <date>2009-05-16</date>
         </refentryinfo>
 
         <refmeta>