Mercurial > libpst
changeset 201:3850a3b11745
fixes for parallel readpst
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Sat, 16 May 2009 10:32:26 -0700 (2009-05-16) |
parents | d360f96f71f6 |
children | 2f38c4ce606f |
files | TODO configure.in python/python-libpst.cpp regression/regression-tests.bash src/define.h src/libpst.c src/libpst.h src/readpst.c xml/libpst.in |
diffstat | 9 files changed, 205 insertions(+), 120 deletions(-) [+] |
line wrap: on
line diff
--- a/TODO Wed May 13 20:06:53 2009 -0700 +++ b/TODO Sat May 16 10:32:26 2009 -0700 @@ -5,5 +5,8 @@ pst2diii needs header and mime type updates from readpst. At the next soname bump (to libpst.so.5) we should + move some of readpst into the shared library, in particular write_normal_email() remove readpstlog, and produce ascii debug log files - move some of readpst into the shared library, in particular write_normal_email() + add debug FILE* into the pst_file structure, so all debug printing + will be passed the pst_file struct. Also add a semaphore in there + to synchronize debug printing.
--- a/configure.in Wed May 13 20:06:53 2009 -0700 +++ b/configure.in Sat May 16 10:32:26 2009 -0700 @@ -133,7 +133,7 @@ ) AC_HEADER_DIRENT AC_HEADER_STDC -AC_CHECK_HEADERS([ctype.h dirent.h errno.h fcntl.h inttypes.h limits.h regex.h semaphore.h signal.h stdarg.h stdint.h stdio.h stdlib.h string.h sys/param.h sys/stat.h sys/types.h time.h unistd.h wchar.h]) +AC_CHECK_HEADERS([ctype.h dirent.h errno.h fcntl.h inttypes.h limits.h regex.h semaphore.h signal.h stdarg.h stdint.h stdio.h stdlib.h string.h sys/param.h sys/shm.h sys/stat.h sys/types.h time.h unistd.h wchar.h]) AC_SEARCH_LIBS([sem_init],rt) @@ -161,7 +161,7 @@ fi AC_FUNC_STRFTIME AC_FUNC_VPRINTF -AC_CHECK_FUNCS([memchr memmove memset regcomp strcasecmp strncasecmp strchr strdup strerror strpbrk strrchr strstr strtol]) +AC_CHECK_FUNCS([chdir getcwd memchr memmove memset regcomp strcasecmp strncasecmp strchr strdup strerror strpbrk strrchr strstr strtol]) AM_ICONV if test "$am_cv_func_iconv" != "yes"; then AC_MSG_ERROR([libpst requires iconv which is missing]) @@ -285,11 +285,13 @@ enable_shared="yes" # check for boost AX_PYTHON - AX_BOOST_PYTHON if test "$ax_python_bin" = "no"; then AC_MSG_ERROR(python binary not found) fi - + AX_BOOST_PYTHON + if test "$ac_cv_boost_python" = "no"; then + AC_MSG_ERROR(boost python not found) + fi AC_SUBST(PYTHON_VERSION, [$ax_python_bin]) fi
--- a/python/python-libpst.cpp Wed May 13 20:06:53 2009 -0700 +++ b/python/python-libpst.cpp Sat May 16 10:32:26 2009 -0700 @@ -588,6 +588,8 @@ ; class_<pst_file>("pst_file") + .def_readonly("cwd", &pst_file::cwd) + .def_readonly("fname", &pst_file::fname) .add_property("i_head", make_getter(&pst_file::i_head, return_value_policy<reference_existing_object>())) .add_property("i_tail", make_getter(&pst_file::i_tail, return_value_policy<reference_existing_object>())) .add_property("d_head", make_getter(&pst_file::d_head, return_value_policy<reference_existing_object>()))
--- a/regression/regression-tests.bash Wed May 13 20:06:53 2009 -0700 +++ b/regression/regression-tests.bash Sat May 16 10:32:26 2009 -0700 @@ -54,8 +54,8 @@ # ../src/readpst -cv -o output$n $fn >$ba.err 2>&1 # readpst -cv -o output$n -d dumper $fn >$ba.err 2>&1 $val ../src/readpst -r -D -cv -o output$n $fn - #$val ../src/readpst -r -D -cv -o output$n -d dumper $fn >$ba.err 2>&1 - # ../src/readpstlog -f I dumper >$ba.log + # $val ../src/readpst -r -D -cv -o output$n -d dumper $fn >$ba.err 2>&1 + # ../src/readpstlog -f I dumper >$ba.log #../src/getidblock -d -p $fn 0 >$ba.fulldump #../src/readpstlog -f I getidblock.log >$ba.fulldump.log @@ -117,15 +117,16 @@ #dopst 13 test-text.pst #dopst 14 joe.romanowski.pst #dopst 15 hourig1.pst - ##dopst 16 hourig2.pst - dopst 17 hourig3.pst + dopst 16 hourig2.pst + #dopst 17 hourig3.pst #dopst 18 test-mac.pst - ##dopst 19 harris.pst + #dopst 19 harris.pst #dopst 20 spam.pst #dopst 21 rendgen.pst # single email appointment - dopst 22 rendgen2.pst # email appointment with no termination date + #dopst 22 rendgen2.pst # email appointment with no termination date #dopst 23 rendgen3.pst # mime signed email - dopst 24 rendgen4.pst # appointment test cases + #dopst 24 rendgen4.pst # appointment test cases + #dopst 25 rendgen5.pst # appointment test cases fi grep 'lost:' *err | grep -v 'lost: 0 '
--- a/src/define.h Wed May 13 20:06:53 2009 -0700 +++ b/src/define.h Sat May 16 10:32:26 2009 -0700 @@ -136,6 +136,10 @@ #include <sys/types.h> #endif +#ifdef HAVE_SYS_SHM_H + #include <sys/shm.h> +#endif + #ifdef HAVE_SYS_WAIT_H #include <sys/wait.h> #endif
--- a/src/libpst.c Wed May 13 20:06:53 2009 -0700 +++ b/src/libpst.c Sat May 16 10:32:26 2009 -0700 @@ -253,7 +253,7 @@ }; static int pst_build_desc_ptr(pst_file *pf, int64_t offset, int32_t depth, uint64_t linku1, uint64_t start_val, uint64_t end_val); -static pst_id2_tree* pst_build_id2(pst_file *pf, pst_index_ll* list); +static pst_id2_tree* pst_build_id2(pst_file *pf, pst_index_ll* list); static int pst_build_id_ptr(pst_file *pf, int64_t offset, int32_t depth, uint64_t linku1, uint64_t start_val, uint64_t end_val); static int pst_chr_count(char *str, char x); static size_t pst_ff_compile_ID(pst_file *pf, uint64_t i_id, pst_holder *h, size_t size); @@ -268,8 +268,8 @@ static size_t pst_getAtPos(pst_file *pf, int64_t pos, void* buf, size_t size); static int pst_getBlockOffsetPointer(pst_file *pf, pst_id2_tree *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p); static int pst_getBlockOffset(char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset *p); -static pst_id2_tree* pst_getID2(pst_id2_tree * ptr, uint64_t id); -static pst_desc_tree* pst_getDptr(pst_file *pf, uint64_t d_id); +static pst_id2_tree* pst_getID2(pst_id2_tree * ptr, uint64_t id); +static pst_desc_tree* pst_getDptr(pst_file *pf, uint64_t d_id); static uint64_t pst_getIntAt(pst_file *pf, char *buf); static uint64_t pst_getIntAtPos(pst_file *pf, int64_t pos); static pst_mapi_object* pst_parse_block(pst_file *pf, uint64_t block_id, pst_id2_tree *i2_head); @@ -353,6 +353,20 @@ DEBUG_INFO(("Pointer1 is %#"PRIx64", back pointer2 is %#"PRIx64"\n", pf->index1, pf->index1_back)); DEBUG_RET(); + + pf->cwd = pst_malloc(PATH_MAX+1); + getcwd(pf->cwd, PATH_MAX+1); + pf->fname = strdup(name); + return 0; +} + + +int pst_reopen(pst_file *pf) { + char cwd[PATH_MAX]; + if (!getcwd(cwd, PATH_MAX)) return -1; + if (chdir(pf->cwd)) return -1; + if (!freopen(pf->fname, "rb", pf->fp)) return -1; + if (chdir(cwd)) return -1; return 0; } @@ -365,13 +379,14 @@ } if 
(fclose(pf->fp)) { DEBUG_WARN(("fclose returned non-zero value\n")); - DEBUG_RET(); - return -1; } + // free the paths + free(pf->cwd); + free(pf->fname); // we must free the id linklist and the desc tree - pst_free_id (pf->i_head); - pst_free_desc (pf->d_head); - pst_free_xattrib (pf->x_head); + pst_free_id(pf->i_head); + pst_free_desc(pf->d_head); + pst_free_xattrib(pf->x_head); DEBUG_RET(); return 0; } @@ -2490,6 +2505,8 @@ LIST_COPY_CSTR(item->ascii_type); if (pst_strincmp("IPF.Note", item->ascii_type, 8) == 0) item->type = PST_TYPE_NOTE; + if (pst_strincmp("IPF.Imap", item->ascii_type, 8) == 0) + item->type = PST_TYPE_NOTE; else if (pst_stricmp("IPF", item->ascii_type) == 0) item->type = PST_TYPE_NOTE; else if (pst_strincmp("IPF.Contact", item->ascii_type, 11) == 0) @@ -4335,6 +4352,7 @@ } if (i <= s) { r->termination = PST_LE_GET_UINT8(p+i) - 0x21; i += 4; } if (i+4 <= s) { r->count = PST_LE_GET_UINT32(p+i); i += 4; } + if (r->termination == 2) r->count = 0; switch (r->type) { case 0: // daily if (r->sub_type == 0) {
--- a/src/libpst.h Wed May 13 20:06:53 2009 -0700 +++ b/src/libpst.h Sat May 16 10:32:26 2009 -0700 @@ -850,6 +850,12 @@ typedef struct pst_file { + /** file pointer to opened PST file */ + FILE* fp; + /** original cwd when the file was opened */ + char* cwd; + /** original file name when the file was opened */ + char* fname; /** the head and tail of the linked list of index structures */ pst_index_ll *i_head, *i_tail; /** the head and tail of the top level of the descriptor tree */ @@ -872,8 +878,6 @@ uint64_t index2; /** back pointer value in the first b-tree node in the descriptor tree */ uint64_t index2_back; - /** file pointer to opened PST file */ - FILE * fp; /** size of the pst file */ uint64_t size; /** @li 0 PST_NO_ENCRYPT, none @@ -895,7 +899,14 @@ * @param name name of the file, suitable for fopen(). * @return 0 if ok, -1 if error */ -int pst_open(pst_file *pf, const char *name); +int pst_open(pst_file *pf, const char *name); + + +/** Reopen the pst file after a fork + * @param pf pointer to the pst_file structure setup by pst_open(). + * @return 0 if ok, -1 if error + */ +int pst_reopen(pst_file *pf); /** Load the index entries from the pst file. This loads both the @@ -903,20 +914,20 @@ * first call after pst_open(). * @param pf pointer to the pst_file structure setup by pst_open(). */ -int pst_load_index (pst_file *pf); +int pst_load_index (pst_file *pf); /** Load the extended attribute mapping table from the pst file. This * should normally be the second call after pst_open(). * @param pf pointer to the pst_file structure setup by pst_open(). */ -int pst_load_extended_attributes(pst_file *pf); +int pst_load_extended_attributes(pst_file *pf); /** Close a pst file. * @param pf pointer to the pst_file structure setup by pst_open(). */ -int pst_close(pst_file *pf); +int pst_close(pst_file *pf); /** Get the top of folders descriptor tree. 
This is the main descriptor tree @@ -924,7 +935,7 @@ * @param pf pointer to the pst_file structure setup by pst_open(). * @param root root item, which can be obtained by pst_parse_item(pf, pf->d.head, NULL). */ -pst_desc_tree* pst_getTopOfFolders(pst_file *pf, const pst_item *root); +pst_desc_tree* pst_getTopOfFolders(pst_file *pf, const pst_item *root); /** Assemble the binary attachment into a single buffer. @@ -933,7 +944,7 @@ * @return structure containing size of and pointer to the buffer. * the caller must free this buffer. */ -pst_binary pst_attach_to_mem(pst_file *pf, pst_item_attach *attach); +pst_binary pst_attach_to_mem(pst_file *pf, pst_item_attach *attach); /** Write a binary attachment to a file. @@ -941,7 +952,7 @@ * @param attach pointer to the attachment record * @param fp pointer to an open FILE. */ -size_t pst_attach_to_file(pst_file *pf, pst_item_attach *attach, FILE* fp); +size_t pst_attach_to_file(pst_file *pf, pst_item_attach *attach, FILE* fp); /** Write a binary attachment base64 encoded to a file. @@ -949,14 +960,14 @@ * @param attach pointer to the attachment record * @param fp pointer to an open FILE. */ -size_t pst_attach_to_file_base64(pst_file *pf, pst_item_attach *attach, FILE* fp); +size_t pst_attach_to_file_base64(pst_file *pf, pst_item_attach *attach, FILE* fp); /** Walk the descriptor tree. * @param d pointer to the current item in the descriptor tree. * @return pointer to the next item in the descriptor tree. */ -pst_desc_tree* pst_getNextDptr(pst_desc_tree* d); +pst_desc_tree* pst_getNextDptr(pst_desc_tree* d); /** Assemble a mapi object from a descriptor pointer. @@ -966,13 +977,13 @@ * attached rfc822 messages, in which case it is attach->id2_head. * @return pointer to the mapi object. Must be free'd by pst_freeItem(). 
*/ -pst_item* pst_parse_item (pst_file *pf, pst_desc_tree *d_ptr, pst_id2_tree *m_head); +pst_item* pst_parse_item (pst_file *pf, pst_desc_tree *d_ptr, pst_id2_tree *m_head); /** Free the item returned by pst_parse_item(). * @param item pointer to item returned from pst_parse_item(). */ -void pst_freeItem(pst_item *item); +void pst_freeItem(pst_item *item); /** Lookup the i_id in the index linked list, and return a pointer to the element. @@ -980,7 +991,7 @@ * @param i_id key for the index linked list * @return pointer to the element, or NULL if not found. */ -pst_index_ll* pst_getID(pst_file* pf, uint64_t i_id); +pst_index_ll* pst_getID(pst_file* pf, uint64_t i_id); /** Decrypt a block of data from the pst file. @@ -993,7 +1004,7 @@ @li 2 PST_ENCRYPT, german enigma 3 rotor cipher with fixed key * @return 0 if ok, -1 if error (NULL buffer or unknown encryption type) */ -int pst_decrypt(uint64_t i_id, char *buf, size_t size, unsigned char type); +int pst_decrypt(uint64_t i_id, char *buf, size_t size, unsigned char type); /** Get an ID block from the file using pst_ff_getIDblock() and decrypt if necessary. @@ -1003,7 +1014,7 @@ * If this pointer is non-NULL, it will first be free()d. * @return Size of block read into memory */ -size_t pst_ff_getIDblock_dec(pst_file *pf, uint64_t i_id, char **buf); +size_t pst_ff_getIDblock_dec(pst_file *pf, uint64_t i_id, char **buf); /** Read a block of data from the file into memory. @@ -1013,7 +1024,7 @@ * If this pointer is non-NULL, it will first be free()d. * @return size of block read into memory */ -size_t pst_ff_getIDblock(pst_file *pf, uint64_t i_id, char** buf); +size_t pst_ff_getIDblock(pst_file *pf, uint64_t i_id, char** buf); /** fwrite with checking for null pointer. 
@@ -1023,7 +1034,7 @@ * @param stream output file * @return number of bytes written, zero if ptr==NULL */ -size_t pst_fwrite(const void* ptr, size_t size, size_t nmemb, FILE* stream); +size_t pst_fwrite(const void* ptr, size_t size, size_t nmemb, FILE* stream); /** Add any necessary escape characters for rfc2426 vcard format @@ -1033,7 +1044,7 @@ * to a different buffer containing the escaped string. In * either case, you don't need to free this returned pointer. */ -char* pst_rfc2426_escape(char *str); +char* pst_rfc2426_escape(char *str); /** Convert a FILETIME into rfc2425 date/time format 1953-10-15T23:10:00Z @@ -1043,7 +1054,7 @@ * @param[out] result pointer to output buffer, must be at least 30 bytes * @return time in rfc2425 format */ -char* pst_rfc2425_datetime_format(const FILETIME* ft, int buflen, char* result); +char* pst_rfc2425_datetime_format(const FILETIME* ft, int buflen, char* result); /** Convert a FILETIME into rfc2445 date/time format 19531015T231000Z @@ -1052,13 +1063,13 @@ * @param[out] result pointer to output buffer, must be at least 30 bytes * @return time in rfc2445 format */ -char* pst_rfc2445_datetime_format(const FILETIME* ft, int buflen, char* result); +char* pst_rfc2445_datetime_format(const FILETIME* ft, int buflen, char* result); /** Convert the current time rfc2445 date/time format 19531015T231000Z * @return time in rfc2445 format */ -char* pst_rfc2445_datetime_format_now(int buflen, char* result); +char* pst_rfc2445_datetime_format_now(int buflen, char* result); /** Get the default character set for this item. This is used to find @@ -1066,21 +1077,21 @@ * @param item pointer to the mapi item of interest * @return default character set as a string useable by iconv() */ -const char* pst_default_charset(pst_item *item); +const char* pst_default_charset(pst_item *item); /** Convert str to utf8 if possible; null strings are preserved. 
* @param item pointer to the containing mapi item * @param str pointer to the mapi string of interest */ -void pst_convert_utf8_null(pst_item *item, pst_string *str); +void pst_convert_utf8_null(pst_item *item, pst_string *str); /** Convert str to utf8 if possible; null strings are converted into empty strings. * @param item pointer to the containing mapi item * @param str pointer to the mapi string of interest */ -void pst_convert_utf8(pst_item *item, pst_string *str); +void pst_convert_utf8(pst_item *item, pst_string *str); /** Decode raw recurrence data into a better structure. @@ -1093,7 +1104,7 @@ /** Free a recurrence structure. * @param r input pointer to be freed */ -void pst_free_recurrence(pst_recurrence* r); +void pst_free_recurrence(pst_recurrence* r);
--- a/src/readpst.c Wed May 13 20:06:53 2009 -0700 +++ b/src/readpst.c Sat May 16 10:32:26 2009 -0700 @@ -26,8 +26,8 @@ int32_t type; }; -void grim_reaper(); -pid_t try_fork(); +int grim_reaper(); +pid_t try_fork(char* folder); void process(pst_item *outeritem, pst_desc_tree *d_ptr); void write_email_body(FILE *f, char *body); void removeCR(char *c); @@ -110,76 +110,84 @@ #define RTF_ATTACH_TYPE "application/rtf" // global settings -int mode = MODE_NORMAL; -int mode_MH = 0; // a submode of MODE_SEPARATE -int output_mode = OUTPUT_NORMAL; -int contact_mode = CMODE_VCARD; -int deleted_mode = DMODE_EXCLUDE; -int contact_mode_specified = 0; -int overwrite = 0; -int save_rtf_body = 1; -pst_file pstfile; -regex_t meta_charset_pattern; +int mode = MODE_NORMAL; +int mode_MH = 0; // a submode of MODE_SEPARATE +int output_mode = OUTPUT_NORMAL; +int contact_mode = CMODE_VCARD; +int deleted_mode = DMODE_EXCLUDE; +int contact_mode_specified = 0; +int overwrite = 0; +int save_rtf_body = 1; +pst_file pstfile; +regex_t meta_charset_pattern; -int active_children; // number of children of this process, cannot be larger than max_children -int max_children; // setup by main(), and at the start of new child process -pid_t *child_processes; // setup by main(), and at the start of new child process +int number_processors = 1; // number of cpus we have +int max_children = 0; // based on number of cpus and command line args +int max_child_specified = 0;// have command line arg -j +int active_children; // number of children of this process, cannot be larger than max_children +pid_t* child_processes; // setup by main(), and at the start of new child process #ifdef HAVE_SEMAPHORE_H -sem_t global_children; +int shared_memory_id; +sem_t* global_children = NULL; #endif -void grim_reaper(int waitall) +int grim_reaper(int waitall) +{ + int available = 0; +#ifdef HAVE_FORK +#ifdef HAVE_SEMAPHORE_H + if (global_children) { + sem_getvalue(global_children, &available); + //printf("grim reaper %s for 
pid %d (parent %d) with %d children, %d available\n", (waitall) ? "all" : "", getpid(), getppid(), active_children, available); + fflush(stdout); + int i,j; + for (i=0; i<active_children; i++) { + pid_t child = child_processes[i]; + pid_t ch = waitpid(child, NULL, ((waitall) ? 0 : WNOHANG)); + if (ch == child) { + // this has terminated, remove it from the list + for (j=i; j<active_children-1; j++) { + child_processes[j] = child_processes[j+1]; + } + active_children--; + i--; + } + } + sem_getvalue(global_children, &available); + //printf("grim reaper %s for pid %d with %d children, %d available\n", (waitall) ? "all" : "", getpid(), active_children, available); + fflush(stdout); + } +#endif +#endif + return available; +} + + +pid_t try_fork(char *folder) { #ifdef HAVE_FORK #ifdef HAVE_SEMAPHORE_H - printf("grim reaper %s for pid %d (parent %d) with %d children\n", (waitall) ? "all" : "", getpid(), getppid(), active_children); - fflush(stdout); - int i,j; - for (i=0; i<active_children; i++) { - pid_t child = child_processes[i]; - pid_t ch = waitpid(child, NULL, ((waitall) ? 0 : WNOHANG)); - if (ch == child) { - // this has terminated, remove it from the list - for (j=i; j<active_children-1; j++) { - child_processes[j] = child_processes[j+1]; - } - active_children--; - i--; - } - } - printf("grim reaper %s for pid %d with %d children\n", (waitall) ? 
"all" : "", getpid(), active_children); - fflush(stdout); -#endif -#endif -} - - -pid_t try_fork() -{ -#ifdef HAVE_FORK -#ifdef HAVE_SEMAPHORE_H - int available; - grim_reaper(0); - sem_getvalue(&global_children, &available); + int available = grim_reaper(0); if (available) { - sem_wait(&global_children); + sem_wait(global_children); pid_t child = fork(); if (child < 0) { // fork failed, pretend it worked and we are the child return 0; } else if (child == 0) { - pid_t me = getpid(); - printf("forked child pid %d \n", me); - fflush(stdout); // fork worked, and we are the child, reinitialize *our* list of children active_children = 0; memset(child_processes, 0, sizeof(pid_t) * max_children); + pst_reopen(&pstfile); // close and reopen the pst file to get an independent file position pointer } else { // fork worked, and we are the parent, record this child that we need to wait for + pid_t me = getpid(); + //printf("parent %d forked child pid %d to process folder %s\n", me, child, folder); + fflush(stdout); child_processes[active_children++] = child; } return child; @@ -232,7 +240,7 @@ if (d_ptr->child && (deleted_mode == DMODE_INCLUDE || strcasecmp(item->file_as.str, "Deleted Items"))) { //if this is a non-empty folder other than deleted items, we want to recurse into it pid_t parent = getpid(); - pid_t child = try_fork(); + pid_t child = try_fork(item->file_as.str); if (child == 0) { // we are the child process, or the original parent if no children were available pid_t me = getpid(); @@ -243,7 +251,7 @@ // we really were a child, forked for the sole purpose of processing this folder // free my child count slot before really exiting, since // all I am doing here is waiting for my children to exit - sem_post(&global_children); + sem_post(global_children); grim_reaper(1); // wait for all my child processes to exit exit(0); // really exit } @@ -351,7 +359,7 @@ } // command-line option handling - while ((c = getopt(argc, argv, "bc:Dd:hko:qrSMVw"))!= -1) { + while ((c = 
getopt(argc, argv, "bc:Dd:hj:kMo:qrSVw"))!= -1) { switch (c) { case 'b': save_rtf_body = 0; @@ -380,9 +388,9 @@ usage(); exit(0); break; - case 'V': - version(); - exit(0); + case 'j': + max_children = atoi(optarg); + max_child_specified = 1; break; case 'k': mode = MODE_KMAIL; @@ -404,6 +412,10 @@ mode = MODE_SEPARATE; mode_MH = 0; break; + case 'V': + version(); + exit(0); + break; case 'w': overwrite = 1; break; @@ -443,8 +455,6 @@ DIE(("main: Cannot change to output dir %s: %s\n", output_dir, strerror(x))); } - if (output_mode != OUTPUT_QUIET) printf("About to start processing first record...\n"); - d_ptr = pstfile.d_head; // first record is main record item = pst_parse_item(&pstfile, d_ptr, NULL); if (!item || !item->message_store) { @@ -474,16 +484,38 @@ DIE(("Top of folders record not found. Cannot continue\n")); } - max_children = (d_log) ? 0 : 10; +#ifdef _SC_NPROCESSORS_ONLN + number_processors = sysconf(_SC_NPROCESSORS_ONLN); +#endif + max_children = (d_log) ? 0 : (!max_child_specified) ? 
number_processors * 4 : max_children; + active_children = 0; child_processes = (pid_t *)pst_malloc(sizeof(pid_t) * max_children); - active_children = 0; memset(child_processes, 0, sizeof(pid_t) * max_children); + #ifdef HAVE_SEMAPHORE_H - sem_init(&global_children, 1, max_children); + if (max_children) { + shared_memory_id = shmget(IPC_PRIVATE, sizeof(sem_t), 0777); + //printf("shared memory id %d\n", shared_memory_id); + if (shared_memory_id >= 0) { + global_children = (sem_t *)shmat(shared_memory_id, NULL, 0); + //printf("shared memory pointer %p\n", (void*)global_children); + if (global_children == (sem_t *)-1) global_children = NULL; + if (global_children) sem_init(global_children, 1, max_children); + shmctl(shared_memory_id, IPC_RMID, NULL); + } + } #endif + process(item, d_ptr->child); // do the children of TOPF grim_reaper(1); // wait for all child processes +#ifdef HAVE_SEMAPHORE_H + if (global_children) { + sem_destroy(global_children); + shmdt(global_children); + } +#endif + pst_freeItem(item); pst_close(&pstfile); DEBUG_RET(); @@ -531,7 +563,6 @@ printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name); printf("OPTIONS:\n"); printf("\t-V\t- Version. Display program version\n"); - printf("\t-C\t- Decrypt (compressible encryption) the entire file and output on stdout (not typically useful)\n"); printf("\t-D\t- Include deleted items in output\n"); printf("\t-M\t- MH. Write emails in the MH format\n"); printf("\t-S\t- Separate. Write emails in the separate format\n"); @@ -539,11 +570,14 @@ printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n"); printf("\t-d <filename> \t- Debug to file. This is a binary log. Use readpstlog to print it\n"); printf("\t-h\t- Help. This screen\n"); + printf("\t-j <integer>\t- Number of parallel jobs to run\n"); printf("\t-k\t- KMail. Output in kmail format\n"); printf("\t-o <dirname>\t- Output directory to write files to. CWD is changed *after* opening pst file\n"); printf("\t-q\t- Quiet. 
Only print error messages\n"); printf("\t-r\t- Recursive. Output in a recursive format\n"); printf("\t-w\t- Overwrite any output mbox files\n"); + printf("\n"); + printf("Only one of -k -M -r -S should be specified\n"); DEBUG_RET(); } @@ -713,7 +747,7 @@ struct dirent *dirent = NULL; struct stat filestat; if (!(sdir = opendir("./"))) { - WARN(("mk_separate_dir: Cannot open dir \"%s\" for deletion of old contents\n", "./")); + DEBUG_WARN(("mk_separate_dir: Cannot open dir \"%s\" for deletion of old contents\n", "./")); } else { while ((dirent = readdir(sdir))) { if (lstat(dirent->d_name, &filestat) != -1) @@ -751,7 +785,7 @@ DEBUG_ENT("mk_separate_file"); DEBUG_MAIN(("opening next file to save email\n")); if (f->item_count > 999999999) { // bigger than nine 9's - DIE(("mk_separate_file: The number of emails in this folder has become too high to handle")); + DIE(("mk_separate_file: The number of emails in this folder has become too high to handle\n")); } sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count + name_offset); if (f->output) fclose(f->output); @@ -848,7 +882,7 @@ } DEBUG_EMAIL(("Saving attachment to %s\n", temp)); if (!(fp = fopen(temp, "w"))) { - WARN(("write_separate_attachment: Cannot open attachment save file \"%s\"\n", temp)); + DEBUG_WARN(("write_separate_attachment: Cannot open attachment save file \"%s\"\n", temp)); } else { (void)pst_attach_to_file(pst, attach, fp); fclose(fp); @@ -1681,7 +1715,8 @@ pst_recurrence *rdata = pst_convert_recurrence(appointment); fprintf(f_output, "RRULE:FREQ=%s", rules[rdata->type]); if (rdata->count) fprintf(f_output, ";COUNT=%u", rdata->count); - if (rdata->interval) fprintf(f_output, ";INTERVAL=%u", rdata->interval); + if ((rdata->interval != 1) && + (rdata->interval)) fprintf(f_output, ";INTERVAL=%u", rdata->interval); if (rdata->dayofmonth) fprintf(f_output, ";BYMONTHDAY=%d", rdata->dayofmonth); if (rdata->monthofyear) fprintf(f_output, ";BYMONTH=%d", rdata->monthofyear); if (rdata->position) 
fprintf(f_output, ";BYSETPOS=%d", rdata->position); @@ -1694,11 +1729,12 @@ int bit = 1 << i; if (bit & rdata->bydaymask) { char temp[40]; - snprintf(temp, sizeof(temp), "%s%s%s", byday, (empty) ? "BYDAY=" : ";", days[i]); + snprintf(temp, sizeof(temp), "%s%s%s", byday, (empty) ? ";BYDAY=" : ";", days[i]); strcpy(byday, temp); empty = 0; } } + fprintf(f_output, "%s", byday); } fprintf(f_output, "\n"); pst_free_recurrence(rdata); @@ -1816,7 +1852,7 @@ fclose(f->output); stat(f->name, &st); if (!st.st_size) { - WARN(("removing empty output file %s ", f->name)); + DEBUG_WARN(("removing empty output file %s\n", f->name)); remove(f->name); } }
--- a/xml/libpst.in Wed May 13 20:06:53 2009 -0700 +++ b/xml/libpst.in Sat May 16 10:32:26 2009 -0700 @@ -35,7 +35,7 @@ <refentry id="readpst.1"> <refentryinfo> - <date>2009-04-15</date> + <date>2009-05-16</date> </refentryinfo> <refmeta> @@ -61,6 +61,7 @@ <arg><option>-c <replaceable class="parameter">format</replaceable></option></arg> <arg><option>-d <replaceable class="parameter">debug-file</replaceable></option></arg> <arg><option>-h</option></arg> + <arg><option>-j <replaceable class="parameter">jobs</replaceable></option></arg> <arg><option>-k</option></arg> <arg><option>-o <replaceable class="parameter">output-directory</replaceable></option></arg> <arg><option>-q</option></arg> @@ -140,6 +141,13 @@ </para></listitem> </varlistentry> <varlistentry> + <term>-j <replaceable class="parameter">jobs</replaceable></term> + <listitem><para> + Specifies the maximum number of parallel jobs. Specify 0 to suppress + running parallel jobs. + </para></listitem> + </varlistentry> + <varlistentry> <term>-k</term> <listitem><para> Changes the output format to KMail. @@ -228,7 +236,7 @@ <refentry id="lspst.1"> <refentryinfo> - <date>2009-04-15</date> + <date>2009-05-16</date> </refentryinfo> <refmeta> @@ -331,7 +339,7 @@ <refentry id="readpstlog.1"> <refentryinfo> - <date>2009-04-15</date> + <date>2009-05-16</date> </refentryinfo> <refmeta> @@ -506,7 +514,7 @@ <refentry id="pst2ldif.1"> <refentryinfo> - <date>2009-04-15</date> + <date>2009-05-16</date> </refentryinfo> <refmeta> @@ -675,7 +683,7 @@ <refentry id="pst2dii.1"> <refentryinfo> - <date>2009-04-15</date> + <date>2009-05-16</date> </refentryinfo> <refmeta> @@ -809,7 +817,7 @@ <refentry id="pst.5"> <refentryinfo> - <date>2009-04-15</date> + <date>2009-05-16</date> </refentryinfo> <refmeta>