# HG changeset patch # User Carl Byington # Date 1252950999 25200 # Node ID aa50c23a69356b4c611a0d61636438cf43dd540c # Parent 410b6422d65bf9a3ac3b6627238fde4e474100d1 patch from Lee Ayres to add file name extensions in separate mode; allow mixed items types in a folder in separate mode diff -r 410b6422d65b -r aa50c23a6935 AUTHORS --- a/AUTHORS Sun Sep 13 10:49:12 2009 -0700 +++ b/AUTHORS Mon Sep 14 10:56:39 2009 -0700 @@ -30,6 +30,7 @@ bharder Chris White Roberto Polli + Lee Ayres Testing team: diff -r 410b6422d65b -r aa50c23a6935 ChangeLog --- a/ChangeLog Sun Sep 13 10:49:12 2009 -0700 +++ b/ChangeLog Mon Sep 14 10:56:39 2009 -0700 @@ -1,6 +1,8 @@ LibPST 0.6.44 (2009-xx-xx) =============================== * fix --help usage; readpstlog is gone, debug files are now ascii text. + * patch from Lee Ayres to add file name extensions in separate mode. + * allow mixed items types in a folder in separate mode. LibPST 0.6.43 (2009-09-12) =============================== diff -r 410b6422d65b -r aa50c23a6935 NEWS --- a/NEWS Sun Sep 13 10:49:12 2009 -0700 +++ b/NEWS Mon Sep 14 10:56:39 2009 -0700 @@ -1,3 +1,4 @@ +0.6.44 2009-09-14 patch from Lee Ayres to add file name extensions in separate mode 0.6.43 2009-09-12 patches from Justin Greer, Chris White, Roberto Polli; better rfc822 embedded message decoding 0.6.42 2009-09-03 patch from Fridrich Strba to build with DJGPP DOS cross-compiler 0.6.41 2009-06-23 fix ax_python detection - should not use locate command diff -r 410b6422d65b -r aa50c23a6935 configure.in --- a/configure.in Sun Sep 13 10:49:12 2009 -0700 +++ b/configure.in Mon Sep 14 10:56:39 2009 -0700 @@ -1,5 +1,5 @@ AC_PREREQ(2.59) -AC_INIT(libpst,0.6.43,carl@five-ten-sg.com) +AC_INIT(libpst,0.6.44,carl@five-ten-sg.com) AC_CONFIG_SRCDIR([src/libpst.c]) AC_CONFIG_HEADER([config.h]) AM_INIT_AUTOMAKE diff -r 410b6422d65b -r aa50c23a6935 regression/regression-tests.bash --- a/regression/regression-tests.bash Sun Sep 13 10:49:12 2009 -0700 +++ 
b/regression/regression-tests.bash Mon Sep 14 10:56:39 2009 -0700 @@ -20,11 +20,11 @@ { n="$1" fn="$2" - echo $fn ba=$(basename "$fn" .pst) size=$(stat -c %s $fn) rm -rf output$n if [ -z "$val" ] || [ $size -lt 10000000 ]; then + echo $fn mkdir output$n $val ../src/pst2dii -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "bates-" -o output$n -O $ba.mydii -d $fn.log $fn >$fn.dii.err 2>&1 fi @@ -35,11 +35,11 @@ { n="$1" fn="$2" - echo $fn ba=$(basename "$fn" .pst) size=$(stat -c %s $fn) rm -rf output$n if [ -z "$val" ] || [ $size -lt 10000000 ]; then + echo $fn mkdir output$n $val ../src/pst2ldif -d $ba.ldif.log -b 'o=ams-cc.com, c=US' -c 'inetOrgPerson' $fn >$ba.ldif.err 2>&1 fi @@ -50,20 +50,30 @@ { n="$1" fn="$2" - echo $fn ba=$(basename "$fn" .pst) size=$(stat -c %s $fn) jobs="" [ -n "$val" ] && jobs="-j 0" rm -rf output$n if [ -z "$val" ] || [ $size -lt 10000000 ]; then + echo $fn mkdir output$n if [ "$regression" == "yes" ]; then $val ../src/readpst $jobs -te -r -cv -o output$n $fn >$ba.err 2>&1 else - #val ../src/readpst $jobs -r -D -cv -o output$n $fn - $val ../src/readpst $jobs -te -r -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 - #$val ../src/readpst $jobs -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 + ## only email and include deleted items, have a deleted items folder with multiple item types + #$val ../src/readpst $jobs -te -r -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 + + ## normal recursive dump + #$val ../src/readpst $jobs -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 + + # separate mode with filename extensions + $val ../src/readpst $jobs -r -e -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 + + ## separate mode where we decode all attachments to binary files + #$val ../src/readpst $jobs -r -S -D -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 + + ## testing idblock #../src/getidblock -p $fn 0 >$ba.fulldump fi fi diff -r 410b6422d65b -r aa50c23a6935 src/readpst.c --- a/src/readpst.c Sun Sep 13 10:49:12 2009 -0700 +++ b/src/readpst.c Mon 
Sep 14 10:56:39 2009 -0700 @@ -11,7 +11,7 @@ #define OUTPUT_TEMPLATE "%s" #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory" #define KMAIL_INDEX ".%s.index" -#define SEP_MAIL_FILE_TEMPLATE "%i" +#define SEP_MAIL_FILE_TEMPLATE "%i%s" // max size of the c_time char*. It will store the date of the email #define C_TIME_SIZE 500 @@ -39,7 +39,7 @@ int close_recurse_dir(); char* mk_separate_dir(char *dir); int close_separate_dir(); -int mk_separate_file(struct file_ll *f); +int mk_separate_file(struct file_ll *f, char *extension); char* my_stristr(char *haystack, char *needle); void check_filename(char *fname); void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst); @@ -81,7 +81,7 @@ // separate mode creates the same directory structure as recurse. The emails are stored in // separate files, numbering from 1 upward. Attachments belonging to the emails are -// saved as email_no-filename (e.g. 1-samplefile.doc or 000001-Attachment2.zip) +// saved as email_no-filename (e.g. 1-samplefile.doc or 1-Attachment2.zip) #define MODE_SEPARATE 3 @@ -118,6 +118,7 @@ // global settings int mode = MODE_NORMAL; int mode_MH = 0; // a submode of MODE_SEPARATE +int mode_EX = 0; // a submode of MODE_SEPARATE int mode_thunder = 0; // a submode of MODE_RECURSE int output_mode = OUTPUT_NORMAL; int contact_mode = CMODE_VCARD; @@ -126,6 +127,7 @@ int contact_mode_specified = 0; int overwrite = 0; int save_rtf_body = 1; +int file_name_len = 10; // enough room for MODE_SEPARATE file name pst_file pstfile; regex_t meta_charset_pattern; @@ -281,13 +283,13 @@ } else { if (!ff.type) ff.type = item->type; - if (ff.type != PST_TYPE_CONTACT) { + if ((ff.type != PST_TYPE_CONTACT) && (mode != MODE_SEPARATE)) { ff.skip_count++; DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. 
Skipping it\n", ff.type)); } else { ff.item_count++; - if (mode == MODE_SEPARATE) mk_separate_file(&ff); + if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".vcf" : ""); if (contact_mode == CMODE_VCARD) { pst_convert_utf8_null(item, &item->comment); write_vcard(ff.output, item, item->contact, item->comment.str); @@ -308,14 +310,14 @@ } else { if (!ff.type) ff.type = item->type; - if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT)) { + if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT) && (mode != MODE_SEPARATE)) { ff.skip_count++; DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type)); } else { char *extra_mime_headers = NULL; ff.item_count++; - if (mode == MODE_SEPARATE) mk_separate_file(&ff); + if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".eml" : ""); write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers); } } @@ -328,13 +330,13 @@ } else { if (!ff.type) ff.type = item->type; - if (ff.type != PST_TYPE_JOURNAL) { + if ((ff.type != PST_TYPE_JOURNAL) && (mode != MODE_SEPARATE)) { ff.skip_count++; DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type)); } else { ff.item_count++; - if (mode == MODE_SEPARATE) mk_separate_file(&ff); + if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : ""); write_journal(ff.output, item); fprintf(ff.output, "\n"); } @@ -348,14 +350,14 @@ } else { if (!ff.type) ff.type = item->type; - if (ff.type != PST_TYPE_APPOINTMENT) { + if ((ff.type != PST_TYPE_APPOINTMENT) && (mode != MODE_SEPARATE)) { ff.skip_count++; DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. 
Skipping it\n", ff.type)); } else { ff.item_count++; - if (mode == MODE_SEPARATE) mk_separate_file(&ff); - write_appointment(ff.output, item, 0); + if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : ""); + write_schedule_part_data(ff.output, item, NULL, NULL); fprintf(ff.output, "\n"); } } @@ -396,7 +398,7 @@ } // command-line option handling - while ((c = getopt(argc, argv, "bc:Dd:hj:kMo:qrSt:uVw"))!= -1) { + while ((c = getopt(argc, argv, "bc:Dd:ehj:kMo:qrSt:uVw"))!= -1) { switch (c) { case 'b': save_rtf_body = 0; @@ -435,6 +437,13 @@ case 'M': mode = MODE_SEPARATE; mode_MH = 1; + mode_EX = 0; + break; + case 'e': + mode = MODE_SEPARATE; + mode_MH = 1; + mode_EX = 1; + file_name_len = 14; break; case 'o': output_dir = optarg; @@ -449,6 +458,7 @@ case 'S': mode = MODE_SEPARATE; mode_MH = 0; + mode_EX = 0; break; case 't': // email, appointment, contact, other @@ -642,11 +652,12 @@ printf("OPTIONS:\n"); printf("\t-V\t- Version. Display program version\n"); printf("\t-D\t- Include deleted items in output\n"); - printf("\t-M\t- MH. Write emails in the MH format\n"); + printf("\t-M\t- Write emails in the MH (rfc822) format\n"); printf("\t-S\t- Separate. Write emails in the separate format\n"); printf("\t-b\t- Don't save RTF-Body attachments\n"); printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n"); printf("\t-d \t- Debug to file.\n"); + printf("\t-e\t- As with -M, but include extensions on output files\n"); printf("\t-h\t- Help. This screen\n"); printf("\t-j \t- Number of parallel jobs to run\n"); printf("\t-k\t- KMail. 
Output in kmail format\n"); @@ -798,7 +809,7 @@ if (y == 0) snprintf(dir_name, dirsize, "%s", dir); else - snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y); // enough for 9 digits allocated above + snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y, ""); // enough for 9 digits allocated above check_filename(dir_name); DEBUG_INFO(("about to try creating %s\n", dir_name)); @@ -858,14 +869,14 @@ } -int mk_separate_file(struct file_ll *f) { +int mk_separate_file(struct file_ll *f, char *extension) { const int name_offset = 1; DEBUG_ENT("mk_separate_file"); DEBUG_INFO(("opening next file to save email\n")); if (f->item_count > 999999999) { // bigger than nine 9's DIE(("mk_separate_file: The number of emails in this folder has become too high to handle\n")); } - sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count + name_offset); + sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count + name_offset, extension); if (f->output) fclose(f->output); f->output = NULL; check_filename(f->name); @@ -1265,9 +1276,9 @@ fprintf(f_output, "BEGIN:VCALENDAR\n"); fprintf(f_output, "VERSION:2.0\n"); fprintf(f_output, "PRODID:LibPST v%s\n", VERSION); - fprintf(f_output, "METHOD:%s\n", method); + if (method) fprintf(f_output, "METHOD:%s\n", method); fprintf(f_output, "BEGIN:VEVENT\n"); - fprintf(f_output, "ORGANIZER;CN=\"%s\":MAILTO:%s\n", item->email->outlook_sender_name.str, sender); + if (sender) fprintf(f_output, "ORGANIZER;CN=\"%s\":MAILTO:%s\n", item->email->outlook_sender_name.str, sender); write_appointment(f_output, item, 1); fprintf(f_output, "END:VCALENDAR\n"); } @@ -1912,8 +1923,8 @@ } else if (mode == MODE_SEPARATE) { // do similar stuff to recurse here. 
mk_separate_dir(item->file_as.str); - f->name = (char*) pst_malloc(10); - memset(f->name, 0, 10); + f->name = (char*) pst_malloc(file_name_len); + memset(f->name, 0, file_name_len); } else { f->name = (char*) pst_malloc(strlen(item->file_as.str)+strlen(OUTPUT_TEMPLATE)+1); sprintf(f->name, OUTPUT_TEMPLATE, item->file_as.str); diff -r 410b6422d65b -r aa50c23a6935 xml/libpst.in --- a/xml/libpst.in Sun Sep 13 10:49:12 2009 -0700 +++ b/xml/libpst.in Mon Sep 14 10:56:39 2009 -0700 @@ -35,7 +35,7 @@ - 2009-09-12 + 2009-09-14 @@ -60,6 +60,7 @@ + @@ -93,7 +94,7 @@ -M - Output messages in MH format as separate files. This will create + Output messages in MH (rfc822) format as separate files. This will create folders as named in the PST file, and will put each email together with any attachments into its own file. These files will be numbered from 1 to n with no leading zeros. @@ -102,12 +103,14 @@ -S - Output messages into separate files. This will create folders as named - in the PST file, and will put each email in its own file. These files - will be numbered from 1 increasing in intervals of 1 (ie 1, 2, 3, ...). - Any attachments are saved alongside each email as XXXXXXXXX-attach1, - XXXXXXXXX-attach2 and so on, or with the name of the attachment if one - is present. + Output messages into separate files. This will create folders as + named in the PST file, and will put each email in its own file. These + files will be numbered from 1 to n with no leading zeros. So the email + contents for message $m are saved in a file named $m. Attachments + will also be saved in the same folder as the email message. The + attachments for message $m are saved as $m-$name where $name is (the + original name of the attachment, or 'attach$n' if the attachment had + no name), where $n is another sequential index with no leading zeros. @@ -136,6 +139,13 @@ + -e + + Same as the M option, but each output file will include an extension + from (.eml, .ics, .vcf). 
+ + + -h Show summary of options and exit. @@ -245,7 +255,7 @@ - 2009-09-12 + 2009-09-14 @@ -340,7 +350,7 @@ - 2009-09-12 + 2009-09-14 @@ -508,7 +518,7 @@ - 2009-09-12 + 2009-09-14 @@ -641,7 +651,7 @@ - 2009-09-12 + 2009-09-14