Mercurial > libpst
changeset 230:42b38d65f7e4
patches from Justin Greer
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Thu, 10 Sep 2009 13:01:08 -0700 |
parents | e7f363452178 |
children | fe64279df92b |
files | regression/regression-tests.bash src/libpst.c src/pst2ldif.cpp src/readpst.c xml/libpst.in |
diffstat | 5 files changed, 212 insertions(+), 93 deletions(-) [+] |
line wrap: on
line diff
--- a/regression/regression-tests.bash Fri Sep 04 08:06:22 2009 -0700 +++ b/regression/regression-tests.bash Thu Sep 10 13:01:08 2009 -0700 @@ -113,7 +113,7 @@ #dopst 16 hourig2.pst #dopst 17 hourig3.pst dopst 18 test-mac.pst - #dopst 19 harris.pst + dopst 19 harris.pst dopst 20 spam.pst dopst 21 rendgen.pst # single email appointment dopst 22 rendgen2.pst # email appointment with no termination date
--- a/src/libpst.c Fri Sep 04 08:06:22 2009 -0700 +++ b/src/libpst.c Thu Sep 10 13:01:08 2009 -0700 @@ -65,7 +65,10 @@ typedef struct pst_holder { char **buf; FILE *fp; - int base64; + int base64; // bool, are we encoding into base64 + int base64_line_count; // base64 bytes emitted on the current line + size_t base64_extra; // count of bytes held in base64_extra_chars + char base64_extra_chars[2]; // up to two pending unencoded bytes } pst_holder; @@ -252,6 +255,7 @@ 0x61, 0xe0, 0xc6, 0xc1, 0x59, 0xab, 0xbb, 0x58, 0xde, 0x5f, 0xdf, 0x60, 0x79, 0x7e, 0xb2, 0x8a }; +static size_t pst_append_holder(pst_holder *h, size_t size, char **buf, size_t z); static int pst_build_desc_ptr(pst_file *pf, int64_t offset, int32_t depth, uint64_t linku1, uint64_t start_val, uint64_t end_val); static pst_id2_tree* pst_build_id2(pst_file *pf, pst_index_ll* list); static int pst_build_id_ptr(pst_file *pf, int64_t offset, int32_t depth, uint64_t linku1, uint64_t start_val, uint64_t end_val); @@ -260,6 +264,7 @@ static size_t pst_ff_getIDblock(pst_file *pf, uint64_t i_id, char** buf); static size_t pst_ff_getID2block(pst_file *pf, uint64_t id2, pst_id2_tree *id2_head, char** buf); static size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, pst_holder *h); +static size_t pst_finish_cleanup_holder(pst_holder *h, size_t size); static void pst_free_attach(pst_item_attach *attach); static void pst_free_desc (pst_desc_tree *head); static void pst_free_id2(pst_id2_tree * head); @@ -537,7 +542,7 @@ pst_binary pst_attach_to_mem(pst_file *pf, pst_item_attach *attach) { pst_index_ll *ptr; pst_binary rc; - pst_holder h = {&rc.data, NULL, 0}; + pst_holder h = {&rc.data, NULL, 0, 0, 0}; rc.size = 0; rc.data = NULL; DEBUG_ENT("pst_attach_to_mem"); @@ -560,7 +565,7 @@ size_t pst_attach_to_file(pst_file *pf, pst_item_attach *attach, FILE* fp) { pst_index_ll *ptr; - pst_holder h = {NULL, fp, 0}; + pst_holder h = {NULL, fp, 0, 0, 0}; size_t size = 0; DEBUG_ENT("pst_attach_to_file"); if ((!attach->data.data) && (attach->i_id != (uint64_t)-1)) { @@ -584,7 +589,7 @@ size_t pst_attach_to_file_base64(pst_file *pf, pst_item_attach *attach, FILE* fp) { pst_index_ll *ptr; - pst_holder h = {NULL, fp, 1}; + pst_holder h = {NULL, fp, 1, 0, 0}; size_t size = 0; DEBUG_ENT("pst_attach_to_file_base64"); if ((!attach->data.data) && (attach->i_id != (uint64_t)-1)) { @@ -941,6 +946,7 @@ static size_t pst_decode_type3(pst_file *pf, pst_table3_rec *table3_rec, char *buf); static size_t pst_decode_type3(pst_file *pf, pst_table3_rec *table3_rec, char *buf) { size_t r; + DEBUG_ENT("pst_decode_type3"); if (pf->do_read64) { DEBUG_INFO(("Decoding table3 64\n")); DEBUG_HEXDUMPC(buf, sizeof(pst_table3_rec), 0x10); @@ -956,6 +962,7 @@ table3_rec->id = table3_rec32.id; r = sizeof(pst_table3_rec32); } + DEBUG_RET(); return r; } @@ -3911,7 +3918,7 @@ static size_t pst_ff_getID2block(pst_file *pf, uint64_t id2, pst_id2_tree *id2_head, char** buf) { size_t ret; pst_id2_tree* ptr; - pst_holder h = {buf, NULL, 0}; + pst_holder h = {buf, NULL, 0, 0, 0}; DEBUG_ENT("pst_ff_getID2block"); ptr = pst_getID2(id2_head, id2); @@ -3926,46 +3933,49 @@ } +/** find the actual data from an i_id and send it to the destination + * specified by the pst_holder h. h must be a new empty destination. + * + * @param pf PST file structure + * @param ptr + * @param h specifies the output destination (buffer, file, encoding) + * @return updated size of the output + */ static size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, pst_holder *h) { size_t ret; - char *b = NULL, *t; + char *b = NULL; DEBUG_ENT("pst_ff_getID2data"); if (!(ptr->i_id & 0x02)) { ret = pst_ff_getIDblock_dec(pf, ptr->i_id, &b); - if (h->buf) { - *(h->buf) = b; - } else if ((h->base64 == 1) && h->fp) { - t = pst_base64_encode(b, ret); - if (t) { - (void)pst_fwrite(t, (size_t)1, strlen(t), h->fp); - free(t); // caught by valgrind - } - free(b); - } else if (h->fp) { - (void)pst_fwrite(b, (size_t)1, ret, h->fp); - free(b); - } else { - // h-> does not specify any output - } - + ret = pst_append_holder(h, (size_t)0, &b, ret); + free(b); } else { - // here we will assume it is a block that points to others + // here we will assume it is an indirection block that points to others DEBUG_INFO(("Assuming it is a multi-block record because of it's id\n")); ret = pst_ff_compile_ID(pf, ptr->i_id, h, (size_t)0); } + ret = pst_finish_cleanup_holder(h, ret); DEBUG_RET(); return ret; } +/** find the actual data from an indirection i_id and send it to the destination + * specified by the pst_holder. + * + * @param pf PST file structure + * @param i_id ID of the block to read + * @param h specifies the output destination (buffer, file, encoding) + * @param size number of bytes of data already sent to h + * @return updated size of the output + */ static size_t pst_ff_compile_ID(pst_file *pf, uint64_t i_id, pst_holder *h, size_t size) { - size_t z, a; - uint16_t count, y; - char *buf3 = NULL, *buf2 = NULL, *t; - char *b_ptr; - int line_count = 0; - char base64_extra_chars[3]; - uint32_t base64_extra = 0; + size_t z, a; + uint16_t count, y; + char *buf3 = NULL; + char *buf2 = NULL; + char *b_ptr; + int line_count = 0; pst_block_hdr block_hdr; pst_table3_rec table3_rec; //for type 3 (0x0101) blocks @@ -3983,30 +3993,31 @@ LE32_CPU(block_hdr.offset); DEBUG_INFO(("block header (index_offset=%#hx, type=%#hx, offset=%#x)\n", block_hdr.index_offset, block_hdr.type, block_hdr.offset)); + count = block_hdr.type; + b_ptr = buf3 + 8; + + // For indirect lookups through a table of i_ids, just recurse back into this + // function, letting it concatenate all the data together, and then return the + // total size of the data. + if (block_hdr.index_offset == (uint16_t)0x0201) { // Indirect lookup (depth 2). + for (y=0; y<count; y++) { + b_ptr += pst_decode_type3(pf, &table3_rec, b_ptr); + size = pst_ff_compile_ID(pf, table3_rec.id, h, size); + } + free(buf3); + DEBUG_RET(); + return size; + } + if (block_hdr.index_offset != (uint16_t)0x0101) { //type 3 DEBUG_WARN(("WARNING: not a type 0x0101 buffer, Treating as normal buffer\n")); if (pf->encryption) (void)pst_decrypt(i_id, buf3, a, pf->encryption); - if (h->buf) - *(h->buf) = buf3; - else if (h->base64 == 1 && h->fp) { - t = pst_base64_encode(buf3, a); - if (t) { - (void)pst_fwrite(t, (size_t)1, strlen(t), h->fp); - free(t); // caught by valgrind - } - free(buf3); - } else if (h->fp) { - (void)pst_fwrite(buf3, (size_t)1, a, h->fp); - free(buf3); - } else { - // h-> does not specify any output - } + size = pst_append_holder(h, size, &buf3, a); + free(buf3); DEBUG_RET(); - return a; + return size; } - count = block_hdr.type; - b_ptr = buf3 + 8; - line_count = 0; + for (y=0; y<count; y++) { b_ptr += pst_decode_type3(pf, &table3_rec, b_ptr); z = pst_ff_getIDblock_dec(pf, table3_rec.id, &buf2); @@ -4017,51 +4028,92 @@ DEBUG_RET(); return z; } - if (h->buf) { - *(h->buf) = realloc(*(h->buf), size+z+1); - DEBUG_INFO(("appending read data of size %i onto main buffer from pos %i\n", z, size)); - memcpy(&((*(h->buf))[size]), buf2, z); - } else if ((h->base64 == 1) && h->fp) { - if (base64_extra) { - // include any bytes left over from the last encoding - buf2 = (char*)realloc(buf2, z+base64_extra); - memmove(buf2+base64_extra, buf2, z); - memcpy(buf2, base64_extra_chars, base64_extra); - z += base64_extra; - } - - // find out how many bytes will be left over after this encoding and save them - base64_extra = z % 3; - if (base64_extra) { - z -= base64_extra; - memcpy(base64_extra_chars, buf2+z, base64_extra); - } - - // encode this chunk - t = pst_base64_encode_multiple(buf2, z, &line_count); - if (t) { - DEBUG_INFO(("writing %i bytes to file as base64 [%i]. Currently %i\n", z, strlen(t), size)); - (void)pst_fwrite(t, (size_t)1, strlen(t), h->fp); - free(t); // caught by valgrind - } - } else if (h->fp) { - DEBUG_INFO(("writing %i bytes to file. Currently %i\n", z, size)); - (void)pst_fwrite(buf2, (size_t)1, z, h->fp); - } else { - // h-> does not specify any output + size = pst_append_holder(h, size, &buf2, z); + } + + free(buf3); + if (buf2) free(buf2); + DEBUG_RET(); + return size; +} + + +/** append (buf,z) data to the output destination (h,size) + * + * @param h specifies the output destination (buffer, file, encoding) + * @param size number of bytes of data already sent to h + * @param buf reference to a pointer to the buffer to be appended to the destination + * @param z number of bytes in buf + * @return updated size of the output, buffer pointer possibly reallocated + */ +static size_t pst_append_holder(pst_holder *h, size_t size, char **buf, size_t z) { + char *t; + DEBUG_ENT("pst_append_holder"); + + // raw append to a buffer + if (h->buf) { + *(h->buf) = realloc(*(h->buf), size+z+1); + DEBUG_INFO(("appending read data of size %i onto main buffer from pos %i\n", z, size)); + memcpy(*(h->buf)+size, *buf, z); + + // base64 encoding to a file + } else if ((h->base64 == 1) && h->fp) { + // + if (h->base64_extra) { + // include any bytes left over from the last encoding + *buf = (char*)realloc(*buf, z+h->base64_extra); + memmove(*buf+h->base64_extra, *buf, z); + memcpy(*buf, h->base64_extra_chars, h->base64_extra); + z += h->base64_extra; } - size += z; + + // find out how many bytes will be left over after this encoding and save them + h->base64_extra = z % 3; + if (h->base64_extra) { + z -= h->base64_extra; + memcpy(h->base64_extra_chars, *buf+z, h->base64_extra); + } + + // encode this chunk + t = pst_base64_encode_multiple(*buf, z, &h->base64_line_count); + if (t) { + DEBUG_INFO(("writing %i bytes to file as base64 [%i]. Currently %i\n", z, strlen(t), size)); + (void)pst_fwrite(t, (size_t)1, strlen(t), h->fp); + free(t); // caught by valgrind + } + + // raw append to a file + } else if (h->fp) { + DEBUG_INFO(("writing %i bytes to file. Currently %i\n", z, size)); + (void)pst_fwrite(*buf, (size_t)1, z, h->fp); + + // null output + } else { + // h-> does not specify any output } - if ((h->base64 == 1) && h->fp && base64_extra) { + DEBUG_RET(); + return size+z; +} + + +/** finish cleanup for base64 encoding to a file with extra bytes left over + * + * @param h specifies the output destination (buffer, file, encoding) + * @param size number of bytes of data already sent to h + * @return updated size of the output + */ +static size_t pst_finish_cleanup_holder(pst_holder *h, size_t size) { + char *t; + DEBUG_ENT("pst_finish_cleanup_holder"); + if ((h->base64 == 1) && h->fp && h->base64_extra) { // need to encode any bytes left over - t = pst_base64_encode_multiple(base64_extra_chars, (size_t)base64_extra, &line_count); + t = pst_base64_encode_multiple(h->base64_extra_chars, h->base64_extra, &h->base64_line_count); if (t) { (void)pst_fwrite(t, (size_t)1, strlen(t), h->fp); free(t); // caught by valgrind } + size += h->base64_extra; } - free(buf3); - if (buf2) free(buf2); DEBUG_RET(); return size; } @@ -4246,6 +4298,8 @@ case 932 : return "iso-2022-jp"; case 936 : return "gb2313"; case 950 : return "big5"; + case 1200 : return "ucs-2le"; + case 1201 : return "ucs-2be"; case 20127 : return "us-ascii"; case 20269 : return "iso-6937"; case 20865 : return "iso-8859-15";
--- a/src/pst2ldif.cpp Fri Sep 04 08:06:22 2009 -0700 +++ b/src/pst2ldif.cpp Thu Sep 10 13:01:08 2009 -0700 @@ -60,6 +60,7 @@ static void free_strings(string_set &s); static void free_strings(string_set &s) { + if (s.empty()) return; for (string_set::iterator i=s.begin(); i!=s.end(); i++) { free((void*)*i); }
--- a/src/readpst.c Fri Sep 04 08:06:22 2009 -0700 +++ b/src/readpst.c Thu Sep 10 13:01:08 2009 -0700 @@ -103,6 +103,12 @@ #define DMODE_EXCLUDE 0 #define DMODE_INCLUDE 1 +// Output type mode flags +#define OTMODE_EMAIL 1 +#define OTMODE_APPOINTMENT 2 +#define OTMODE_JOURNAL 4 +#define OTMODE_CONTACT 8 + // output settings for RTF bodies // filename for the attachment #define RTF_ATTACH_NAME "rtf-body.rtf" @@ -115,6 +121,7 @@ int output_mode = OUTPUT_NORMAL; int contact_mode = CMODE_VCARD; int deleted_mode = DMODE_EXCLUDE; +int output_type_mode = 0xff; // Default to all. int contact_mode_specified = 0; int overwrite = 0; int save_rtf_body = 1; @@ -272,6 +279,10 @@ ff.skip_count++; DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. Skipping it\n", ff.type)); } + else if (!(output_type_mode & OTMODE_CONTACT)) { + ff.skip_count++; + DEBUG_INFO(("skipping contact: not in output type list\n")); + } else { ff.item_count++; if (mode == MODE_SEPARATE) mk_separate_file(&ff); @@ -293,6 +304,10 @@ ff.skip_count++; DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type)); } + else if (!(output_type_mode & OTMODE_EMAIL)) { + ff.skip_count++; + DEBUG_INFO(("skipping email: not in output type list\n")); + } else { char *extra_mime_headers = NULL; ff.item_count++; @@ -307,6 +322,10 @@ ff.skip_count++; DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type)); } + else if (!(output_type_mode & OTMODE_JOURNAL)) { + ff.skip_count++; + DEBUG_INFO(("skipping journal entry: not in output type list\n")); + } else { ff.item_count++; if (mode == MODE_SEPARATE) mk_separate_file(&ff); @@ -321,6 +340,10 @@ ff.skip_count++; DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. Skipping it\n", ff.type)); } + else if (!(output_type_mode & OTMODE_APPOINTMENT)) { + ff.skip_count++; + DEBUG_INFO(("skipping appointment: not in output type list\n")); + } else { ff.item_count++; if (mode == MODE_SEPARATE) mk_separate_file(&ff); @@ -364,7 +387,7 @@ } // command-line option handling - while ((c = getopt(argc, argv, "bc:Dd:hj:kMo:qrSVw"))!= -1) { + while ((c = getopt(argc, argv, "bc:Dd:hj:kMo:qrSt:Vw"))!= -1) { switch (c) { case 'b': save_rtf_body = 0; @@ -421,6 +444,36 @@ version(); exit(0); break; + case 't': + // email, appointment, contact, other + if (!optarg) { + usage(); + exit(0); + } + temp = optarg; + output_type_mode = 0; + while (*temp > 0) { + switch (temp[0]) { + case 'e': + output_type_mode |= OTMODE_EMAIL; + break; + case 'a': + output_type_mode |= OTMODE_APPOINTMENT; + break; + case 'j': + output_type_mode |= OTMODE_JOURNAL; + break; + case 'c': + output_type_mode |= OTMODE_CONTACT; + break; + default: + usage(); + exit(0); + break; + } + temp++; + } + break; case 'w': overwrite = 1; break; @@ -579,6 +632,7 @@ printf("\t-S\t- Separate. Write emails in the separate format\n"); printf("\t-b\t- Don't save RTF-Body attachments\n"); printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n"); + printf("\t-t[eajc]\t- Set the output type list. e = email, a = attachment, j = journal, c = contact\n"); printf("\t-d <filename> \t- Debug to file. This is a binary log. Use readpstlog to print it\n"); printf("\t-h\t- Help. This screen\n"); printf("\t-j <integer>\t- Number of parallel jobs to run\n"); @@ -1344,11 +1398,12 @@ // print the supplied email headers if (headers) { - int len; - fprintf(f_output, "%s", headers); - // make sure the headers end with a \n - len = strlen(headers); - if (!len || (headers[len-1] != '\n')) fprintf(f_output, "\n"); + int len = strlen(headers); + if (len > 0) { + fprintf(f_output, "%s", headers); + // make sure the headers end with a \n + if (headers[len-1] != '\n') fprintf(f_output, "\n"); + } } // create required header fields that are not already written
--- a/xml/libpst.in Fri Sep 04 08:06:22 2009 -0700 +++ b/xml/libpst.in Thu Sep 10 13:01:08 2009 -0700 @@ -66,6 +66,7 @@ <arg><option>-o <replaceable class="parameter">output-directory</replaceable></option></arg> <arg><option>-q</option></arg> <arg><option>-r</option></arg> + <arg><option>-t <replaceable class="parameter">output-type-codes</replaceable></option></arg> <arg><option>-w</option></arg> <arg choice='plain'>pstfile</arg> </cmdsynopsis> @@ -177,6 +178,14 @@ </para></listitem> </varlistentry> <varlistentry> + <term>-t <replaceable class="parameter">output-type-codes</replaceable></term> + <listitem><para> + Specifies the item types that are processed. The argument is a sequence + of single letters from (e,a,j,c) for (email, appointment, journal, contact) + types. The default is to process all item types. + </para></listitem> + </varlistentry> + <varlistentry> <term>-w</term> <listitem><para> Overwrite any previous output files. Beware: When used with the -S