Mercurial > libpst
changeset 142:2189a6b8134e
Improve character set handling: don't try to convert UTF-8 to a single-byte character set for fields that were not originally Unicode.
If the conversion fails, leave the data in UTF-8.
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Mon, 23 Feb 2009 20:40:51 -0800 |
parents | fd4297884319 |
children | fdc58ad2c758 |
files | ChangeLog regression/regression-tests.bash src/Makefile.am src/getidblock.c src/libpst.c src/libpst.h src/readpst.c src/vbuf.c |
diffstat | 8 files changed, 459 insertions(+), 271 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog Sat Feb 14 11:02:37 2009 -0800 +++ b/ChangeLog Mon Feb 23 20:40:51 2009 -0800 @@ -4,6 +4,9 @@ that are not used by normal mail clients. * improve decoding of multipart/report and message/rfc822 mime types. + * improve character set handling - don't try to convert utf-8 + to single byte for fields that were not originally unicode. + if the conversion fails, leave the data in utf-8. LibPST 0.6.27 (2009-02-07) ===============================
--- a/regression/regression-tests.bash Sat Feb 14 11:02:37 2009 -0800 +++ b/regression/regression-tests.bash Mon Feb 23 20:40:51 2009 -0800 @@ -46,24 +46,24 @@ dodii 3 test.pst dodii 4 big_mail.pst else - #dopst 1 ams.pst - #dopst 2 sample_64.pst - #dopst 3 test.pst - #dopst 4 big_mail.pst - #dopst 5 mbmg.archive.pst - #dopst 6 Single2003-read.pst - #dopst 7 Single2003-unread.pst - #dopst 8 ol2k3high.pst - #dopst 9 ol97high.pst - #dopst 10 returned_message.pst - #dopst 11 flow.pst - #dopst 12 test-html.pst - #dopst 13 test-text.pst - #dopst 14 joe.romanowski.pst - #dopst 15 hourig1.pst + dopst 1 ams.pst + dopst 2 sample_64.pst + dopst 3 test.pst + dopst 4 big_mail.pst + dopst 5 mbmg.archive.pst + dopst 6 Single2003-read.pst + dopst 7 Single2003-unread.pst + dopst 8 ol2k3high.pst + dopst 9 ol97high.pst + dopst 10 returned_message.pst + dopst 11 flow.pst + dopst 12 test-html.pst + dopst 13 test-text.pst + dopst 14 joe.romanowski.pst + dopst 15 hourig1.pst #dopst 16 hourig2.pst - dopst 17 hourig3.pst - #dopst 18 test-mac.pst + #dopst 17 hourig3.pst + dopst 18 test-mac.pst fi grep 'lost:' *err | grep -v 'lost: 0 '
--- a/src/Makefile.am Sat Feb 14 11:02:37 2009 -0800 +++ b/src/Makefile.am Mon Feb 23 20:40:51 2009 -0800 @@ -76,7 +76,7 @@ libstrfunc.h\ timeconv.h \ vbuf.h - libpst_la_LDFLAGS = $(NO_UNDEFINED) -version-info 1:0:0 + libpst_la_LDFLAGS = $(NO_UNDEFINED) -version-info 1:1:0 endif libpst_la_SOURCES = $(common_source) $(common_header)
--- a/src/getidblock.c Sat Feb 14 11:02:37 2009 -0800 +++ b/src/getidblock.c Mon Feb 23 20:40:51 2009 -0800 @@ -1,19 +1,92 @@ #include "define.h" -static void usage(); +int decrypt = 0, process = 0, binary = 0; +pst_file pstfile; + + +void usage(); +void usage() +{ + printf("usage: getidblock [options] filename id\n"); + printf("\tfilename - name of the file to access\n"); + printf("\tid - ID of the block to fetch (0 to fetch all) - can begin with 0x for hex\n"); + printf("\toptions\n"); + printf("\t\t-d\tDecrypt the block before printing\n"); + printf("\t\t-p\tProcess the block before finishing.\n"); + printf("\t\t\tView the debug log for information\n"); +} + + +void dumper(uint64_t id); +void dumper(uint64_t id) +{ + char *buf = NULL; + size_t readSize; + pst_desc_ll *ptr; + + DEBUG_MAIN(("\n\n\nLooking at block index1 id %#"PRIx64"\n", id)); + + if ((readSize = pst_ff_getIDblock(&pstfile, id, &buf)) <= 0 || buf == 0) { + DIE(("Error loading block\n")); + } + + if (decrypt) + if (pst_decrypt(id, buf, readSize, (int) pstfile.encryption) != 0) { + DIE(("Error decrypting block\n")); + } + + DEBUG_MAIN(("Printing block id %#"PRIx64", size %#x\n", id, readSize)); + if (binary) { + if (fwrite(buf, 1, readSize, stdout) != 0) { + DIE(("Error occured during writing of buf to stdout\n")); + } + } else { + printf("Block id %#"PRIx64", size %#x\n", id, readSize); + pst_debug_hexdumper(stdout, buf, readSize, 0x10, 0); + } + if (buf) free(buf); + + if (process) { + DEBUG_MAIN(("Parsing block id %#"PRIx64"\n", id)); + ptr = pstfile.d_head; + while (ptr) { + if (ptr->list_index && ptr->list_index->id == id) + break; + if (ptr->desc && ptr->desc->id == id) + break; + ptr = pst_getNextDptr(ptr); + } + if (!ptr) { + ptr = (pst_desc_ll *) xmalloc(sizeof(pst_desc_ll)); + ptr->desc = pst_getID(&pstfile, id); + ptr->list_index = NULL; + } + pst_item *item = pst_parse_item(&pstfile, ptr); + if (item) pst_freeItem(item); + } +} + + +void dump_desc(pst_desc_ll *ptr); +void 
dump_desc(pst_desc_ll *ptr) +{ + while (ptr) { + DEBUG_MAIN(("\n\n\nLooking at block desc id %#"PRIx64"\n", ptr->id)); + if (ptr->desc && ptr->desc->id) dumper(ptr->desc->id); + if (ptr->list_index && ptr->list_index->id) dumper(ptr->list_index->id); + if (ptr->child) dump_desc(ptr->child); + ptr = ptr->next; + } +} + int main(int argc, char* const* argv) { // pass the id number to display on the command line char *fname, *sid; - pst_file pstfile; uint64_t id; - int decrypt = 0, process = 0, binary = 0, c; - char *buf = NULL; - size_t readSize; - pst_item *item; - pst_desc_ll *ptr; + int c; DEBUG_INIT("getidblock.log"); DEBUG_REGISTER_CLOSE(); @@ -58,56 +131,17 @@ if (pst_load_index(&pstfile) != 0) { DIE(("Error loading file index\n")); } - // if ((ptr = pst_getID(&pstfile, id)) == NULL) { - // DIE(("id not found [%#x]\n", id)); - // } - DEBUG_MAIN(("Loading block\n")); - - if ((readSize = pst_ff_getIDblock(&pstfile, id, &buf)) <= 0 || buf == NULL) { - // if ((readSize = pst_read_block_size(&pstfile, ptr->offset, ptr->size, &buf, 1, 1)) < ptr->size) { - DIE(("Error loading block\n")); + if (id) { + dumper(id); } - if (binary == 0) - printf("Block %#"PRIx64", size %#x[%i]\n", id, (unsigned int) readSize, (int) readSize); - - if (decrypt != 0) - if (pst_decrypt(id, buf, readSize, (int) pstfile.encryption) != 0) { - DIE(("Error decrypting block\n")); - } - - DEBUG_MAIN(("Printing block... 
[id %#x, size %#x]\n", id, readSize)); - if (binary == 0) { - pst_debug_hexdumper(stdout, buf, readSize, 0x10, 0); - } else { - if (fwrite(buf, 1, readSize, stdout) != 0) { - DIE(("Error occured during writing of buf to stdout\n")); + else { + pst_index_ll *ptr = pstfile.i_head; + while (ptr) { + dumper(ptr->id); + ptr = ptr->next; } - } - free(buf); - - if (process != 0) { - DEBUG_MAIN(("Parsing block...\n")); - ptr = pstfile.d_head; - while (ptr != NULL) { - if (ptr->list_index != NULL && ptr->list_index->id == id) - break; - if (ptr->desc != NULL && ptr->desc->id == id) - break; - ptr = pst_getNextDptr(ptr); - } - if (ptr == NULL) { - ptr = (pst_desc_ll *) xmalloc(sizeof(pst_desc_ll)); - ptr->desc = pst_getID(&pstfile, id); - ptr->list_index = NULL; - } - if (ptr != NULL) { - if ((item = pst_parse_item(&pstfile, ptr)) != NULL) - pst_freeItem(item); - } else { - DEBUG_MAIN(("item not found with this ID\n")); - printf("Cannot find the owning Record of this ID. Cannot parse\n"); - } + dump_desc(pstfile.d_head); } if (pst_close(&pstfile) != 0) { @@ -118,13 +152,3 @@ return 0; } -void usage() -{ - printf("usage: getidblock [options] filename id\n"); - printf("\tfilename - name of the file to access\n"); - printf("\tid - ID of the block to fetch - can begin with 0x for hex\n"); - printf("\toptions\n"); - printf("\t\t-d\tDecrypt the block before printing\n"); - printf("\t\t-p\tProcess the block before finishing.\n"); - printf("\t\t\tView the debug log for information\n"); -}
--- a/src/libpst.c Sat Feb 14 11:02:37 2009 -0800 +++ b/src/libpst.c Mon Feb 23 20:40:51 2009 -0800 @@ -125,7 +125,9 @@ 0xd4, 0xe1, 0x11, 0xd0, 0x08, 0x8b, 0x2a, 0xf2, 0xed, 0x9a, 0x64, 0x3f, 0xc1, 0x6c, 0xf9, 0xec }; -// for "strong" encryption, we have the two additional tables +// for "strong" encryption, we have the two additional tables, +// which (with the previous table) are used as the keys in an +// Enigma 3 rotor cipher static unsigned char comp_high1 [] = { 0x41, 0x36, 0x13, 0x62, 0xa8, 0x21, 0x6e, 0xbb, 0xf4, 0x16, 0xcc, 0x04, 0x7f, 0x64, 0xe8, 0x5d, 0x1e, 0xf2, 0xcb, 0x2a, 0x74, 0xc5, 0x5e, 0x35, 0xd2, 0x95, 0x47, 0x9e, 0x96, 0x2d, 0x9a, 0x88, @@ -519,7 +521,7 @@ } if (p->list_index) { - id2_head = pst_build_id2(pf, p->list_index, NULL); + id2_head = pst_build_id2(pf, p->list_index); pst_printID2ptr(id2_head); } else { DEBUG_WARN(("Have not been able to fetch any id2 values for item 0x61. Brace yourself!\n")); @@ -1047,10 +1049,8 @@ } if (d_ptr->list_index) { - id2_head = pst_build_id2(pf, d_ptr->list_index, NULL); + id2_head = pst_build_id2(pf, d_ptr->list_index); (void)pst_printID2ptr(id2_head); - } else { - DEBUG_WARN(("Have not been able to fetch any id2 values for this item. Brace yourself!\n")); } list = pst_parse_block(pf, d_ptr->desc->id, id2_head, NULL); @@ -1073,16 +1073,48 @@ return NULL; } if (list) pst_free_list(list); - list = NULL; //pst_process will free the items in the list + list = NULL; + + if ((id_ptr = pst_getID2(id2_head, (uint64_t)0x692))) { + // DSN/MDN reports? 
+ DEBUG_EMAIL(("DSN/MDN processing \n")); + if ((list = pst_parse_block(pf, id_ptr->id, id2_head, NULL)) == NULL) { + DEBUG_WARN(("ERROR error processing main DSN/MDN record\n")); + if (item) pst_freeItem(item); + if (list) pst_free_list(list); + if (id2_head) pst_free_id2(id2_head); + DEBUG_RET(); + return item; + } + else { + for (x=0; x < list->count_array; x++) { + attach = (pst_item_attach*) xmalloc(sizeof(pst_item_attach)); + memset(attach, 0, sizeof(pst_item_attach)); + attach->next = item->attach; + item->attach = attach; + } + + if (pst_process(list, item, item->attach)) { + DEBUG_WARN(("ERROR pst_process() failed with attachments\n")); + if (item) pst_freeItem(item); + if (list) pst_free_list(list); + if (id2_head) pst_free_id2(id2_head); + DEBUG_RET(); + return NULL; + } + if (list) pst_free_list(list); + list = NULL; + } + } if ((id_ptr = pst_getID2(id2_head, (uint64_t)0x671))) { // should not have any existing attachments anyway - while (item->attach) { - DEBUG_EMAIL(("throw away existing attachment\n")); - attach = item->attach->next; - free(item->attach); - item->attach = attach; - } + //while (item->attach) { + // DEBUG_EMAIL(("throw away existing attachment\n")); + // attach = item->attach->next; + // free(item->attach); + // item->attach = attach; + //} DEBUG_EMAIL(("ATTACHMENT processing attachment\n")); if ((list = pst_parse_block(pf, id_ptr->id, id2_head, NULL)) == NULL) { @@ -1126,6 +1158,9 @@ attach = attach->next; continue; } + if (list->count_array > 1) { + DEBUG_WARN(("ERROR probably fatal, list count array will overrun attach structure.\n")); + } if (pst_process(list, item, attach)) { DEBUG_WARN(("ERROR pst_process() failed with an attachment\n")); if (list) pst_free_list(list); @@ -1146,6 +1181,7 @@ } } else { DEBUG_WARN(("ERROR cannot locate id2 value %#"PRIx64"\n", attach->id2_val)); + attach->id2_val = 0; // suppress this missing attachment } attach = attach->next; } @@ -1684,27 +1720,22 @@ #define NULL_CHECK(x) { if (!x) { 
DEBUG_EMAIL(("NULL_CHECK: Null Found\n")); break;} } -#define MOVE_NEXT(targ) { \ - if (next){\ - if (!targ) {\ - DEBUG_EMAIL(("MOVE_NEXT: Target is NULL. Will stop processing this option\n"));\ - break;\ - }\ - targ = targ->next;\ - if (!targ) {\ - DEBUG_EMAIL(("MOVE_NEXT: Target is NULL after next. Will stop processing this option\n"));\ - break;\ - }\ - next=0;\ - }\ -} - - -int pst_process(pst_num_array *list , pst_item *item, pst_item_attach *attach) { - int32_t x, t; - int next = 0; - pst_item_extra_field *ef; - + +/** + * process the list of items produced from parse_block() + * + * @param list pointer to the linked list of things from parse_block() + * @param item pointer to the item to be updated from the list. + * this item may be an email, contact or other sort of item. + * the type of this item is generally set by the things + * from the list. + * @param attach pointer to the linked list of attachment records. If + * this is non-null, the length of the this attachment list + * must be at least as large as the length of the list. + * + * @return 0 for ok, -1 for error. + */ +int pst_process(pst_num_array *list, pst_item *item, pst_item_attach *attach) { DEBUG_ENT("pst_process"); if (!item) { DEBUG_EMAIL(("item cannot be NULL.\n")); @@ -1713,8 +1744,10 @@ } while (list) { - x = 0; + int32_t x = 0; while (x < list->count_item) { + int32_t t; + pst_item_extra_field *ef; // check here to see if the id is one that is mapped. 
DEBUG_EMAIL(("#%d - id: %#x type: %#x length: %#x\n", x, list->items[x]->id, list->items[x]->type, list->items[x]->size)); @@ -1771,13 +1804,14 @@ // 0 - Low // 1 - Normal // 2 - High - DEBUG_EMAIL(("Importance Level - ")); MALLOC_EMAIL(item); memcpy(&(item->email->importance), list->items[x]->data, sizeof(item->email->importance)); LE32_CPU(item->email->importance); t = item->email->importance; - DEBUG_EMAIL(("%s [%i]\n", ((int)t==0?"Low":((int)t==1?"Normal":"High")), t)); + DEBUG_EMAIL(("%s [%i]\n", ((int)t==0?"Low": + ((int)t==1?"Normal": + "High")), t)); break; case 0x001A: // PR_MESSAGE_CLASS Ascii type of messages - NOT FOLDERS // must be case insensitive @@ -1862,22 +1896,32 @@ memcpy(&(item->email->orig_sensitivity), list->items[x]->data, sizeof(item->email->orig_sensitivity)); LE32_CPU(item->email->orig_sensitivity); t = item->email->orig_sensitivity; - DEBUG_EMAIL(("%s [%i]\n", ((int)t==0?"None":((int)t==1?"Personal": - ((int)t==2?"Private":"Company Confidential"))), t)); + DEBUG_EMAIL(("%s [%i]\n", ((int)t==0?"None": + ((int)t==1?"Personal": + ((int)t==2?"Private": + "Company Confidential"))), t)); + break; + case 0x0032: // PR_REPORT_TIME + DEBUG_EMAIL(("Report time - ")); + MALLOC_EMAIL(item); + LIST_COPY_TIME(item->email->report_time); + DEBUG_EMAIL(("%s", fileTimeToAscii(item->email->report_time))); break; case 0x0036: // PR_SENSITIVITY // sender's opinion of the sensitivity of an email // 0 - None // 1 - Personal // 2 - Private - // 3 - Company Confidiential + // 3 - Company Confidential DEBUG_EMAIL(("Sensitivity - ")); MALLOC_EMAIL(item); memcpy(&(item->email->sensitivity), list->items[x]->data, sizeof(item->email->sensitivity)); LE32_CPU(item->email->sensitivity); t = item->email->sensitivity; - DEBUG_EMAIL(("%s [%i]\n", ((int)t==0?"None":((int)t==1?"Personal": - ((int)t==2?"Private":"Company Confidential"))), t)); + DEBUG_EMAIL(("%s [%i]\n", ((int)t==0?"None": + ((int)t==1?"Personal": + ((int)t==2?"Private": + "Company Confidential"))), t)); 
break; case 0x0037: // PR_SUBJECT raw subject DEBUG_EMAIL(("Raw Subject - ")); @@ -2086,6 +2130,28 @@ LIST_COPY(item->email->header, (char*)); DEBUG_EMAIL(("%s\n", item->email->header)); break; + case 0x0C04: // PR_NDR_REASON_CODE + MALLOC_EMAIL(item); + memcpy(&(item->email->ndr_reason_code), list->items[x]->data, sizeof(item->email->ndr_reason_code)); + LE32_CPU(item->email->ndr_reason_code); + t = item->email->ndr_reason_code; + DEBUG_EMAIL(("NDR reason code - [%i]\n", (int)t)); + break; + case 0x0C05: // PR_NDR_DIAG_CODE + MALLOC_EMAIL(item); + memcpy(&(item->email->ndr_diag_code), list->items[x]->data, sizeof(item->email->ndr_diag_code)); + LE32_CPU(item->email->ndr_diag_code); + t = item->email->ndr_diag_code; + DEBUG_EMAIL(("NDR diag code - [%i]\n", (int)t)); + break; + case 0x0C06: // PR_NON_RECEIPT_NOTIFICATION_REQUESTED + DEBUG_EMAIL(("Non-Receipt Notification Requested - (ignored) - ")); + if (*(int16_t*)list->items[x]->data) { + DEBUG_EMAIL(("True\n")); + } else { + DEBUG_EMAIL(("False\n")); + } + break; case 0x0C17: // PR_REPLY_REQUESTED DEBUG_EMAIL(("Reply Requested - ")); MALLOC_EMAIL(item); @@ -2103,6 +2169,12 @@ case 0x0C1A: // PR_SENDER_NAME Name of Sender Structure 2 DEBUG_EMAIL(("Name of Sender Structure 2 -- NOT HANDLED\n")); break; + case 0x0C1B: // PR_SUPPLEMENTARY_INFO + DEBUG_EMAIL(("Supplementary info - ")); + MALLOC_EMAIL(item); + LIST_COPY(item->email->supplementary_info, (char*)); + DEBUG_EMAIL(("%s\n", item->email->supplementary_info)); + break; case 0x0C1D: // PR_SENDER_SEARCH_KEY Name of Sender Address 2 DEBUG_EMAIL(("Name of Sender Address 2 (Sender search key) - ")); MALLOC_EMAIL(item); @@ -2121,6 +2193,13 @@ LIST_COPY(item->email->sender2_address, (char*)); DEBUG_EMAIL(("%s\n", item->email->sender2_address)); break; + case 0x0C20: // PR_NDR_STATUS_CODE + MALLOC_EMAIL(item); + memcpy(&(item->email->ndr_status_code), list->items[x]->data, sizeof(item->email->ndr_status_code)); + LE32_CPU(item->email->ndr_status_code); + t = 
item->email->ndr_status_code; + DEBUG_EMAIL(("NDR status code - [%i]\n", (int)t)); + break; case 0x0E01: // PR_DELETE_AFTER_SUBMIT // I am not too sure how this works DEBUG_EMAIL(("Delete after submit - ")); @@ -2206,7 +2285,6 @@ case 0x0E20: // PR_ATTACH_SIZE binary Attachment data in record DEBUG_EMAIL(("Attachment Size - ")); NULL_CHECK(attach); - MOVE_NEXT(attach); t = (*(int32_t*)list->items[x]->data); LE32_CPU(t); attach->size = (size_t)t; @@ -2219,11 +2297,20 @@ DEBUG_EMAIL_HEXPRINT(item->record_key, item->record_key_size); DEBUG_EMAIL(("\n")); break; - case 0x1000: // PR_BODY Plain Text body - DEBUG_EMAIL(("Plain Text body - ")); + case 0x1000: // PR_BODY MALLOC_EMAIL(item); LIST_COPY(item->email->body, (char*)); - DEBUG_EMAIL(("%s\n", item->email->body)); + item->email->body_was_unicode = (list->items[x]->type == 0x1f) ? 1 : 0; + DEBUG_EMAIL(("Plain Text body %s - \n%s\n", (item->email->body_was_unicode) ? "unicode" : "sbcs", + item->email->body)); + break; + case 0x1001: // PR_REPORT_TEXT + DEBUG_EMAIL(("Report Text - ")); + MALLOC_EMAIL(item); + LIST_COPY(item->email->report_text, (char*)); + item->email->report_was_unicode = (list->items[x]->type == 0x1f) ? 1 : 0; + DEBUG_EMAIL(("Report Text %s - \n%s\n", (item->email->report_was_unicode) ? "unicode" : "sbcs", + item->email->report_text)); break; case 0x1006: // PR_RTF_SYNC_BODY_CRC DEBUG_EMAIL(("RTF Sync Body CRC - ")); @@ -2271,10 +2358,11 @@ DEBUG_EMAIL(("%i\n", item->email->rtf_ws_trailing_count)); break; case 0x1013: // HTML body - DEBUG_EMAIL(("HTML body - ")); MALLOC_EMAIL(item); LIST_COPY(item->email->htmlbody, (char*)); - DEBUG_EMAIL(("%s\n", item->email->htmlbody)); + item->email->htmlbody_was_unicode = (list->items[x]->type == 0x1f) ? 1 : 0; + DEBUG_EMAIL(("HTML body %s - \n%s\n", (item->email->htmlbody_was_unicode) ? 
"unicode" : "sbcs", + item->email->htmlbody)); break; case 0x1035: // Message ID DEBUG_EMAIL(("Message ID - ")); @@ -2452,7 +2540,6 @@ case 0x3701: // PR_ATTACH_DATA_OBJ binary data of attachment DEBUG_EMAIL(("Binary Data [Size %i] - ", list->items[x]->size)); NULL_CHECK(attach); - MOVE_NEXT(attach); if (!list->items[x]->data) { //special case attach->id2_val = list->items[x]->type; DEBUG_EMAIL(("Seen a Reference. The data hasn't been loaded yet. [%#"PRIx64"][%#x]\n", @@ -2466,7 +2553,6 @@ case 0x3704: // PR_ATTACH_FILENAME Attachment filename (8.3) DEBUG_EMAIL(("Attachment Filename - ")); NULL_CHECK(attach); - MOVE_NEXT(attach); LIST_COPY(attach->filename1, (char*)); DEBUG_EMAIL(("%s\n", attach->filename1)); break; @@ -2480,7 +2566,6 @@ // 6 - OLE DEBUG_EMAIL(("Attachment method - ")); NULL_CHECK(attach); - MOVE_NEXT(attach); memcpy(&(attach->method), list->items[x]->data, sizeof(attach->method)); LE32_CPU(attach->method); t = attach->method; @@ -2494,7 +2579,6 @@ case 0x3707: // PR_ATTACH_LONG_FILENAME Attachment filename (long?) DEBUG_EMAIL(("Attachment Filename long - ")); NULL_CHECK(attach); - MOVE_NEXT(attach); LIST_COPY(attach->filename2, (char*)); DEBUG_EMAIL(("%s\n", attach->filename2)); break; @@ -2502,7 +2586,6 @@ // position in characters that the attachment appears in the plain text body DEBUG_EMAIL(("Attachment Position - ")); NULL_CHECK(attach); - MOVE_NEXT(attach); memcpy(&(attach->position), list->items[x]->data, sizeof(attach->position)); LE32_CPU(attach->position); DEBUG_EMAIL(("%i [%#x]\n", attach->position)); @@ -2510,7 +2593,6 @@ case 0x370E: // PR_ATTACH_MIME_TAG Mime type of encoding DEBUG_EMAIL(("Attachment mime encoding - ")); NULL_CHECK(attach); - MOVE_NEXT(attach); LIST_COPY(attach->mimetype, (char*)); DEBUG_EMAIL(("%s\n", attach->mimetype)); break; @@ -2518,7 +2600,6 @@ // sequence number for mime parts. 
Includes body DEBUG_EMAIL(("Attachment Mime Sequence - ")); NULL_CHECK(attach); - MOVE_NEXT(attach); memcpy(&(attach->sequence), list->items[x]->data, sizeof(attach->sequence)); LE32_CPU(attach->sequence); DEBUG_EMAIL(("%i\n", attach->sequence)); @@ -3013,6 +3094,20 @@ LIST_COPY(item->contact->other_po_box, (char*)); DEBUG_EMAIL(("%s\n", item->contact->other_po_box)); break; + case 0x3FDE: // PR_INTERNET_CPID + MALLOC_EMAIL(item); + memcpy(&(item->email->internet_cpid), list->items[x]->data, sizeof(item->email->internet_cpid)); + LE32_CPU(item->email->internet_cpid); + t = item->email->internet_cpid; + DEBUG_EMAIL(("Internet code page %i\n", (int)t)); + break; + case 0x3FFD: // PR_MESSAGE_CODEPAGE + MALLOC_EMAIL(item); + memcpy(&(item->email->message_codepage), list->items[x]->data, sizeof(item->email->message_codepage)); + LE32_CPU(item->email->message_codepage); + t = item->email->message_codepage; + DEBUG_EMAIL(("Message code page %i\n", (int)t)); + break; case 0x65E3: // Entry ID? DEBUG_EMAIL(("Entry ID - ")); item->record_key = (char*) xmalloc(16+1); @@ -3025,7 +3120,6 @@ DEBUG_EMAIL(("Attachment ID2 value - ")); if (attach) { uint32_t tempid; - MOVE_NEXT(attach); memcpy(&(tempid), list->items[x]->data, sizeof(tempid)); LE32_CPU(tempid); attach->id2_val = tempid; @@ -3523,9 +3617,8 @@ } x++; } - x = 0; list = list->next; - next = 1; + if (attach) attach = attach->next; } DEBUG_RET(); return 0; @@ -3558,8 +3651,9 @@ pst_index2_ll *t; DEBUG_ENT("pst_free_id2"); while (head) { + if (head->child) pst_free_id2(head->child); t = head->next; - free (head); + free(head); head = t; } DEBUG_RET(); @@ -3615,7 +3709,7 @@ } -pst_index2_ll * pst_build_id2(pst_file *pf, pst_index_ll* list, pst_index2_ll* head_ptr) { +pst_index2_ll * pst_build_id2(pst_file *pf, pst_index_ll* list) { pst_block_header block_head; pst_index2_ll *head = NULL, *tail = NULL; uint16_t x = 0; @@ -3626,10 +3720,6 @@ pst_index2_ll *i2_ptr = NULL; DEBUG_ENT("pst_build_id2"); - if (head_ptr) { - head = 
head_ptr; - while (head_ptr) head_ptr = (tail = head_ptr)->next; - } if (pst_read_block_size(pf, list->offset, list->size, &buf) < list->size) { //an error occured in block read WARN(("block read error occured. offset = %#"PRIx64", size = %#"PRIx64"\n", list->offset, list->size)); @@ -3656,41 +3746,28 @@ b_ptr = buf + ((pf->do_read64) ? 0x08 : 0x04); while (x < block_head.count) { b_ptr += pst_decode_assoc(pf, &id2_rec, b_ptr); - DEBUG_INDEX(("\tid2 = %#x, id = %#"PRIx64", table2 = %#"PRIx64"\n", id2_rec.id2, id2_rec.id, id2_rec.table2)); + DEBUG_INDEX(("id2 = %#x, id = %#"PRIx64", table2 = %#"PRIx64"\n", id2_rec.id2, id2_rec.id, id2_rec.table2)); if ((i_ptr = pst_getID(pf, id2_rec.id)) == NULL) { - DEBUG_WARN(("\t\t%#"PRIx64" - Not Found\n", id2_rec.id)); + DEBUG_WARN(("%#"PRIx64" - Not Found\n", id2_rec.id)); } else { - DEBUG_INDEX(("\t\t%#"PRIx64" - Offset %#"PRIx64", u1 %#"PRIx64", Size %"PRIi64"(%#"PRIx64")\n", + DEBUG_INDEX(("%#"PRIx64" - Offset %#"PRIx64", u1 %#"PRIx64", Size %"PRIi64"(%#"PRIx64")\n", i_ptr->id, i_ptr->offset, i_ptr->u1, i_ptr->size, i_ptr->size)); - // add it to the linked list + // add it to the tree i2_ptr = (pst_index2_ll*) xmalloc(sizeof(pst_index2_ll)); - i2_ptr->id2 = id2_rec.id2; - i2_ptr->id = i_ptr; - i2_ptr->next = NULL; + i2_ptr->id2 = id2_rec.id2; + i2_ptr->id = i_ptr; + i2_ptr->child = NULL; + i2_ptr->next = NULL; if (!head) head = i2_ptr; if (tail) tail->next = i2_ptr; tail = i2_ptr; - if (id2_rec.table2 != 0) { + if (id2_rec.table2) { if ((i_ptr = pst_getID(pf, id2_rec.table2)) == NULL) { - DEBUG_WARN(("\tTable2 [%#x] not found\n", id2_rec.table2)); + DEBUG_WARN(("Table2 [%#x] not found\n", id2_rec.table2)); } else { - DEBUG_INDEX(("\tGoing deeper for table2 [%#x]\n", id2_rec.table2)); - if ((i2_ptr = pst_build_id2(pf, i_ptr, head))) { - // DEBUG_INDEX(("pst_build_id2(): \t\tAdding new list onto end of current\n")); - // if (!head) - // head = i2_ptr; - // if (tail) - // tail->next = i2_ptr; - // while (i2_ptr->next) - // 
i2_ptr = i2_ptr->next; - // tail = i2_ptr; - } - // need to re-establish tail - DEBUG_INDEX(("Returned from depth\n")); - if (tail) { - while (tail->next) tail = tail->next; - } + DEBUG_INDEX(("Going deeper for table2 [%#x]\n", id2_rec.table2)); + i2_ptr->child = pst_build_id2(pf, i_ptr); } } } @@ -3749,6 +3826,9 @@ if (item->email->subject) SAFE_FREE(item->email->subject->subj); SAFE_FREE(item->email->subject); + SAFE_FREE(item->email->report_text); + SAFE_FREE(item->email->report_time); + SAFE_FREE(item->email->supplementary_info); free(item->email); } if (item->folder) { @@ -4012,10 +4092,19 @@ } -pst_index_ll * pst_getID2(pst_index2_ll *ptr, uint64_t id) { +pst_index_ll *pst_getID2(pst_index2_ll *head, uint64_t id) { DEBUG_ENT("pst_getID2"); - DEBUG_INDEX(("Head = %p id = %#"PRIx64"\n", ptr, id)); - while (ptr && (ptr->id2 != id)) { + DEBUG_INDEX(("looking for id = %#"PRIx64"\n", id)); + pst_index2_ll *ptr = head; + while (ptr) { + if (ptr->id2 == id) break; + if (ptr->child) { + pst_index_ll *rc = pst_getID2(ptr->child, id); + if (rc) { + DEBUG_RET(); + return rc; + } + } ptr = ptr->next; } if (ptr) { @@ -4024,7 +4113,7 @@ DEBUG_RET(); return ptr->id; } - DEBUG_INDEX(("ERROR Not Found\n")); + //DEBUG_INDEX(("ERROR Not Found\n")); DEBUG_RET(); return NULL; } @@ -4087,6 +4176,7 @@ DEBUG_ENT("pst_printID2ptr"); while (ptr) { DEBUG_INDEX(("%#"PRIx64" id=%#"PRIx64"\n", ptr->id2, (ptr->id ? ptr->id->id : (uint64_t)0))); + if (ptr->child) pst_printID2ptr(ptr->child); ptr = ptr->next; } DEBUG_RET();
--- a/src/libpst.h Sat Feb 14 11:02:37 2009 -0800 +++ b/src/libpst.h Mon Feb 23 20:40:51 2009 -0800 @@ -140,28 +140,29 @@ uint64_t offset; uint64_t size; int64_t u1; - struct pst_index_tree * next; + struct pst_index_tree *next; } pst_index_ll; typedef struct pst_index2_tree { uint64_t id2; pst_index_ll *id; - struct pst_index2_tree * next; + struct pst_index2_tree *child; + struct pst_index2_tree *next; } pst_index2_ll; typedef struct pst_desc_tree { uint64_t id; uint64_t parent_id; - pst_index_ll * list_index; - pst_index_ll * desc; + pst_index_ll *list_index; + pst_index_ll *desc; int32_t no_child; - struct pst_desc_tree * prev; - struct pst_desc_tree * next; - struct pst_desc_tree * parent; - struct pst_desc_tree * child; - struct pst_desc_tree * child_tail; + struct pst_desc_tree *prev; + struct pst_desc_tree *next; + struct pst_desc_tree *parent; + struct pst_desc_tree *child; + struct pst_desc_tree *child_tail; } pst_desc_ll; @@ -173,67 +174,79 @@ typedef struct pst_item_email { - FILETIME *arrival_date; - int autoforward; // 1 = true, 0 = not set, -1 = false - char *body; - char *body_charset; // null if not specified - char *cc_address; - char *bcc_address; - char *common_name; - int32_t conv_index; - int conversion_prohib; // 1 = true, 0 = false - int delete_after_submit; // 1 = true, 0 = false - int delivery_report; // 1 = true, 0 = false - char *encrypted_body; - size_t encrypted_body_size; - char *encrypted_htmlbody; - size_t encrypted_htmlbody_size; - int32_t flag; - char *header; - char *htmlbody; - int32_t importance; - char *in_reply_to; - int message_cc_me; // 1 = true, 0 = false - int message_recip_me; // 1 = true, 0 = false - int message_to_me; // 1 = true, 0 = false - char *messageid; - int32_t orig_sensitivity; - char *original_bcc; - char *original_cc; - char *original_to; - char *outlook_recipient; - char *outlook_recipient_name; - char *outlook_recipient2; - char *outlook_sender; - char *outlook_sender_name; - char *outlook_sender2; - 
int32_t priority; - char *proc_subject; - int read_receipt; // 1 = true, 0 = false - char *recip_access; - char *recip_address; - char *recip2_access; - char *recip2_address; - int reply_requested; // 1 = true, 0 = false - char *reply_to; - char *return_path_address; - int32_t rtf_body_char_count; - int32_t rtf_body_crc; - char *rtf_body_tag; - char *rtf_compressed; - uint32_t rtf_compressed_size; - int rtf_in_sync; // 1 = true, 0 = doesn't exist, -1 = false - int32_t rtf_ws_prefix_count; - int32_t rtf_ws_trailing_count; - char *sender_access; - char *sender_address; - char *sender2_access; - char *sender2_address; - int32_t sensitivity; - FILETIME *sent_date; - pst_entryid *sentmail_folder; - char *sentto_address; + FILETIME *arrival_date; + int autoforward; // 1 = true, 0 = not set, -1 = false + char *body; + int32_t body_was_unicode; // 1 = true, 0 = false + char *body_charset; // null if not specified + char *cc_address; + char *bcc_address; + char *common_name; + int32_t conv_index; + int conversion_prohib; // 1 = true, 0 = false + int delete_after_submit; // 1 = true, 0 = false + int delivery_report; // 1 = true, 0 = false + char *encrypted_body; + size_t encrypted_body_size; + char *encrypted_htmlbody; + size_t encrypted_htmlbody_size; + int32_t flag; + char *header; + char *htmlbody; + int32_t htmlbody_was_unicode; // 1 = true, 0 = false + int32_t importance; + char *in_reply_to; + int message_cc_me; // 1 = true, 0 = false + int message_recip_me; // 1 = true, 0 = false + int message_to_me; // 1 = true, 0 = false + char *messageid; + int32_t orig_sensitivity; + char *original_bcc; + char *original_cc; + char *original_to; + char *outlook_recipient; + char *outlook_recipient_name; + char *outlook_recipient2; + char *outlook_sender; + char *outlook_sender_name; + char *outlook_sender2; + int32_t priority; + char *proc_subject; + int read_receipt; // 1 = true, 0 = false + char *recip_access; + char *recip_address; + char *recip2_access; + char *recip2_address; 
+ int reply_requested; // 1 = true, 0 = false + char *reply_to; + char *return_path_address; + int32_t rtf_body_char_count; + int32_t rtf_body_crc; + char *rtf_body_tag; + char *rtf_compressed; + uint32_t rtf_compressed_size; + int rtf_in_sync; // 1 = true, 0 = doesn't exist, -1 = false + int32_t rtf_ws_prefix_count; + int32_t rtf_ws_trailing_count; + char *sender_access; + char *sender_address; + char *sender2_access; + char *sender2_address; + int32_t sensitivity; + int32_t internet_cpid; + int32_t message_codepage; + FILETIME *sent_date; + pst_entryid *sentmail_folder; + char *sentto_address; pst_item_email_subject *subject; + // delivery report fields + char *report_text; + int32_t report_was_unicode; + FILETIME *report_time; + int32_t ndr_reason_code; + int32_t ndr_diag_code; + char *supplementary_info; + int32_t ndr_status_code; } pst_item_email; @@ -457,18 +470,16 @@ pst_x_attrib_ll *x_head; pst_block_recorder *block_head; - //set this to 0 to read 32-bit pst files (pre Outlook 2003) - //set this to 1 to read 64-bit pst files (Outlook 2003 and later) - int do_read64; - + int do_read64; // 0 is 32-bit pst file, pre Outlook 2003; + // 1 is 64-bit pst file, Outlook 2003 and later uint64_t index1; uint64_t index1_back; uint64_t index2; uint64_t index2_back; - FILE * fp; // file pointer to opened PST file - uint64_t size; // pst file size - unsigned char encryption; // pst encryption setting - unsigned char ind_type; // pst index type + FILE * fp; // file pointer to opened PST file + uint64_t size; // pst file size + unsigned char encryption; // pst encryption setting + unsigned char ind_type; // pst index type } pst_file; @@ -548,7 +559,7 @@ void pst_free_xattrib(pst_x_attrib_ll *x); int pst_getBlockOffsetPointer(pst_file *pf, pst_index2_ll *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p); int pst_getBlockOffset(char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset *p); -pst_index2_ll* 
pst_build_id2(pst_file *pf, pst_index_ll* list, pst_index2_ll* head_ptr); +pst_index2_ll* pst_build_id2(pst_file *pf, pst_index_ll* list); pst_index_ll* pst_getID(pst_file* pf, uint64_t id); pst_index_ll* pst_getID2(pst_index2_ll * ptr, uint64_t id); pst_desc_ll* pst_getDptr(pst_file *pf, uint64_t id);
--- a/src/readpst.c Sat Feb 14 11:02:37 2009 -0800 +++ b/src/readpst.c Mon Feb 23 20:40:51 2009 -0800 @@ -51,7 +51,7 @@ int test_base64(char *body); void find_html_charset(char *html, char *charset, size_t charsetlen); void find_rfc822_headers(char** extra_mime_headers); -void write_body_part(FILE* f_output, char *body, char *mime, char *charset, char *boundary); +void write_body_part(FILE* f_output, char *body, int32_t body_was_unicode, char *mime, char *charset, char *boundary, pst_file* pst); void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf, char** extra_mime_headers); void write_vcard(FILE* f_output, pst_item_contact* contact, char comment[]); void write_appointment(FILE* f_output, pst_item_appointment* appointment, @@ -136,13 +136,12 @@ ff.skip_count++; } else { - DEBUG_MAIN(("main: Desc Email ID %#x [d_ptr->id = %#x]\n", d_ptr->desc->id, d_ptr->id)); + DEBUG_MAIN(("main: Desc Email ID %#"PRIx64" [d_ptr->id = %#"PRIx64"]\n", d_ptr->desc->id, d_ptr->id)); item = pst_parse_item(&pstfile, d_ptr); DEBUG_MAIN(("main: About to process item\n")); if (item && item->email && item->email->subject && item->email->subject->subj) { - DEBUG_EMAIL(("item->email->subject = %p\n", item->email->subject)); - DEBUG_EMAIL(("item->email->subject->subj = %p\n", item->email->subject->subj)); + DEBUG_EMAIL(("item->email->subject->subj = %s\n", item->email->subject->subj)); } if (item) { if (item->folder && d_ptr->child && (deleted_mode == DMODE_INCLUDE || strcasecmp(item->file_as, "Deleted Items"))) { @@ -766,8 +765,7 @@ char *attach_filename; char *enc = NULL; // base64 encoded attachment DEBUG_ENT("write_inline_attachment"); - DEBUG_EMAIL(("Attachment Size is %i\n", attach->size)); - DEBUG_EMAIL(("Attachment Pointer is %p\n", attach->data)); + DEBUG_EMAIL(("Attachment Size is %i, pointer %p, id %d\n", attach->size, attach->data, attach->id_val)); if (attach->data) { enc = base64_encode (attach->data, 
attach->size); if (!enc) { @@ -776,6 +774,15 @@ return; } } + else { + // make sure we can fetch data from the id + pst_index_ll *ptr = pst_getID(pst, attach->id_val); + if (!ptr) { + DEBUG_WARN(("Couldn't find ID pointer. Cannot save attachment to file\n")); + DEBUG_RET(); + return; + } + } fprintf(f_output, "\n--%s\n", boundary); if (!attach->mimetype) { @@ -964,7 +971,7 @@ } } } - DEBUG_EMAIL(("skipping to next block after\n%s\n", headers)); + //DEBUG_EMAIL(("skipping to next block after\n%s\n", headers)); headers = temp+2; // skip to next chunk of headers } *extra_mime_headers = headers; @@ -973,20 +980,23 @@ } -void write_body_part(FILE* f_output, char *body, char *mime, char *charset, char *boundary) +void write_body_part(FILE* f_output, char *body, int32_t body_was_unicode, char *mime, char *charset, char *boundary, pst_file* pst) { char *needfree = NULL; DEBUG_ENT("write_body_part"); - if (strcasecmp("utf-8", charset)) { - // try to convert to the specified charset since it is not utf-8 + if (body_was_unicode && (strcasecmp("utf-8", charset))) { + // try to convert to the specified charset since the target + // is not utf-8, and the data came from a unicode (utf16) field + // and is now in utf-8. 
size_t rc; DEBUG_EMAIL(("Convert %s utf-8 to %s\n", mime, charset)); vbuf *newer = vballoc(2); rc = vb_utf8to8bit(newer, body, strlen(body) + 1, charset); if (rc == (size_t)-1) { - // unable to convert, maybe it is already in that character set + // unable to convert, change the charset to utf8 free(newer->b); DEBUG_EMAIL(("Failed to convert %s utf-8 to %s\n", mime, charset)); + charset = "utf-8"; } else { needfree = body = newer->b; @@ -1015,6 +1025,38 @@ } +const char* codepage(int cp) { + static char buffer[20]; + switch (cp) { + case 932 : return "iso-2022-jp"; + case 936 : return "gb2313"; + case 950 : return "big5"; + case 20127 : return "us-ascii"; + case 20269 : return "iso-6937"; + case 20865 : return "iso-8859-15"; + case 20866 : return "koi8-r"; + case 21866 : return "koi8-u"; + case 28591 : return "iso-8859-1"; + case 28592 : return "iso-8859-2"; + case 28595 : return "iso-8859-5"; + case 28596 : return "iso-8859-6"; + case 28597 : return "iso-8859-7"; + case 28598 : return "iso-8859-8"; + case 28599 : return "iso-8859-9"; + case 50220 : return "iso-2022-jp"; + case 50221 : return "csiso2022jp"; + case 51932 : return "euc-jp"; + case 51949 : return "euc-kr"; + case 65000 : return "utf-7"; + case 65001 : return "utf-8"; + default : + snprintf(buffer, sizeof(buffer), "cp%d", cp); + return buffer; + } + return NULL; +} + + void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf, char** extra_mime_headers) { char boundary[60]; @@ -1032,7 +1074,11 @@ DEBUG_ENT("write_normal_email"); // setup default body character set and report type - snprintf(body_charset, sizeof(body_charset), "%s", (item->email->body_charset) ? item->email->body_charset : "utf-8"); + snprintf(body_charset, sizeof(body_charset), "%s", + (item->email->body_charset) ? item->email->body_charset : + (item->email->message_codepage) ? codepage(item->email->message_codepage) : + (item->email->internet_cpid) ? 
codepage(item->email->internet_cpid) : + "utf-8"); body_report[0] = '\0'; // setup default sender @@ -1123,8 +1169,11 @@ } if (mode != MODE_SEPARATE) { - // most modes need this separator line - fprintf(f_output, "From %s %s\n", sender, c_time); + // most modes need this separator line. + // procmail produces this separator without the quotes around the + // sender email address, but apparently some Mac email client needs + // those quotes, and they don't seem to cause problems for anyone else. + fprintf(f_output, "From \"%s\" %s\n", sender, c_time); } // print the supplied email headers @@ -1198,12 +1247,17 @@ // now dump the body parts if (item->email->body) { - write_body_part(f_output, item->email->body, "text/plain", body_charset, boundary); + write_body_part(f_output, item->email->body, item->email->body_was_unicode, "text/plain", body_charset, boundary, pst); + } + + if ((item->email->report_text) && (body_report[0] != '\0')) { + write_body_part(f_output, item->email->report_text, item->email->report_was_unicode, "text/plain", body_charset, boundary, pst); + fprintf(f_output, "\n"); } if (item->email->htmlbody) { find_html_charset(item->email->htmlbody, body_charset, sizeof(body_charset)); - write_body_part(f_output, item->email->htmlbody, "text/html", body_charset, boundary); + write_body_part(f_output, item->email->htmlbody, item->email->htmlbody_was_unicode, "text/html", body_charset, boundary, pst); } if (item->email->rtf_compressed && save_rtf) {
--- a/src/vbuf.c Sat Feb 14 11:02:37 2009 -0800 +++ b/src/vbuf.c Mon Feb 23 20:40:51 2009 -0800 @@ -43,6 +43,7 @@ static int unicode_up = 0; static iconv_t i16to8; static const char *target_charset = NULL; +static int target_open = 0; static iconv_t i8totarget; @@ -61,10 +62,11 @@ void unicode_close() { iconv_close(i16to8); - if (target_charset) { + if (target_open) { iconv_close(i8totarget); free((char *)target_charset); target_charset = NULL; + target_open = 0; } unicode_up = 0; } @@ -130,19 +132,23 @@ size_t outbytesleft = 0; char *outbuf = NULL; - if (!target_charset || (target_charset && strcasecmp(target_charset, charset))) { - if (target_charset) { + if (!target_charset || strcasecmp(target_charset, charset)) { + if (target_open) { iconv_close(i8totarget); free((char *)target_charset); } target_charset = strdup(charset); + target_open = 1; i8totarget = iconv_open(target_charset, "UTF-8"); if (i8totarget == (iconv_t)-1) { + target_open = 0; fprintf(stderr, "Couldn't open iconv descriptor for UTF-8 to %s.\n", target_charset); return (size_t)-1; } } + if (!target_open) return (size_t)-1; // previous failure to open the target + if (2 > dest->blen) vbresize(dest, 2); dest->dlen = 0;