# HG changeset patch # User carl # Date 1200768436 28800 # Node ID 17654fbdf76be572e9bf05bb4fadd4ffe6cb1600 # Parent f66078abed3865e24db3aeb1741126a4d9eb4ed3 more fixes for 64 bit format diff -r f66078abed38 -r 17654fbdf76b ChangeLog --- a/ChangeLog Fri Jan 18 15:07:12 2008 -0800 +++ b/ChangeLog Sat Jan 19 10:47:16 2008 -0800 @@ -1,10 +1,9 @@ -LibPST 0.6.4 (2008-01-18) +LibPST 0.6.4 (2008-01-19) =============================== * More fixes for Outlook 2003 64 bit parsing. We observed cases of compressed RTF bodies (type 0x1009) with zero length. - * Document type 0x0101 descriptor blocks and try to process them. - This is still not correct. + * Document type 0x0101 descriptor blocks and process them. * Fix large file support - we need to include config.h before any standard headers. diff -r f66078abed38 -r 17654fbdf76b regression/regression-tests.bash --- a/regression/regression-tests.bash Fri Jan 18 15:07:12 2008 -0800 +++ b/regression/regression-tests.bash Sat Jan 19 10:47:16 2008 -0800 @@ -8,25 +8,26 @@ done -#$val ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' ams.pst >ams.err 2>&1 -#$val ../src/readpst -cv -o output1 ams.pst >out1.err 2>&1 -#$val ../src/readpst -cl -r -o output2 ams.pst >out2.err 2>&1 -#$val ../src/readpst -S -o output3 ams.pst >out3.err 2>&1 -#$val ../src/readpst -M -o output4 ams.pst >out4.err 2>&1 -#$val ../src/readpst -o output5 mbmg.archive.pst >out5.err 2>&1 +$val ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' ams.pst >ams.err 2>&1 +$val ../src/readpst -cv -o output1 ams.pst >out1.err 2>&1 +$val ../src/readpst -cl -r -o output2 ams.pst >out2.err 2>&1 +$val ../src/readpst -S -o output3 ams.pst >out3.err 2>&1 +$val ../src/readpst -M -o output4 ams.pst >out4.err 2>&1 -#$val ../src/readpst -o output6 -d dumper test.pst >out6.err 2>&1 -# ../src/readpstlog -f I dumper >dumpertest.log +$val ../src/readpst -o output5 -d dumper mbmg.archive.pst >out5.err 2>&1 + ../src/readpstlog -f I dumper >mbmg.archive.log -#$val ../src/readpst -cv -o output7 -d dumper sample_64.pst >out7.err 2>&1 -# ../src/readpstlog -f I dumper >sample_64.log -# hexdump -C dumper >dumper.hex +$val ../src/readpst -o output6 -d dumper test.pst >out6.err 2>&1 + ../src/readpstlog -f I dumper >dumpertest.log + +$val ../src/readpst -cv -o output7 -d dumper sample_64.pst >out7.err 2>&1 + ../src/readpstlog -f I dumper >sample_64.log $val ../src/readpst -cv -o output8 -d dumper big_mail.pst >out8.err 2>&1 ../src/readpstlog -f I dumper >big_mail.log -#$val ../src/lspst ams.pst >out8.err 2>&1 -# ../src/readpstlog -f I lspst.debug >lspst.log +$val ../src/lspst ams.pst >out8.err 2>&1 + ../src/readpstlog -f I lspst.debug >lspst.log -#rm -f dumper lspst.debug +rm -f dumper lspst.debug diff -r f66078abed38 -r 17654fbdf76b src/libpst.c --- a/src/libpst.c Fri Jan 18 15:07:12 2008 -0800 +++ b/src/libpst.c Sat Jan 19 10:47:16 2008 -0800 @@ -30,8 +30,7 @@ #include "libpst.h" #include "timeconv.h" -//efine INDEX_DEPTH 0x4C -//efine SECOND_DEPTH 0x5C + #define INDEX_TYPE32 0x0E #define INDEX_TYPE64 0x17 #define INDEX_TYPE_OFFSET (off_t)0x0A @@ -254,7 +253,7 @@ size_t pst_attach_to_mem(pst_file *pf, pst_item_attach *attach, unsigned char **b){ size_t size=0; pst_index_ll *ptr; - struct holder h = {b, NULL, 0, "", 0}; + pst_holder h = {b, NULL, 0, "", 0}; DEBUG_ENT("pst_attach_to_mem"); if (attach->id_val != (uint64_t)-1) { ptr = pst_getID(pf, attach->id_val); @@ -275,7 +274,7 @@ size_t pst_attach_to_file(pst_file *pf, pst_item_attach *attach, FILE* fp) { pst_index_ll *ptr; - struct holder h = {NULL, fp, 0, "", 0}; + pst_holder h = {NULL, fp, 0, "", 0}; size_t size; DEBUG_ENT("pst_attach_to_file"); if (attach->id_val != (uint64_t)-1) { @@ -299,7 +298,7 @@ size_t pst_attach_to_file_base64(pst_file *pf, pst_item_attach *attach, FILE* fp) { pst_index_ll *ptr; - struct holder h = {NULL, fp, 1, "", 0}; + pst_holder h = {NULL, fp, 1, "", 0}; size_t size; char *c; DEBUG_ENT("pst_attach_to_file_base64"); @@ -1253,21 +1252,25 @@ } -static void freeall(unsigned char *buf, pst_block_offset_pointer *p1, - pst_block_offset_pointer *p2, - pst_block_offset_pointer *p3, - pst_block_offset_pointer *p4, - pst_block_offset_pointer *p5, - pst_block_offset_pointer *p6, - pst_block_offset_pointer *p7); -static void freeall(unsigned char *buf, pst_block_offset_pointer *p1, - pst_block_offset_pointer *p2, - pst_block_offset_pointer *p3, - pst_block_offset_pointer *p4, - pst_block_offset_pointer *p5, - pst_block_offset_pointer *p6, - pst_block_offset_pointer *p7) { - if (buf) free(buf); +static void freeall(pst_subblocks *subs, pst_block_offset_pointer *p1, + pst_block_offset_pointer *p2, + pst_block_offset_pointer *p3, + pst_block_offset_pointer *p4, + pst_block_offset_pointer *p5, + pst_block_offset_pointer *p6, + pst_block_offset_pointer *p7); +static void freeall(pst_subblocks *subs, pst_block_offset_pointer *p1, + pst_block_offset_pointer *p2, + pst_block_offset_pointer *p3, + pst_block_offset_pointer *p4, + pst_block_offset_pointer *p5, + pst_block_offset_pointer *p6, + pst_block_offset_pointer *p7) { + size_t i; + for (i=0; isubblock_count; i++) { + if (subs->subs[i].buf) free(subs->subs[i].buf); + } + free(subs->subs); if (p1->needfree) free(p1->from); if (p2->needfree) free(p2->from); if (p3->needfree) free(p3->from); @@ -1280,6 +1283,8 @@ pst_num_array * pst_parse_block(pst_file *pf, uint64_t block_id, pst_index2_ll *i2_head, pst_num_array *na_head) { unsigned char *buf = NULL; + size_t read_size = 0; + pst_subblocks subblocks; pst_num_array *na_ptr = NULL; pst_block_offset_pointer block_offset1; pst_block_offset_pointer block_offset2; @@ -1295,13 +1300,11 @@ int32_t cur_list; int block_type; uint32_t rec_size = 0; - uint32_t ind_ptr; unsigned char* list_start; unsigned char* fr_ptr; unsigned char* to_ptr; unsigned char* ind2_end = NULL; unsigned char* ind2_ptr = NULL; - size_t read_size=0; pst_x_attrib_ll *mapptr; struct { @@ -1366,14 +1369,47 @@ LE32_CPU(block_hdr.offset); DEBUG_EMAIL(("block header (index_offset=%#hx, type=%#hx, offset=%#hx)\n", block_hdr.index_offset, block_hdr.type, block_hdr.offset)); - ind_ptr = block_hdr.index_offset; + if (block_hdr.index_offset == (uint16_t)0x0101) { //type 3 + subblocks.subblock_count = block_hdr.type; + subblocks.subs = malloc(sizeof(pst_subblock) * subblocks.subblock_count); + size_t i; + char *b_ptr = buf + 8; + for (i=0; inext = na_head; na_head = na_ptr; - // allocate an array of count num_recs to contain sizeof(struct pst_num_item) - na_ptr->items = (struct pst_num_item**) xmalloc(sizeof(struct pst_num_item)*num_list); + // allocate an array of count num_recs to contain sizeof(pst_num_item) + na_ptr->items = (pst_num_item**) xmalloc(sizeof(pst_num_item)*num_list); na_ptr->count_item = num_list; na_ptr->orig_count = num_list; na_ptr->count_array = (int32_t)num_recs; // each record will have a record of the total number of records @@ -1550,7 +1570,7 @@ fr_ptr += sizeof(table2_rec); } else { WARN(("Missing code for block_type %i\n", block_type)); - freeall(buf, &block_offset1, &block_offset2, &block_offset3, &block_offset4, &block_offset5, &block_offset6, &block_offset7); + freeall(&subblocks, &block_offset1, &block_offset2, &block_offset3, &block_offset4, &block_offset5, &block_offset6, &block_offset7); if (na_head) pst_free_list(na_head); DEBUG_RET(); return NULL; @@ -1559,9 +1579,9 @@ x, table_rec.type, table_rec.ref_type, table_rec.value)); if (!na_ptr->items[x]) { - na_ptr->items[x] = (struct pst_num_item*) xmalloc(sizeof(struct pst_num_item)); + na_ptr->items[x] = (pst_num_item*) xmalloc(sizeof(pst_num_item)); } - memset(na_ptr->items[x], 0, sizeof(struct pst_num_item)); //init it + memset(na_ptr->items[x], 0, sizeof(pst_num_item)); //init it // check here to see if the id of the attribute is a mapped one mapptr = pf->x_head; @@ -1635,7 +1655,7 @@ na_ptr->items[x]->data = xmalloc(value_size); memcpy(na_ptr->items[x]->data, value_pointer, value_size); } - else if (pst_getBlockOffsetPointer(pf, i2_head, buf, read_size, ind_ptr, table_rec.value, &block_offset7)) { + else if (pst_getBlockOffsetPointer(pf, i2_head, &subblocks, table_rec.value, &block_offset7)) { if ((table_rec.value & 0xf) == (uint32_t)0xf) { DEBUG_WARN(("failed to get block offset for table_rec.value of %#x to be read later.\n", table_rec.value)); na_ptr->items[x]->size = 0; @@ -1697,7 +1717,7 @@ if (na_ptr->items[x]->type == 0) na_ptr->items[x]->type = table_rec.ref_type; } else { WARN(("ERROR Unknown ref_type %#hx\n", table_rec.ref_type)); - freeall(buf, &block_offset1, &block_offset2, &block_offset3, &block_offset4, &block_offset5, &block_offset6, &block_offset7); + freeall(&subblocks, &block_offset1, &block_offset2, &block_offset3, &block_offset4, &block_offset5, &block_offset6, &block_offset7); if (na_head) pst_free_list(na_head); DEBUG_RET(); return NULL; @@ -1707,7 +1727,7 @@ DEBUG_EMAIL(("increasing ind2_ptr by %i [%#x] bytes. Was %#x, Now %#x\n", rec_size, rec_size, ind2_ptr, ind2_ptr+rec_size)); ind2_ptr += rec_size; } - freeall(buf, &block_offset1, &block_offset2, &block_offset3, &block_offset4, &block_offset5, &block_offset6, &block_offset7); + freeall(&subblocks, &block_offset1, &block_offset2, &block_offset3, &block_offset4, &block_offset5, &block_offset6, &block_offset7); DEBUG_RET(); return na_head; } @@ -1781,14 +1801,19 @@ switch (list->items[x]->id) { case PST_ATTRIB_HEADER: // CUSTOM attribute for saying the Extra Headers DEBUG_EMAIL(("Extra Field - ")); - ef = (pst_item_extra_field*) xmalloc(sizeof(pst_item_extra_field)); - memset(ef, 0, sizeof(pst_item_extra_field)); - ef->field_name = (char*) xmalloc(strlen(list->items[x]->extra)+1); - strcpy(ef->field_name, list->items[x]->extra); - LIST_COPY(ef->value, (char*)); - ef->next = item->extra_fields; - item->extra_fields = ef; - DEBUG_EMAIL(("\"%s\" = \"%s\"\n", ef->field_name, ef->value)); + if (list->items[x]->extra) { + ef = (pst_item_extra_field*) xmalloc(sizeof(pst_item_extra_field)); + memset(ef, 0, sizeof(pst_item_extra_field)); + ef->field_name = (char*) xmalloc(strlen(list->items[x]->extra)+1); + strcpy(ef->field_name, list->items[x]->extra); + LIST_COPY(ef->value, (char*)); + ef->next = item->extra_fields; + item->extra_fields = ef; + DEBUG_EMAIL(("\"%s\" = \"%s\"\n", ef->field_name, ef->value)); + } + else { + DEBUG_EMAIL(("NULL extra field\n")); + } break; case 0x0002: // PR_ALTERNATE_RECIPIENT_ALLOWED // If set to true, the sender allows this email to be autoforwarded @@ -3722,19 +3747,23 @@ /** * The offset might be zero, in which case we have no data, so return a pair of null pointers. * Or, the offset might end in 0xf, so it is an id2 pointer, in which case we read the id2 block. - * Otherwise, the offset>>4 is an index into the table of offsets in the buffer. + * Otherwise, the high order 16 bits of offset is the index into the subblocks, and + * the (low order 16 bits of offset)>>4 is an index into the table of offsets in the subblock. */ -int pst_getBlockOffsetPointer(pst_file *pf, pst_index2_ll *i2_head, unsigned char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset_pointer *p) { +int pst_getBlockOffsetPointer(pst_file *pf, pst_index2_ll *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p) { size_t size; pst_block_offset block_offset; DEBUG_ENT("pst_getBlockOffsetPointer"); if (p->needfree) free(p->from); - p->from = NULL; + p->from = NULL; + p->to = NULL; p->needfree = 0; if (!offset) { + // no data p->from = p->to = NULL; } else if ((offset & 0xf) == (uint32_t)0xf) { + // external index reference DEBUG_WARN(("Found id2 %#x value. Will follow it\n", offset)); size = pst_ff_getID2block(pf, offset, i2_head, &(p->from)); if (size) { @@ -3745,12 +3774,19 @@ p->from = p->to = NULL; } } - else if (pst_getBlockOffset(buf, read_size, i_offset, offset, &block_offset)) { - p->from = p->to = NULL; - } else { - p->from = buf + block_offset.from; - p->to = buf + block_offset.to; + // internal index reference + size_t subindex = offset >> 16; + size_t suboffset = offset & 0xffff; + if (subindex < subblocks->subblock_count) { + if (pst_getBlockOffset(subblocks->subs[subindex].buf, + subblocks->subs[subindex].read_size, + subblocks->subs[subindex].i_offset, + suboffset, &block_offset)) { + p->from = subblocks->subs[subindex].buf + block_offset.from; + p->to = subblocks->subs[subindex].buf + block_offset.to; + } + } } DEBUG_RET(); return (p->from) ? 0 : 1; @@ -3764,7 +3800,7 @@ if (!p || !buf || !i_offset || low || (i_offset+2+of1+sizeof(*p) > read_size)) { DEBUG_WARN(("p is NULL or buf is NULL or offset is 0 or offset has low bits or beyond read size (%p, %p, %#x, %i, %i)\n", p, buf, offset, read_size, i_offset)); DEBUG_RET(); - return -1; + return 0; } memcpy(&(p->from), &(buf[(i_offset+2)+of1]), sizeof(p->from)); memcpy(&(p->to), &(buf[(i_offset+2)+of1+sizeof(p->from)]), sizeof(p->to)); @@ -3773,10 +3809,10 @@ DEBUG_WARN(("get block offset finds from=%i(%#x), to=%i(%#x)\n", p->from, p->from, p->to, p->to)); if (p->from > p->to) { DEBUG_WARN(("get block offset from > to")); - return -1; + return 0; } DEBUG_RET(); - return 0; + return 1; } @@ -4138,8 +4174,7 @@ #define PST_PTR_BLOCK_SIZE 0x120 size_t pst_ff_getID2block(pst_file *pf, uint64_t id2, pst_index2_ll *id2_head, unsigned char** buf) { pst_index_ll* ptr; - // size_t ret; - struct holder h = {buf, NULL, 0, "", 0}; + pst_holder h = {buf, NULL, 0, "", 0}; DEBUG_ENT("pst_ff_getID2block"); ptr = pst_getID2(id2_head, id2); @@ -4153,7 +4188,7 @@ } -size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, struct holder *h) { +size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, pst_holder *h) { size_t ret; unsigned char *b = NULL, *t; DEBUG_ENT("pst_ff_getID2data"); @@ -4187,7 +4222,7 @@ } -size_t pst_ff_compile_ID(pst_file *pf, uint64_t id, struct holder *h, size_t size) { +size_t pst_ff_compile_ID(pst_file *pf, uint64_t id, pst_holder *h, size_t size) { size_t z, a; uint16_t count, y; uint32_t x, b; diff -r f66078abed38 -r 17654fbdf76b src/libpst.h --- a/src/libpst.h Fri Jan 18 15:07:12 2008 -0800 +++ b/src/libpst.h Sat Jan 19 10:47:16 2008 -0800 @@ -111,383 +111,424 @@ #define PST_APP_LABEL_ANNIVERSARY 9 // Anniversary #define PST_APP_LABEL_PHONE_CALL 10// Phone Call + typedef struct pst_misc_6_struct { - int32_t i1; - int32_t i2; - int32_t i3; - int32_t i4; - int32_t i5; - int32_t i6; + int32_t i1; + int32_t i2; + int32_t i3; + int32_t i4; + int32_t i5; + int32_t i6; } pst_misc_6; + typedef struct pst_entryid_struct { - int32_t u1; - char entryid[16]; - uint32_t id; + int32_t u1; + char entryid[16]; + uint32_t id; } pst_entryid; + typedef struct pst_desc_struct32 { - uint32_t d_id; - uint32_t desc_id; - uint32_t list_id; - uint32_t parent_id; + uint32_t d_id; + uint32_t desc_id; + uint32_t list_id; + uint32_t parent_id; } pst_desc32; + typedef struct pst_desc_structn { - uint64_t d_id; - uint64_t desc_id; - uint64_t list_id; - uint32_t parent_id; // not 64 bit ?? - uint32_t u1; // padding + uint64_t d_id; + uint64_t desc_id; + uint64_t list_id; + uint32_t parent_id; // not 64 bit ?? + uint32_t u1; // padding } pst_descn; + typedef struct pst_index_struct32 { - uint32_t id; - uint32_t offset; - uint16_t size; - int16_t u1; + uint32_t id; + uint32_t offset; + uint16_t size; + int16_t u1; } pst_index32; + typedef struct pst_index_struct { - uint64_t id; - uint64_t offset; - uint16_t size; - int16_t u0; - int32_t u1; + uint64_t id; + uint64_t offset; + uint16_t size; + int16_t u0; + int32_t u1; } pst_index; + typedef struct pst_index_tree32 { - uint32_t id; - uint32_t offset; - uint32_t size; - int32_t u1; - struct pst_index_tree * next; + uint32_t id; + uint32_t offset; + uint32_t size; + int32_t u1; + struct pst_index_tree * next; } pst_index_ll32; + typedef struct pst_index_tree { - uint64_t id; - uint64_t offset; - uint64_t size; - int64_t u1; - struct pst_index_tree * next; + uint64_t id; + uint64_t offset; + uint64_t size; + int64_t u1; + struct pst_index_tree * next; } pst_index_ll; + typedef struct pst_index2_tree { - uint64_t id2; - pst_index_ll *id; - struct pst_index2_tree * next; + uint64_t id2; + pst_index_ll *id; + struct pst_index2_tree * next; } pst_index2_ll; + typedef struct pst_desc_tree { - uint64_t id; - pst_index_ll * list_index; - pst_index_ll * desc; - int32_t no_child; - struct pst_desc_tree * prev; - struct pst_desc_tree * next; - struct pst_desc_tree * parent; - struct pst_desc_tree * child; - struct pst_desc_tree * child_tail; + uint64_t id; + pst_index_ll * list_index; + pst_index_ll * desc; + int32_t no_child; + struct pst_desc_tree * prev; + struct pst_desc_tree * next; + struct pst_desc_tree * parent; + struct pst_desc_tree * child; + struct pst_desc_tree * child_tail; } pst_desc_ll; + typedef struct pst_item_email_subject { - int off1; - int off2; - char *subj; + int off1; + int off2; + char *subj; } pst_item_email_subject; + typedef struct pst_item_email { - FILETIME *arrival_date; - int autoforward; // 1 = true, 0 = not set, -1 = false - char *body; - char *cc_address; - char *common_name; - int32_t conv_index; - int conversion_prohib; // 1 = true, 0 = false - int delete_after_submit; // 1 = true, 0 = false - int delivery_report; // 1 = true, 0 = false - char *encrypted_body; - int32_t encrypted_body_size; - char *encrypted_htmlbody; - int32_t encrypted_htmlbody_size; - int32_t flag; - char *header; - char *htmlbody; - int32_t importance; - char *in_reply_to; - int message_cc_me; // 1 = true, 0 = false - int message_recip_me; // 1 = true, 0 = false - int message_to_me; // 1 = true, 0 = false - char *messageid; - int32_t orig_sensitivity; - char *outlook_recipient; - char *outlook_recipient2; - char *outlook_sender; - char *outlook_sender_name; - char *outlook_sender2; - int32_t priority; - char *proc_subject; - int read_receipt; // 1 = true, 0 = false - char *recip_access; - char *recip_address; - char *recip2_access; - char *recip2_address; - int reply_requested; // 1 = true, 0 = false - char *reply_to; - char *return_path_address; - int32_t rtf_body_char_count; - int32_t rtf_body_crc; - char *rtf_body_tag; - char *rtf_compressed; - uint32_t rtf_compressed_size; - int rtf_in_sync; // 1 = true, 0 = doesn't exist, -1 = false - int32_t rtf_ws_prefix_count; - int32_t rtf_ws_trailing_count; - char *sender_access; - char *sender_address; - char *sender2_access; - char *sender2_address; - int32_t sensitivity; - FILETIME *sent_date; - pst_entryid *sentmail_folder; - char *sentto_address; - pst_item_email_subject *subject; + FILETIME *arrival_date; + int autoforward; // 1 = true, 0 = not set, -1 = false + char *body; + char *cc_address; + char *common_name; + int32_t conv_index; + int conversion_prohib; // 1 = true, 0 = false + int delete_after_submit; // 1 = true, 0 = false + int delivery_report; // 1 = true, 0 = false + char *encrypted_body; + int32_t encrypted_body_size; + char *encrypted_htmlbody; + int32_t encrypted_htmlbody_size; + int32_t flag; + char *header; + char *htmlbody; + int32_t importance; + char *in_reply_to; + int message_cc_me; // 1 = true, 0 = false + int message_recip_me; // 1 = true, 0 = false + int message_to_me; // 1 = true, 0 = false + char *messageid; + int32_t orig_sensitivity; + char *outlook_recipient; + char *outlook_recipient2; + char *outlook_sender; + char *outlook_sender_name; + char *outlook_sender2; + int32_t priority; + char *proc_subject; + int read_receipt; // 1 = true, 0 = false + char *recip_access; + char *recip_address; + char *recip2_access; + char *recip2_address; + int reply_requested; // 1 = true, 0 = false + char *reply_to; + char *return_path_address; + int32_t rtf_body_char_count; + int32_t rtf_body_crc; + char *rtf_body_tag; + char *rtf_compressed; + uint32_t rtf_compressed_size; + int rtf_in_sync; // 1 = true, 0 = doesn't exist, -1 = false + int32_t rtf_ws_prefix_count; + int32_t rtf_ws_trailing_count; + char *sender_access; + char *sender_address; + char *sender2_access; + char *sender2_address; + int32_t sensitivity; + FILETIME *sent_date; + pst_entryid *sentmail_folder; + char *sentto_address; + pst_item_email_subject *subject; } pst_item_email; + typedef struct pst_item_folder { - int32_t email_count; - int32_t unseen_email_count; - int32_t assoc_count; - int subfolder; // 1 = true, 0 = false + int32_t email_count; + int32_t unseen_email_count; + int32_t assoc_count; + int subfolder; // 1 = true, 0 = false } pst_item_folder; + typedef struct pst_item_message_store { - pst_entryid *deleted_items_folder; - pst_entryid *search_root_folder; - pst_entryid *top_of_personal_folder; - pst_entryid *top_of_folder; - int32_t valid_mask; // what folders the message store contains - int32_t pwd_chksum; + pst_entryid *deleted_items_folder; + pst_entryid *search_root_folder; + pst_entryid *top_of_personal_folder; + pst_entryid *top_of_folder; + int32_t valid_mask; // what folders the message store contains + int32_t pwd_chksum; } pst_item_message_store; + typedef struct pst_item_contact { - char *access_method; - char *account_name; - char *address1; - char *address1a; - char *address1_desc; - char *address1_transport; - char *address2; - char *address2a; - char *address2_desc; - char *address2_transport; - char *address3; - char *address3a; - char *address3_desc; - char *address3_transport; - char *assistant_name; - char *assistant_phone; - char *billing_information; - FILETIME *birthday; - char *business_address; - char *business_city; - char *business_country; - char *business_fax; - char *business_homepage; - char *business_phone; - char *business_phone2; - char *business_po_box; - char *business_postal_code; - char *business_state; - char *business_street; - char *callback_phone; - char *car_phone; - char *company_main_phone; - char *company_name; - char *computer_name; - char *customer_id; - char *def_postal_address; - char *department; - char *display_name_prefix; - char *first_name; - char *followup; - char *free_busy_address; - char *ftp_site; - char *fullname; - int32_t gender; - char *gov_id; - char *hobbies; - char *home_address; - char *home_city; - char *home_country; - char *home_fax; - char *home_phone; - char *home_phone2; - char *home_po_box; - char *home_postal_code; - char *home_state; - char *home_street; - char *initials; - char *isdn_phone; - char *job_title; - char *keyword; - char *language; - char *location; - int mail_permission; // 1 = true, 0 = false - char *manager_name; - char *middle_name; - char *mileage; - char *mobile_phone; - char *nickname; - char *office_loc; - char *org_id; - char *other_address; - char *other_city; - char *other_country; - char *other_phone; - char *other_po_box; - char *other_postal_code; - char *other_state; - char *other_street; - char *pager_phone; - char *personal_homepage; - char *pref_name; - char *primary_fax; - char *primary_phone; - char *profession; - char *radio_phone; - int rich_text; // 1 = true, 0 = false - char *spouse_name; - char *suffix; - char *surname; - char *telex; - char *transmittable_display_name; - char *ttytdd_phone; - FILETIME *wedding_anniversary; + char *access_method; + char *account_name; + char *address1; + char *address1a; + char *address1_desc; + char *address1_transport; + char *address2; + char *address2a; + char *address2_desc; + char *address2_transport; + char *address3; + char *address3a; + char *address3_desc; + char *address3_transport; + char *assistant_name; + char *assistant_phone; + char *billing_information; + FILETIME *birthday; + char *business_address; + char *business_city; + char *business_country; + char *business_fax; + char *business_homepage; + char *business_phone; + char *business_phone2; + char *business_po_box; + char *business_postal_code; + char *business_state; + char *business_street; + char *callback_phone; + char *car_phone; + char *company_main_phone; + char *company_name; + char *computer_name; + char *customer_id; + char *def_postal_address; + char *department; + char *display_name_prefix; + char *first_name; + char *followup; + char *free_busy_address; + char *ftp_site; + char *fullname; + int32_t gender; + char *gov_id; + char *hobbies; + char *home_address; + char *home_city; + char *home_country; + char *home_fax; + char *home_phone; + char *home_phone2; + char *home_po_box; + char *home_postal_code; + char *home_state; + char *home_street; + char *initials; + char *isdn_phone; + char *job_title; + char *keyword; + char *language; + char *location; + int mail_permission; // 1 = true, 0 = false + char *manager_name; + char *middle_name; + char *mileage; + char *mobile_phone; + char *nickname; + char *office_loc; + char *org_id; + char *other_address; + char *other_city; + char *other_country; + char *other_phone; + char *other_po_box; + char *other_postal_code; + char *other_state; + char *other_street; + char *pager_phone; + char *personal_homepage; + char *pref_name; + char *primary_fax; + char *primary_phone; + char *profession; + char *radio_phone; + int rich_text; // 1 = true, 0 = false + char *spouse_name; + char *suffix; + char *surname; + char *telex; + char *transmittable_display_name; + char *ttytdd_phone; + FILETIME *wedding_anniversary; } pst_item_contact; + typedef struct pst_item_attach { - char *filename1; - char *filename2; - char *mimetype; - char *data; - size_t size; - uint64_t id2_val; - uint64_t id_val; // calculated from id2_val during creation of record - int32_t method; - int32_t position; - int32_t sequence; - struct pst_item_attach *next; + char *filename1; + char *filename2; + char *mimetype; + char *data; + size_t size; + uint64_t id2_val; + uint64_t id_val; // calculated from id2_val during creation of record + int32_t method; + int32_t position; + int32_t sequence; + struct pst_item_attach *next; } pst_item_attach; + typedef struct pst_item_extra_field { - char *field_name; - char *value; - struct pst_item_extra_field *next; + char *field_name; + char *value; + struct pst_item_extra_field *next; } pst_item_extra_field; + typedef struct pst_item_journal { - FILETIME *end; - FILETIME *start; - char *type; + FILETIME *end; + FILETIME *start; + char *type; } pst_item_journal; + typedef struct pst_item_appointment { - FILETIME *end; - char *location; - FILETIME *reminder; - FILETIME *start; - char *timezonestring; - int32_t showas; - int32_t label; - int all_day; // 1 = true, 0 = false + FILETIME *end; + char *location; + FILETIME *reminder; + FILETIME *start; + char *timezonestring; + int32_t showas; + int32_t label; + int all_day; // 1 = true, 0 = false } pst_item_appointment; + typedef struct pst_item { - struct pst_item_email *email; // data reffering to email - struct pst_item_folder *folder; // data reffering to folder - struct pst_item_contact *contact; // data reffering to contact - struct pst_item_attach *attach; // linked list of attachments - struct pst_item_message_store *message_store; // data referring to the message store - struct pst_item_extra_field *extra_fields; // linked list of extra headers and such - struct pst_item_journal *journal; // data reffering to a journal entry - struct pst_item_appointment *appointment; // data reffering to a calendar entry - int type; - char *ascii_type; - char *file_as; - char *comment; - int32_t message_size; - char *outlook_version; - char *record_key; // probably 16 bytes long. - size_t record_key_size; - int response_requested; // 1 = true, 0 = false - FILETIME *create_date; - FILETIME *modify_date; - int private_member; // 1 = true, 0 = false + struct pst_item_email *email; // data reffering to email + struct pst_item_folder *folder; // data reffering to folder + struct pst_item_contact *contact; // data reffering to contact + struct pst_item_attach *attach; // linked list of attachments + struct pst_item_message_store *message_store; // data referring to the message store + struct pst_item_extra_field *extra_fields; // linked list of extra headers and such + struct pst_item_journal *journal; // data reffering to a journal entry + struct pst_item_appointment *appointment; // data reffering to a calendar entry + int type; + char *ascii_type; + char *file_as; + char *comment; + int32_t message_size; + char *outlook_version; + char *record_key; // probably 16 bytes long. + size_t record_key_size; + int response_requested; // 1 = true, 0 = false + FILETIME *create_date; + FILETIME *modify_date; + int private_member; // 1 = true, 0 = false } pst_item; + typedef struct pst_x_attrib_ll { - uint32_t type; - uint32_t mytype; - uint32_t map; - void *data; - struct pst_x_attrib_ll *next; + uint32_t type; + uint32_t mytype; + uint32_t map; + void *data; + struct pst_x_attrib_ll *next; } pst_x_attrib_ll; + typedef struct pst_file { - pst_index_ll *i_head, *i_tail; - pst_index2_ll *i2_head; - pst_desc_ll *d_head, *d_tail; - pst_x_attrib_ll *x_head; + pst_index_ll *i_head, *i_tail; + pst_index2_ll *i2_head; + pst_desc_ll *d_head, *d_tail; + pst_x_attrib_ll *x_head; - //set this to 0 to read 32-bit pst files (pre Outlook 2003) - //set this to 1 to read 64-bit pst files (Outlook 2003 and later) - int do_read64; + //set this to 0 to read 32-bit pst files (pre Outlook 2003) + //set this to 1 to read 64-bit pst files (Outlook 2003 and later) + int do_read64; - uint64_t index1; - uint64_t index1_back; - uint64_t index2; - uint64_t index2_back; - FILE * fp; // file pointer to opened PST file - uint64_t size; // pst file size - unsigned char encryption; // pst encryption setting - unsigned char ind_type; // pst index type + uint64_t index1; + uint64_t index1_back; + uint64_t index2; + uint64_t index2_back; + FILE * fp; // file pointer to opened PST file + uint64_t size; // pst file size + unsigned char encryption; // pst encryption setting + unsigned char ind_type; // pst index type } pst_file; + typedef struct pst_block_offset { - int16_t from; - int16_t to; + int16_t from; + int16_t to; } pst_block_offset; + typedef struct pst_block_offset_pointer { - unsigned char *from; - unsigned char *to; - int needfree; + unsigned char *from; + unsigned char *to; + int needfree; } pst_block_offset_pointer; -struct pst_num_item { - uint32_t id; - unsigned char *data; - uint32_t type; - size_t size; - char *extra; -}; + +typedef struct pst_num_item { + uint32_t id; // not an id1 or id2, this is actually some sort of type code + unsigned char *data; + uint32_t type; + size_t size; + char *extra; +} pst_num_item; + typedef struct pst_num_array { - int32_t count_item; - int32_t orig_count; - int32_t count_array; - struct pst_num_item ** items; - struct pst_num_array *next; + int32_t count_item; + int32_t orig_count; + int32_t count_array; + struct pst_num_item ** items; + struct pst_num_array *next; } pst_num_array; -struct holder { - unsigned char **buf; - FILE * fp; - int base64; - char base64_extra_chars[3]; - uint32_t base64_extra; -}; + +typedef struct pst_holder { + unsigned char **buf; + FILE * fp; + int base64; + char base64_extra_chars[3]; + uint32_t base64_extra; +} pst_holder; + + +typedef struct pst_subblock { + unsigned char *buf; + size_t read_size; + size_t i_offset; +} pst_subblock; + + +typedef struct pst_subblocks { + size_t subblock_count; + pst_subblock *subs; +} pst_subblocks; + // prototypes int pst_open(pst_file *pf, char *name, char *mode); @@ -512,7 +553,7 @@ void pst_free_id (pst_index_ll *head); void pst_free_desc (pst_desc_ll *head); void pst_free_xattrib(pst_x_attrib_ll *x); -int pst_getBlockOffsetPointer(pst_file *pf, pst_index2_ll *i2_head, unsigned char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset_pointer *p); +int pst_getBlockOffsetPointer(pst_file *pf, pst_index2_ll *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p); int pst_getBlockOffset(unsigned char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset *p); pst_index2_ll* pst_build_id2(pst_file *pf, pst_index_ll* list, pst_index2_ll* head_ptr); pst_index_ll* pst_getID(pst_file* pf, uint64_t id); @@ -527,8 +568,8 @@ size_t pst_ff_getIDblock_dec(pst_file *pf, uint64_t id, unsigned char **b); size_t pst_ff_getIDblock(pst_file *pf, uint64_t id, unsigned char** b); size_t pst_ff_getID2block(pst_file *pf, uint64_t id2, pst_index2_ll *id2_head, unsigned char** buf); -size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, struct holder *h); -size_t pst_ff_compile_ID(pst_file *pf, uint64_t id, struct holder *h, size_t size); +size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, pst_holder *h); +size_t pst_ff_compile_ID(pst_file *pf, uint64_t id, pst_holder *h, size_t size); int pst_strincmp(char *a, char *b, size_t x); int pst_stricmp(char *a, char *b); diff -r f66078abed38 -r 17654fbdf76b xml/libpst.in --- a/xml/libpst.in Fri Jan 18 15:07:12 2008 -0800 +++ b/xml/libpst.in Sat Jan 19 10:47:16 2008 -0800 @@ -1367,16 +1367,26 @@ Note the b5offset of 0x0020, which is a type that I will call an index - reference. Such index references have at least two different forms, and - may point to data either in this block, or in some other block. + reference. Such index references have at least two different forms, + and may point to data either in this block, or in some other block. External pointer references have the low order 4 bits all set, and are ID2 values that can be used to fetch data. This value of 0x0020 is an - internal pointer reference, which needs to be right shifted by 4 bits to - become 0x0002, which is then a byte offset to be added to the above + internal pointer reference, which needs to be right shifted by 4 bits + to become 0x0002, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0xc, 0x14) pair. + So far we have only described internal index references where the high + order 16 bits are zero. That suffices for single descriptor + blocks. But in the case of the type 0x0101 descriptor block, we have + an array of subblocks. In this case, the high order 16 bits of an + internal index reference are used to select the subblock. Each + subblock starts with a 16 bit indexOffset which points to the count + and array of 16 bit integer pairs which are offsets in the current + subblock. + + Finally, we have the offset and size of the "b5" block located at offset 0xc with a size of 8 bytes in this descriptor block. The "b5" block has the following format: @@ -1433,10 +1443,6 @@ The following item types are known, but not all of these are implemented in the code yet. - Note: it appears that some types can have a IPOS value or a ID2 value - depending on the size of the field in question. It is safer to check - every field than for me to say what the "usually" contain. Absolute - values though, are generally going to be constant. 32 bit Associated Descriptor Item 0x0101 - This descriptor block contains a list of ID1 values. It is used when an - ID1 (that would normally point to a type 0x7cec or 0xbcec descriptor block) - contains more data than can fit in any single descriptor of those types. - In this case, it points to a type 0x0101 block, which contains a list - of ID1 values that themselves point to the actual descriptor blocks. - The descriptor blocks that are pointed to by this list of ID1 values - are almost but not quite the same as the standard 0xbcec blocks. Decoding - these blocks is still incomplete. + This descriptor block contains a list of ID1 values. It is used when + an ID1 (that would normally point to a type 0x7cec or 0xbcec + descriptor block) contains more data than can fit in any single + descriptor of those types. In this case, it points to a type 0x0101 + block, which contains a list of ID1 values that themselves point to + the actual descriptor blocks. The total length value in the 0x0101 + header is the sum of the lengths of the blocks pointed to by the list + of ID1 values. The result is an array of subblocks, that may contain + index references where the high order 16 bits specify which descriptor + subblock to use. Only the first descriptor subblock contains the + signature (0xbcec or 0x7cec). @@ -1810,10 +1819,10 @@ 0000 signature [2 bytes] 0x0101 constant 0002 count [2 bytes] 0x0002 in this case -0004 unknown [4 bytes] 0x0029ea in this case +0004 total length [4 bytes] 0x0029ea in this case repeating -0008 id [8 bytes] 0x008310 in this case -0010 id [8 bytes] 0x00831c in this case +0008 id1 [8 bytes] 0x008310 in this case +0010 id1 [8 bytes] 0x00831c in this case ]]>