# HG changeset patch # User Carl Byington # Date 1236270212 28800 # Node ID 06aa84023b48e6298d09d1d5c1336b53ff7cea61 # Parent f9773b6368e04f6b950457a582414dbc7eb6b92e rename some structure fields to reflect our better understanding of the pst format diff -r f9773b6368e0 -r 06aa84023b48 ChangeLog --- a/ChangeLog Sat Feb 28 11:55:48 2009 -0800 +++ b/ChangeLog Thu Mar 05 08:23:32 2009 -0800 @@ -2,6 +2,8 @@ =============================== * improve documentation of .pst format. * remove decrypt option from getidblock - we always decrypt. + * rename some structure fields to reflect our better understanding + of the pst format. LibPST 0.6.29 (2009-02-24) =============================== diff -r f9773b6368e0 -r 06aa84023b48 regression/regression-tests.bash --- a/regression/regression-tests.bash Sat Feb 28 11:55:48 2009 -0800 +++ b/regression/regression-tests.bash Thu Mar 05 08:23:32 2009 -0800 @@ -13,6 +13,21 @@ } +function doldif() +{ + n="$1" + fn="$2" + echo $fn + ba=$(basename "$fn" .pst) + rm -rf output$n + mkdir output$n + #$val ../src/pst2ldif -d dumper -b 'o=ams-cc.com, c=US' -c 'newPerson' -o $fn >$ba.ldif.err 2>&1 + $val ../src/pst2ldif -d dumper -b 'o=ams-cc.com, c=US' -c 'inetOrgPerson' $fn >$ba.ldif.err 2>&1 + ../src/readpstlog -f I dumper >$ba.ldif.log + rm -f dumper +} + + function dopst() { n="$1" @@ -26,10 +41,6 @@ $val ../src/readpst -cv -o output$n -d dumper $fn >$ba.err 2>&1 ../src/readpstlog -f I dumper >$ba.log - #$val ../src/pst2ldif -d dumper -b 'o=ams-cc.com, c=US' -c 'newPerson' -o $fn >$ba.ldif.err 2>&1 - #$val ../src/pst2ldif -d dumper -b 'o=ams-cc.com, c=US' -c 'inetOrgPerson' $fn >$ba.ldif.err 2>&1 - # ../src/readpstlog -f I dumper >$ba.ldif.log - #../src/getidblock -d -p $fn 0 >$ba.fulldump #../src/readpstlog -f I getidblock.log >$ba.fulldump.log @@ -39,7 +50,7 @@ val="valgrind --leak-check=full" -#val='' +val='' pushd .. 
make || exit @@ -50,25 +61,46 @@ dodii 2 sample_64.pst dodii 3 test.pst dodii 4 big_mail.pst +elif [ "$1" == "ldif" ]; then + #doldif 1 ams.pst + #doldif 2 sample_64.pst + #doldif 3 test.pst + #doldif 4 big_mail.pst + #doldif 5 mbmg.archive.pst + #doldif 6 Single2003-read.pst + #doldif 7 Single2003-unread.pst + #doldif 8 ol2k3high.pst + #doldif 9 ol97high.pst + #doldif 10 returned_message.pst + #doldif 11 flow.pst + #doldif 12 test-html.pst + #doldif 13 test-text.pst + #doldif 14 joe.romanowski.pst + #doldif 15 hourig1.pst + #doldif 16 hourig2.pst + #doldif 17 hourig3.pst + #doldif 18 test-mac.pst + doldif 19 harris.pst else - dopst 1 ams.pst - dopst 2 sample_64.pst - dopst 3 test.pst - dopst 4 big_mail.pst - dopst 5 mbmg.archive.pst - dopst 6 Single2003-read.pst - dopst 7 Single2003-unread.pst - dopst 8 ol2k3high.pst - dopst 9 ol97high.pst - dopst 10 returned_message.pst - dopst 11 flow.pst - dopst 12 test-html.pst - dopst 13 test-text.pst - dopst 14 joe.romanowski.pst - dopst 15 hourig1.pst - #dopst 16 hourig2.pst - #dopst 17 hourig3.pst - dopst 18 test-mac.pst + dopst 1 ams.pst + dopst 2 sample_64.pst + dopst 3 test.pst + dopst 4 big_mail.pst + dopst 5 mbmg.archive.pst + dopst 6 Single2003-read.pst + dopst 7 Single2003-unread.pst + dopst 8 ol2k3high.pst + dopst 9 ol97high.pst + dopst 10 returned_message.pst + dopst 11 flow.pst + dopst 12 test-html.pst + dopst 13 test-text.pst + dopst 14 joe.romanowski.pst + dopst 15 hourig1.pst + #dopst 16 hourig2.pst + #dopst 17 hourig3.pst + dopst 18 test-mac.pst + dopst 19 harris.pst fi grep 'lost:' *err | grep -v 'lost: 0 ' diff -r f9773b6368e0 -r 06aa84023b48 src/Makefile.am --- a/src/Makefile.am Sat Feb 28 11:55:48 2009 -0800 +++ b/src/Makefile.am Thu Mar 05 08:23:32 2009 -0800 @@ -76,7 +76,7 @@ libstrfunc.h\ timeconv.h \ vbuf.h - libpst_la_LDFLAGS = $(NO_UNDEFINED) -version-info 1:1:0 + libpst_la_LDFLAGS = $(NO_UNDEFINED) -version-info 1:2:0 endif libpst_la_SOURCES = $(common_source) $(common_header) diff -r f9773b6368e0 
-r 06aa84023b48 src/getidblock.c --- a/src/getidblock.c Sat Feb 28 11:55:48 2009 -0800 +++ b/src/getidblock.c Thu Mar 05 08:23:32 2009 -0800 @@ -45,7 +45,7 @@ DEBUG_MAIN(("Parsing block id %#"PRIx64"\n", id)); ptr = pstfile.d_head; while (ptr) { - if (ptr->list_index && ptr->list_index->id == id) + if (ptr->assoc_tree && ptr->assoc_tree->id == id) break; if (ptr->desc && ptr->desc->id == id) break; @@ -53,8 +53,8 @@ } if (!ptr) { ptr = (pst_desc_ll *) xmalloc(sizeof(pst_desc_ll)); + memset(ptr, 0, sizeof(pst_desc_ll)); ptr->desc = pst_getID(&pstfile, id); - ptr->list_index = NULL; } pst_item *item = pst_parse_item(&pstfile, ptr, NULL); if (item) pst_freeItem(item); @@ -66,9 +66,9 @@ void dump_desc(pst_desc_ll *ptr) { while (ptr) { - DEBUG_MAIN(("\n\n\nLooking at block desc id %#"PRIx64"\n", ptr->id)); + DEBUG_MAIN(("\n\n\nLooking at block desc id %#"PRIx64"\n", ptr->d_id)); if (ptr->desc && ptr->desc->id) dumper(ptr->desc->id); - if (ptr->list_index && ptr->list_index->id) dumper(ptr->list_index->id); + if (ptr->assoc_tree && ptr->assoc_tree->id) dumper(ptr->assoc_tree->id); if (ptr->child) dump_desc(ptr->child); ptr = ptr->next; } diff -r f9773b6368e0 -r 06aa84023b48 src/libpst.c --- a/src/libpst.c Sat Feb 28 11:55:48 2009 -0800 +++ b/src/libpst.c Thu Mar 05 08:23:32 2009 -0800 @@ -73,16 +73,16 @@ typedef struct pst_id2_assoc32 { uint32_t id2; uint32_t id; - uint32_t table2; + uint32_t child_id; } pst_id2_assoc32; typedef struct pst_id2_assoc { - uint32_t id2; // only 32 bit here? + uint32_t id2; // only 32 bit here uint16_t unknown1; uint16_t unknown2; uint64_t id; - uint64_t table2; + uint64_t child_id; } pst_id2_assoc; @@ -103,9 +103,10 @@ } pst_block_hdr; -// for "compressible" encryption, just a simple substitution cipher -// this is an array of the un-encrypted values. the un-encrypted value is in the position -// of the encrypted value. 
ie the encrypted value 0x13 represents 0x02 +/** for "compressible" encryption, just a simple substitution cipher, + * plaintext = comp_enc[ciphertext]; + * for "strong" encryption, this is the first rotor of an Enigma 3 rotor cipher. + */ static unsigned char comp_enc [] = { 0x47, 0xf1, 0xb4, 0xe6, 0x0b, 0x6a, 0x72, 0x48, 0x85, 0x4e, 0x9e, 0xeb, 0xe2, 0xf8, 0x94, 0x53, 0xe0, 0xbb, 0xa0, 0x02, 0xe8, 0x5a, 0x09, 0xab, 0xdb, 0xe3, 0xba, 0xc6, 0x7c, 0xc3, 0x10, 0xdd, @@ -125,9 +126,8 @@ 0xd4, 0xe1, 0x11, 0xd0, 0x08, 0x8b, 0x2a, 0xf2, 0xed, 0x9a, 0x64, 0x3f, 0xc1, 0x6c, 0xf9, 0xec }; -// for "strong" encryption, we have the two additional tables, -// which (with the previous table) are used as the keys in an -// Enigma 3 rotor cipher +/** for "strong" encryption, this is the second rotor of an Enigma 3 rotor cipher. + */ static unsigned char comp_high1 [] = { 0x41, 0x36, 0x13, 0x62, 0xa8, 0x21, 0x6e, 0xbb, 0xf4, 0x16, 0xcc, 0x04, 0x7f, 0x64, 0xe8, 0x5d, 0x1e, 0xf2, 0xcb, 0x2a, 0x74, 0xc5, 0x5e, 0x35, 0xd2, 0x95, 0x47, 0x9e, 0x96, 0x2d, 0x9a, 0x88, @@ -147,6 +147,8 @@ 0xa2, 0x01, 0xf7, 0x2e, 0xbc, 0x24, 0x68, 0x75, 0x0d, 0xfe, 0xba, 0x2f, 0xb5, 0xd0, 0xda, 0x3d }; +/** for "strong" encryption, this is the third rotor of an Enigma 3 rotor cipher. 
+ */ static unsigned char comp_high2 [] = { 0x14, 0x53, 0x0f, 0x56, 0xb3, 0xc8, 0x7a, 0x9c, 0xeb, 0x65, 0x48, 0x17, 0x16, 0x15, 0x9f, 0x02, 0xcc, 0x54, 0x7c, 0x83, 0x00, 0x0d, 0x0c, 0x0b, 0xa2, 0x62, 0xa8, 0x76, 0xdb, 0xd9, 0xed, 0xc7, @@ -166,6 +168,7 @@ 0x61, 0xe0, 0xc6, 0xc1, 0x59, 0xab, 0xbb, 0x58, 0xde, 0x5f, 0xdf, 0x60, 0x79, 0x7e, 0xb2, 0x8a }; + int pst_open(pst_file *pf, char *name) { int32_t sig; @@ -304,9 +307,9 @@ // find any orphan children of this node, and collect them pst_desc_ll *n = pf->d_head; while (n) { - if (n->parent_id == node->id) { + if (n->parent_d_id == node->d_id) { // found a child of this node - DEBUG_INDEX(("Found orphan child %#"PRIx64" of parent %#"PRIx64"\n", n->id, node->id)); + DEBUG_INDEX(("Found orphan child %#"PRIx64" of parent %#"PRIx64"\n", n->d_id, node->d_id)); pst_desc_ll *nn = n->next; pst_desc_ll *pp = n->prev; node->no_child++; @@ -322,18 +325,18 @@ } // now hook this node into the global tree - if (node->parent_id == 0) { + if (node->parent_d_id == 0) { // add top level node to the descriptor tree //DEBUG_INDEX(("Null parent\n")); add_descriptor_to_list(node, &pf->d_head, &pf->d_tail); } - else if (node->parent_id == node->id) { + else if (node->parent_d_id == node->d_id) { // add top level node to the descriptor tree DEBUG_INDEX(("%#"PRIx64" is its own parent. 
What is this world coming to?\n")); add_descriptor_to_list(node, &pf->d_head, &pf->d_tail); } else { //DEBUG_INDEX(("Searching for parent %#"PRIx64" of %#"PRIx64"\n", node->parent_id, node->id)); - pst_desc_ll *parent = pst_getDptr(pf, node->parent_id); + pst_desc_ll *parent = pst_getDptr(pf, node->parent_d_id); if (parent) { //DEBUG_INDEX(("Found parent %#"PRIx64"\n", node->parent_id)); parent->no_child++; @@ -341,7 +344,7 @@ add_descriptor_to_list(node, &parent->child, &parent->child_tail); } else { - DEBUG_INDEX(("No parent %#"PRIx64", have an orphan child %#"PRIx64"\n", node->parent_id, node->id)); + DEBUG_INDEX(("No parent %#"PRIx64", have an orphan child %#"PRIx64"\n", node->parent_d_id, node->d_id)); add_descriptor_to_list(node, &pf->d_head, &pf->d_tail); } } @@ -356,11 +359,11 @@ * @param head pointer to the subtree to be copied * @return pointer to the new copy of the subtree */ -static pst_index2_ll* deep_copy(pst_index2_ll *head); -static pst_index2_ll* deep_copy(pst_index2_ll *head) +static pst_id2_ll* deep_copy(pst_id2_ll *head); +static pst_id2_ll* deep_copy(pst_id2_ll *head) { if (!head) return NULL; - pst_index2_ll* me = (pst_index2_ll*) xmalloc(sizeof(pst_index2_ll)); + pst_id2_ll* me = (pst_id2_ll*) xmalloc(sizeof(pst_id2_ll)); me->id2 = head->id2; me->id = head->id; me->child = deep_copy(head->child); @@ -389,11 +392,11 @@ topnode = pst_getDptr(pf, (uint64_t)topid); if (!topnode) { // add dummy top record to pickup orphan children - topnode = (pst_desc_ll*) xmalloc(sizeof(pst_desc_ll)); - topnode->id = topid; - topnode->parent_id = 0; - topnode->list_index = NULL; - topnode->desc = NULL; + topnode = (pst_desc_ll*) xmalloc(sizeof(pst_desc_ll)); + topnode->d_id = topid; + topnode->parent_d_id = 0; + topnode->assoc_tree = NULL; + topnode->desc = NULL; record_descriptor(pf, topnode); // add to the global tree } DEBUG_RET(); @@ -519,7 +522,7 @@ // for PST files this will load up ID2 0x61 and check it's "list" attribute. 
pst_desc_ll *p; pst_num_array *na; - pst_index2_ll *id2_head = NULL; + pst_id2_ll *id2_head = NULL; char *buffer=NULL, *headerbuffer=NULL; size_t bsize=0, hsize=0, bptr=0; pst_x_attrib xattrib; @@ -540,8 +543,8 @@ return 0; } - if (p->list_index) { - id2_head = pst_build_id2(pf, p->list_index); + if (p->assoc_tree) { + id2_head = pst_build_id2(pf, p->assoc_tree); pst_printID2ptr(id2_head); } else { DEBUG_WARN(("Have not been able to fetch any id2 values for item 0x61. Brace yourself!\n")); @@ -696,13 +699,13 @@ memcpy(&d32, buf, sizeof(pst_desc32)); LE32_CPU(d32.d_id); LE32_CPU(d32.desc_id); - LE32_CPU(d32.list_id); - LE32_CPU(d32.parent_id); - desc->d_id = d32.d_id; - desc->desc_id = d32.desc_id; - desc->list_id = d32.list_id; - desc->parent_id = d32.parent_id; - desc->u1 = 0; + LE32_CPU(d32.tree_id); + LE32_CPU(d32.parent_d_id); + desc->d_id = d32.d_id; + desc->desc_id = d32.desc_id; + desc->tree_id = d32.tree_id; + desc->parent_d_id = d32.parent_d_id; + desc->u1 = 0; r = sizeof(pst_desc32); } return r; @@ -780,7 +783,7 @@ memcpy(assoc, buf, sizeof(pst_id2_assoc)); LE32_CPU(assoc->id2); LE64_CPU(assoc->id); - LE64_CPU(assoc->table2); + LE64_CPU(assoc->child_id); r = sizeof(pst_id2_assoc); } else { pst_id2_assoc32 assoc32; @@ -790,9 +793,9 @@ LE32_CPU(assoc32.id2); LE32_CPU(assoc32.id); - LE32_CPU(assoc32.table2); - assoc->id2 = assoc32.id2; - assoc->id = assoc32.id; - assoc->table2 = assoc32.table2; + LE32_CPU(assoc32.child_id); + assoc->id2 = assoc32.id2; + assoc->id = assoc32.id; + assoc->child_id = assoc32.child_id; r = sizeof(pst_id2_assoc32); } return r; @@ -821,6 +824,11 @@ } +/** Process the index1 b-tree from the pst file and create the + * pf->i_head linked list from it. This tree holds the location + * (offset and size) of lower level objects (0xbcec descriptor + * blocks, etc) in the pst file.
+ */ int pst_build_id_ptr(pst_file *pf, int64_t offset, int32_t depth, uint64_t linku1, uint64_t start_val, uint64_t end_val) { struct pst_table_ptr_structn table, table2; pst_index_ll *i_ptr=NULL; @@ -934,6 +942,10 @@ } +/** Process the index2 b-tree from the pst file and create the + * pf->d_head tree from it. This tree holds descriptions of the + * higher level objects (email, contact, etc) in the pst file. + */ int pst_build_desc_ptr (pst_file *pf, int64_t offset, int32_t depth, uint64_t linku1, uint64_t start_val, uint64_t end_val) { struct pst_table_ptr_structn table, table2; pst_descn desc_rec; @@ -977,8 +989,8 @@ } for (x=0; x= end_val) || (desc_rec.d_id < old)) { DEBUG_WARN(("This item isn't right. Must be corruption, or I got it wrong!\n")); DEBUG_HEXDUMPC(buf, DESC_BLOCK_SIZE, 16); @@ -995,12 +1007,12 @@ return -1; } } - DEBUG_INDEX(("New Record %#"PRIx64" with parent %#x\n", desc_rec.d_id, desc_rec.parent_id)); + DEBUG_INDEX(("New Record %#"PRIx64" with parent %#x\n", desc_rec.d_id, desc_rec.parent_d_id)); { pst_desc_ll *d_ptr = (pst_desc_ll*) xmalloc(sizeof(pst_desc_ll)); - d_ptr->id = desc_rec.d_id; - d_ptr->parent_id = desc_rec.parent_id; - d_ptr->list_index = pst_getID(pf, desc_rec.list_id); + d_ptr->d_id = desc_rec.d_id; + d_ptr->parent_d_id = desc_rec.parent_d_id; + d_ptr->assoc_tree = pst_getID(pf, desc_rec.tree_id); d_ptr->desc = pst_getID(pf, desc_rec.desc_id); record_descriptor(pf, d_ptr); // add to the global tree } @@ -1049,10 +1061,12 @@ } -pst_item* pst_parse_item(pst_file *pf, pst_desc_ll *d_ptr, pst_index2_ll *m_head) { +/** Process a high level object from the pst file. 
+ */ +pst_item* pst_parse_item(pst_file *pf, pst_desc_ll *d_ptr, pst_id2_ll *m_head) { pst_num_array * list; - pst_index2_ll *id2_head = m_head; - pst_index2_ll *id2_ptr = NULL; + pst_id2_ll *id2_head = m_head; + pst_id2_ll *id2_ptr = NULL; pst_item *item = NULL; pst_item_attach *attach = NULL; int32_t x; @@ -1069,12 +1083,12 @@ return NULL; } - if (d_ptr->list_index) { + if (d_ptr->assoc_tree) { if (m_head) { DEBUG_WARN(("supplied master head, but have a list that is building a new id2_head")); m_head = NULL; } - id2_head = pst_build_id2(pf, d_ptr->list_index); + id2_head = pst_build_id2(pf, d_ptr->assoc_tree); } pst_printID2ptr(id2_head); @@ -1239,7 +1253,12 @@ } -pst_num_array * pst_parse_block(pst_file *pf, uint64_t block_id, pst_index2_ll *i2_head, pst_num_array *na_head) { +/** Process a low level descriptor block (0x0101, 0xbcec, 0x7cec) into a + * list of objects, each of which contains a list of MAPI elements. + * + * @return list of objects + */ +pst_num_array * pst_parse_block(pst_file *pf, uint64_t block_id, pst_id2_ll *i2_head, pst_num_array *na_head) { char *buf = NULL; size_t read_size = 0; pst_subblocks subblocks; @@ -1738,16 +1757,16 @@ /** - * process the list of items produced from parse_block() + * process the list of objects produced from parse_block() * - * @param list pointer to the linked list of things from parse_block() - * @param item pointer to the item to be updated from the list. + * @param list pointer to the list of objects from parse_block() + * @param item pointer to the high level item to be updated from the list. * this item may be an email, contact or other sort of item. - * the type of this item is generally set by the things + * the type of this item is generally set by the MAPI elements * from the list. - * @param attach pointer to the linked list of attachment records. If + * @param attach pointer to the list of attachment records. 
If * this is non-null, the length of the this attachment list - * must be at least as large as the length of the list. + * must be at least as large as the length of the objects list. * * @return 0 for ok, -1 for error. */ @@ -1788,9 +1807,8 @@ *pp = '\0'; char *set = strdup(p); *pp = '"'; - MALLOC_EMAIL(item); - if (item->email->body_charset) free(item->email->body_charset); - item->email->body_charset = set; + if (item->body_charset) free(item->body_charset); + item->body_charset = set; DEBUG_EMAIL(("body charset %s from content-type extra field\n", set)); } } @@ -3111,17 +3129,15 @@ DEBUG_EMAIL(("%s\n", item->contact->other_po_box)); break; case 0x3FDE: // PR_INTERNET_CPID - MALLOC_EMAIL(item); - memcpy(&(item->email->internet_cpid), list->items[x]->data, sizeof(item->email->internet_cpid)); - LE32_CPU(item->email->internet_cpid); - t = item->email->internet_cpid; + memcpy(&(item->internet_cpid), list->items[x]->data, sizeof(item->internet_cpid)); + LE32_CPU(item->internet_cpid); + t = item->internet_cpid; DEBUG_EMAIL(("Internet code page %i\n", (int)t)); break; case 0x3FFD: // PR_MESSAGE_CODEPAGE - MALLOC_EMAIL(item); - memcpy(&(item->email->message_codepage), list->items[x]->data, sizeof(item->email->message_codepage)); - LE32_CPU(item->email->message_codepage); - t = item->email->message_codepage; + memcpy(&(item->message_codepage), list->items[x]->data, sizeof(item->message_codepage)); + LE32_CPU(item->message_codepage); + t = item->message_codepage; DEBUG_EMAIL(("Message code page %i\n", (int)t)); break; case 0x65E3: // Entry ID? 
@@ -3663,8 +3679,8 @@ } -void pst_free_id2(pst_index2_ll * head) { - pst_index2_ll *t; +void pst_free_id2(pst_id2_ll * head) { + pst_id2_ll *t; DEBUG_ENT("pst_free_id2"); while (head) { if (head->child) pst_free_id2(head->child); @@ -3725,15 +3741,15 @@ } -pst_index2_ll * pst_build_id2(pst_file *pf, pst_index_ll* list) { +pst_id2_ll * pst_build_id2(pst_file *pf, pst_index_ll* list) { pst_block_header block_head; - pst_index2_ll *head = NULL, *tail = NULL; + pst_id2_ll *head = NULL, *tail = NULL; uint16_t x = 0; char *b_ptr = NULL; char *buf = NULL; pst_id2_assoc id2_rec; pst_index_ll *i_ptr = NULL; - pst_index2_ll *i2_ptr = NULL; + pst_id2_ll *i2_ptr = NULL; DEBUG_ENT("pst_build_id2"); if (pst_read_block_size(pf, list->offset, list->size, &buf) < list->size) { @@ -3762,14 +3778,14 @@ b_ptr = buf + ((pf->do_read64) ? 0x08 : 0x04); while (x < block_head.count) { b_ptr += pst_decode_assoc(pf, &id2_rec, b_ptr); - DEBUG_INDEX(("id2 = %#x, id = %#"PRIx64", table2 = %#"PRIx64"\n", id2_rec.id2, id2_rec.id, id2_rec.table2)); + DEBUG_INDEX(("id2 = %#x, id = %#"PRIx64", child id = %#"PRIx64"\n", id2_rec.id2, id2_rec.id, id2_rec.child_id)); if ((i_ptr = pst_getID(pf, id2_rec.id)) == NULL) { DEBUG_WARN(("%#"PRIx64" - Not Found\n", id2_rec.id)); } else { DEBUG_INDEX(("%#"PRIx64" - Offset %#"PRIx64", u1 %#"PRIx64", Size %"PRIi64"(%#"PRIx64")\n", i_ptr->id, i_ptr->offset, i_ptr->u1, i_ptr->size, i_ptr->size)); // add it to the tree - i2_ptr = (pst_index2_ll*) xmalloc(sizeof(pst_index2_ll)); + i2_ptr = (pst_id2_ll*) xmalloc(sizeof(pst_id2_ll)); i2_ptr->id2 = id2_rec.id2; i2_ptr->id = i_ptr; i2_ptr->child = NULL; @@ -3777,12 +3793,11 @@ if (!head) head = i2_ptr; if (tail) tail->next = i2_ptr; tail = i2_ptr; - if (id2_rec.table2) { - if ((i_ptr = pst_getID(pf, id2_rec.table2)) == NULL) { - DEBUG_WARN(("Table2 [%#"PRIi64"] not found\n", id2_rec.table2)); + if (id2_rec.child_id) { + if ((i_ptr = pst_getID(pf, id2_rec.child_id)) == NULL) { + DEBUG_WARN(("child id [%#"PRIi64"] not 
found\n", id2_rec.child_id)); } else { - DEBUG_INDEX(("Going deeper for table2 [%#"PRIi64"]\n", id2_rec.table2)); i2_ptr->child = pst_build_id2(pf, i_ptr); } } @@ -3818,7 +3833,6 @@ if (item->email) { SAFE_FREE(item->email->arrival_date); SAFE_FREE(item->email->body); - SAFE_FREE(item->email->body_charset); SAFE_FREE(item->email->cc_address); SAFE_FREE(item->email->bcc_address); SAFE_FREE(item->email->common_name); @@ -4003,6 +4017,7 @@ free(item->appointment); } SAFE_FREE(item->ascii_type); + SAFE_FREE(item->body_charset); SAFE_FREE(item->comment); SAFE_FREE(item->create_date); SAFE_FREE(item->file_as); @@ -4021,7 +4036,7 @@ * Otherwise, the high order 16 bits of offset is the index into the subblocks, and * the (low order 16 bits of offset)>>4 is an index into the table of offsets in the subblock. */ -int pst_getBlockOffsetPointer(pst_file *pf, pst_index2_ll *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p) { +int pst_getBlockOffsetPointer(pst_file *pf, pst_id2_ll *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p) { size_t size; pst_block_offset block_offset; DEBUG_ENT("pst_getBlockOffsetPointer"); @@ -4116,14 +4131,14 @@ } -pst_index2_ll *pst_getID2(pst_index2_ll *head, uint64_t id2) { +pst_id2_ll *pst_getID2(pst_id2_ll *head, uint64_t id2) { DEBUG_ENT("pst_getID2"); DEBUG_INDEX(("looking for id2 = %#"PRIx64"\n", id2)); - pst_index2_ll *ptr = head; + pst_id2_ll *ptr = head; while (ptr) { if (ptr->id2 == id2) break; if (ptr->child) { - pst_index2_ll *rc = pst_getID2(ptr->child, id2); + pst_id2_ll *rc = pst_getID2(ptr->child, id2); if (rc) { DEBUG_RET(); return rc; @@ -4146,15 +4161,15 @@ * find the id in the descriptor tree rooted at pf->d_head * * @param pf global pst file pointer - * @param id the id we are looking for + * @param d_id the id we are looking for * * @return pointer to the pst_desc_ll node in the descriptor tree */ -pst_desc_ll* pst_getDptr(pst_file *pf, uint64_t id) { +pst_desc_ll* 
pst_getDptr(pst_file *pf, uint64_t d_id) { pst_desc_ll *ptr = pf->d_head; DEBUG_ENT("pst_getDptr"); - while (ptr && (ptr->id != id)) { - //DEBUG_INDEX(("Looking for %#"PRIx64" at node %#"PRIx64" with parent %#"PRIx64"\n", id, ptr->id, ptr->parent_id)); + while (ptr && (ptr->d_id != d_id)) { + //DEBUG_INDEX(("Looking for %#"PRIx64" at node %#"PRIx64" with parent %#"PRIx64"\n", id, ptr->d_id, ptr->parent_d_id)); if (ptr->child) { ptr = ptr->child; continue; @@ -4172,9 +4187,9 @@ void pst_printDptr(pst_file *pf, pst_desc_ll *ptr) { DEBUG_ENT("pst_printDptr"); while (ptr) { - DEBUG_INDEX(("%#"PRIx64" [%i] desc=%#"PRIx64", list=%#"PRIx64"\n", ptr->id, ptr->no_child, - (ptr->desc ? ptr->desc->id : (uint64_t)0), - (ptr->list_index ? ptr->list_index->id : (uint64_t)0))); + DEBUG_INDEX(("%#"PRIx64" [%i] desc=%#"PRIx64", assoc tree=%#"PRIx64"\n", ptr->d_id, ptr->no_child, + (ptr->desc ? ptr->desc->id : (uint64_t)0), + (ptr->assoc_tree ? ptr->assoc_tree->id : (uint64_t)0))); if (ptr->child) { pst_printDptr(pf, ptr->child); } @@ -4195,7 +4210,7 @@ } -void pst_printID2ptr(pst_index2_ll *ptr) { +void pst_printID2ptr(pst_id2_ll *ptr) { DEBUG_ENT("pst_printID2ptr"); while (ptr) { DEBUG_INDEX(("%#"PRIx64" id=%#"PRIx64"\n", ptr->id2, (ptr->id ? 
ptr->id->id : (uint64_t)0))); @@ -4417,9 +4432,9 @@ #define PST_PTR_BLOCK_SIZE 0x120 -size_t pst_ff_getID2block(pst_file *pf, uint64_t id2, pst_index2_ll *id2_head, char** buf) { +size_t pst_ff_getID2block(pst_file *pf, uint64_t id2, pst_id2_ll *id2_head, char** buf) { size_t ret; - pst_index2_ll* ptr; + pst_id2_ll* ptr; pst_holder h = {buf, NULL, 0}; DEBUG_ENT("pst_ff_getID2block"); ptr = pst_getID2(id2_head, id2); diff -r f9773b6368e0 -r 06aa84023b48 src/libpst.h --- a/src/libpst.h Sat Feb 28 11:55:48 2009 -0800 +++ b/src/libpst.h Thu Mar 05 08:23:32 2009 -0800 @@ -95,17 +95,17 @@ typedef struct pst_desc_struct32 { uint32_t d_id; uint32_t desc_id; - uint32_t list_id; - uint32_t parent_id; + uint32_t tree_id; + uint32_t parent_d_id; } pst_desc32; typedef struct pst_desc_structn { uint64_t d_id; uint64_t desc_id; - uint64_t list_id; - uint32_t parent_id; // not 64 bit ?? - uint32_t u1; // padding + uint64_t tree_id; + uint32_t parent_d_id; // not 64 bit ?? + uint32_t u1; // padding } pst_descn; @@ -144,19 +144,19 @@ } pst_index_ll; -typedef struct pst_index2_tree { +typedef struct pst_id2_tree { uint64_t id2; pst_index_ll *id; - struct pst_index2_tree *child; - struct pst_index2_tree *next; -} pst_index2_ll; + struct pst_id2_tree *child; + struct pst_id2_tree *next; +} pst_id2_ll; typedef struct pst_desc_tree { - uint64_t id; - uint64_t parent_id; - pst_index_ll *list_index; + uint64_t d_id; + uint64_t parent_d_id; pst_index_ll *desc; + pst_index_ll *assoc_tree; int32_t no_child; struct pst_desc_tree *prev; struct pst_desc_tree *next; @@ -178,7 +178,6 @@ int autoforward; // 1 = true, 0 = not set, -1 = false char *body; int32_t body_was_unicode; // 1 = true, 0 = false - char *body_charset; // null if not specified char *cc_address; char *bcc_address; char *common_name; @@ -233,8 +232,6 @@ char *sender2_access; char *sender2_address; int32_t sensitivity; - int32_t internet_cpid; - int32_t message_codepage; FILETIME *sent_date; pst_entryid *sentmail_folder; char 
*sentto_address; @@ -383,7 +380,7 @@ size_t size; uint64_t id2_val; uint64_t id_val; // calculated from id2_val during creation of record - pst_index2_ll *id2_head; // deep copy from child + pst_id2_ll *id2_head; // deep copy from child int32_t method; int32_t position; int32_t sequence; @@ -425,26 +422,29 @@ typedef struct pst_item { - struct pst_item_email *email; // data reffering to email - struct pst_item_folder *folder; // data reffering to folder - struct pst_item_contact *contact; // data reffering to contact + struct pst_item_email *email; // data referring to email + struct pst_item_folder *folder; // data referring to folder + struct pst_item_contact *contact; // data referring to contact struct pst_item_attach *attach; // linked list of attachments struct pst_item_message_store *message_store; // data referring to the message store struct pst_item_extra_field *extra_fields; // linked list of extra headers and such - struct pst_item_journal *journal; // data reffering to a journal entry - struct pst_item_appointment *appointment; // data reffering to a calendar entry - int type; - char *ascii_type; - char *file_as; - char *comment; - int32_t message_size; - char *outlook_version; - char *record_key; // probably 16 bytes long. - size_t record_key_size; - int response_requested; // 1 = true, 0 = false - FILETIME *create_date; - FILETIME *modify_date; - int private_member; // 1 = true, 0 = false + struct pst_item_journal *journal; // data referring to a journal entry + struct pst_item_appointment *appointment; // data referring to a calendar entry + int type; + char *ascii_type; + char *file_as; + char *comment; + char *body_charset; // null if not specified + int32_t internet_cpid; + int32_t message_codepage; + int32_t message_size; + char *outlook_version; + char *record_key; // probably 16 bytes long. 
+ size_t record_key_size; + int response_requested; // 1 = true, 0 = false + FILETIME *create_date; + FILETIME *modify_date; + int private_member; // 1 = true, 0 = false } pst_item; @@ -549,20 +549,20 @@ int pst_build_id_ptr(pst_file *pf, int64_t offset, int32_t depth, uint64_t linku1, uint64_t start_val, uint64_t end_val); int pst_build_desc_ptr(pst_file *pf, int64_t offset, int32_t depth, uint64_t linku1, uint64_t start_val, uint64_t end_val); pst_item* pst_getItem(pst_file *pf, pst_desc_ll *d_ptr); -pst_item* pst_parse_item (pst_file *pf, pst_desc_ll *d_ptr, pst_index2_ll *m_head); -pst_num_array* pst_parse_block(pst_file *pf, uint64_t block_id, pst_index2_ll *i2_head, pst_num_array *na_head); +pst_item* pst_parse_item (pst_file *pf, pst_desc_ll *d_ptr, pst_id2_ll *m_head); +pst_num_array* pst_parse_block(pst_file *pf, uint64_t block_id, pst_id2_ll *i2_head, pst_num_array *na_head); int pst_process(pst_num_array *list, pst_item *item, pst_item_attach *attach); void pst_free_list(pst_num_array *list); void pst_freeItem(pst_item *item); -void pst_free_id2(pst_index2_ll * head); +void pst_free_id2(pst_id2_ll * head); void pst_free_id (pst_index_ll *head); void pst_free_desc (pst_desc_ll *head); void pst_free_xattrib(pst_x_attrib_ll *x); -int pst_getBlockOffsetPointer(pst_file *pf, pst_index2_ll *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p); +int pst_getBlockOffsetPointer(pst_file *pf, pst_id2_ll *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p); int pst_getBlockOffset(char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset *p); -pst_index2_ll* pst_build_id2(pst_file *pf, pst_index_ll* list); +pst_id2_ll* pst_build_id2(pst_file *pf, pst_index_ll* list); pst_index_ll* pst_getID(pst_file* pf, uint64_t id); -pst_index2_ll* pst_getID2(pst_index2_ll * ptr, uint64_t id); +pst_id2_ll* pst_getID2(pst_id2_ll * ptr, uint64_t id); pst_desc_ll* pst_getDptr(pst_file *pf, uint64_t id); 
size_t pst_read_block_size(pst_file *pf, int64_t offset, size_t size, char **buf); int pst_decrypt(uint64_t id, char *buf, size_t size, unsigned char type); @@ -571,7 +571,7 @@ size_t pst_getAtPos(pst_file *pf, int64_t pos, void* buf, size_t size); size_t pst_ff_getIDblock_dec(pst_file *pf, uint64_t id, char **b); size_t pst_ff_getIDblock(pst_file *pf, uint64_t id, char** b); -size_t pst_ff_getID2block(pst_file *pf, uint64_t id2, pst_index2_ll *id2_head, char** buf); +size_t pst_ff_getID2block(pst_file *pf, uint64_t id2, pst_id2_ll *id2_head, char** buf); size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, pst_holder *h); size_t pst_ff_compile_ID(pst_file *pf, uint64_t id, pst_holder *h, size_t size); @@ -587,7 +587,7 @@ void pst_printDptr(pst_file *pf, pst_desc_ll *ptr); void pst_printIDptr(pst_file* pf); -void pst_printID2ptr(pst_index2_ll *ptr); +void pst_printID2ptr(pst_id2_ll *ptr); // switch from maximal packing back to default packing diff -r f9773b6368e0 -r 06aa84023b48 src/lspst.c --- a/src/lspst.c Sat Feb 28 11:55:48 2009 -0800 +++ b/src/lspst.c Thu Mar 05 08:23:32 2009 -0800 @@ -53,13 +53,12 @@ create_enter_dir(&ff, outeritem); while (d_ptr) { - DEBUG_MAIN(("main: New item record, d_ptr = %p.\n", d_ptr)); if (!d_ptr->desc) { - DEBUG_WARN(("main: ERROR ?? item's desc record is NULL\n")); + DEBUG_WARN(("main: ERROR item's desc record is NULL\n")); ff.skip_count++; } else { - DEBUG_MAIN(("main: Desc Email ID %x [d_ptr->id = %x]\n", d_ptr->desc->id, d_ptr->id)); + DEBUG_MAIN(("main: Desc Email ID %"PRIx64" [d_ptr->d_id = %"PRIx64"]\n", d_ptr->desc->id, d_ptr->d_id)); item = pst_parse_item(&pstfile, d_ptr, NULL); DEBUG_MAIN(("main: About to process item @ %p.\n", item)); diff -r f9773b6368e0 -r 06aa84023b48 src/readpst.c --- a/src/readpst.c Sat Feb 28 11:55:48 2009 -0800 +++ b/src/readpst.c Thu Mar 05 08:23:32 2009 -0800 @@ -132,11 +132,11 @@ while (d_ptr) { DEBUG_MAIN(("main: New item record\n")); if (!d_ptr->desc) { - DEBUG_WARN(("main: ERROR ?? 
item's desc record is NULL\n")); + DEBUG_WARN(("main: ERROR item's desc record is NULL\n")); ff.skip_count++; } else { - DEBUG_MAIN(("main: Desc Email ID %#"PRIx64" [d_ptr->id = %#"PRIx64"]\n", d_ptr->desc->id, d_ptr->id)); + DEBUG_MAIN(("main: Desc Email ID %#"PRIx64" [d_ptr->d_id = %#"PRIx64"]\n", d_ptr->desc->id, d_ptr->d_id)); item = pst_parse_item(&pstfile, d_ptr, NULL); DEBUG_MAIN(("main: About to process item\n")); @@ -748,16 +748,16 @@ ptr = pst_getID(pf, attach->id_val); pst_desc_ll d_ptr; - d_ptr.id = ptr->id; - d_ptr.parent_id = 0; - d_ptr.list_index = NULL; - d_ptr.desc = ptr; - d_ptr.no_child = 0; - d_ptr.prev = NULL; - d_ptr.next = NULL; - d_ptr.parent = NULL; - d_ptr.child = NULL; - d_ptr.child_tail = NULL; + d_ptr.d_id = 0; + d_ptr.parent_d_id = 0; + d_ptr.assoc_tree = NULL; + d_ptr.desc = ptr; + d_ptr.no_child = 0; + d_ptr.prev = NULL; + d_ptr.next = NULL; + d_ptr.parent = NULL; + d_ptr.child = NULL; + d_ptr.child_tail = NULL; pst_item *item = pst_parse_item(pf, &d_ptr, attach->id2_head); write_normal_email(f_output, "", item, MODE_NORMAL, 0, pf, 0, extra_mime_headers); @@ -1082,9 +1082,9 @@ // setup default body character set and report type snprintf(body_charset, sizeof(body_charset), "%s", - (item->email->body_charset) ? item->email->body_charset : - (item->email->message_codepage) ? codepage(item->email->message_codepage) : - (item->email->internet_cpid) ? codepage(item->email->internet_cpid) : + (item->body_charset) ? item->body_charset : + (item->message_codepage) ? codepage(item->message_codepage) : + (item->internet_cpid) ? codepage(item->internet_cpid) : "utf-8"); body_report[0] = '\0'; diff -r f9773b6368e0 -r 06aa84023b48 xml/libpst.in --- a/xml/libpst.in Sat Feb 28 11:55:48 2009 -0800 +++ b/xml/libpst.in Thu Mar 05 08:23:32 2009 -0800 @@ -1479,10 +1479,12 @@ that pointed to this node. - Each item in this node is a tuple of (D_ID, DESC-I_ID, TREE-I_ID, PARENT-D_ID). 
The DESC-I_ID points to the main data for this item via the index1 tree. - The TREE-I_ID is zero or points to Associated Tree Item 0x0002 via the index1 tree. - The PARENT-D_ID points to the parent of this item in this index2 tree. + Each item in this node is a tuple of (D_ID, DESC-I_ID, TREE-I_ID, + PARENT-D_ID). The DESC-I_ID points to the main data for this item + (Associated Descriptor Items 0x7cec, 0xbcec, or 0x0101) via the index1 + tree. The TREE-I_ID is zero or points to an Associated Tree Item + 0x0002 via the index1 tree. The PARENT-D_ID points to the parent of + this item in this index2 tree. @@ -1539,10 +1541,12 @@ that pointed to this node. - Each item in this node is a tuple of (D_ID, DESC-I_ID, TREE-I_ID, PARENT-D_ID) - The DESC-I_ID points to the main data for this item via the index1 tree. - The TREE-I_ID is zero or points to Associated Tree Item 0x0002 via the index1 tree. - The PARENT-D_ID points to the parent of this item in this index2 tree. + Each item in this node is a tuple of (D_ID, DESC-I_ID, TREE-I_ID, + PARENT-D_ID). The DESC-I_ID points to the main data for this item + (Associated Descriptor Items 0x7cec, 0xbcec, or 0x0101) via the index1 + tree. The TREE-I_ID is zero or points to an Associated Tree Item + 0x0002 via the index1 tree. The PARENT-D_ID points to the parent of + this item in this index2 tree. @@ -1607,6 +1611,7 @@ 0x00e638, 0, 0) 0x00e638 is the I_ID of the associated descriptor, and we can lookup that I_ID value in the index1 b-tree to find the (offset,size) of the data in the .pst file. + This descriptor is eventually decoded to a list of MAPI elements. - Note the descoffset of 0x0040, which again is an index reference. In this - case, it is an internal pointer reference, which needs to be right shifted by 4 bits - to become 0x0004, which is then a byte offset to be added to the above - indexOffset plus two (to skip the count), so it points to the (0x14, 0x7c) - pair.
The datasize (6) plus the b5 code (02) gives the size of the entries, - in this case 8 bytes. We now have the offset 0x14 of the descriptor array, - composed of 8 byte entries. Each descriptor entry has the following format: + Note the descoffset of 0x0040, which again is an index reference. In + this case, it is an internal pointer reference, which needs to be + right shifted by 4 bits to become 0x0004, which is then a byte offset + to be added to the above indexOffset plus two (to skip the count), so + it points to the (0x14, 0x7c) pair. The datasize (6) plus the b5 code + (02) gives the size of the entries, in this case 8 bytes. We now have + the offset 0x14 of the descriptor array, composed of 8 byte entries + that describe MAPI elements. Each descriptor entry has the following + format: @@ -1972,6 +1988,7 @@ Associated Descriptor Item 0x7cec This style of descriptor block is similar to the 0xbcec format. + This descriptor is also eventually decoded to a list of MAPI elements.