# HG changeset patch # User carl # Date 1199837966 28800 # Node ID d4606d460dafa6b6bcbe2717434c153b02166660 # Parent f6db1f060a955bfc5200b9b22d6cf271d5e00c12 more fixes for 64 bit format diff -r f6db1f060a95 -r d4606d460daf regression/regression-tests.bash --- a/regression/regression-tests.bash Sun Jan 06 14:47:06 2008 -0800 +++ b/regression/regression-tests.bash Tue Jan 08 16:19:26 2008 -0800 @@ -7,6 +7,7 @@ mkdir output$i done + $val ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' ams.pst >ams.err 2>&1 $val ../src/readpst -cv -o output1 ams.pst >out1.err 2>&1 $val ../src/readpst -cl -r -o output2 ams.pst >out2.err 2>&1 @@ -14,9 +15,8 @@ $val ../src/readpst -M -o output4 ams.pst >out4.err 2>&1 $val ../src/readpst -o output5 mbmg.archive.pst >out5.err 2>&1 $val ../src/readpst -o output6 test.pst >out6.err 2>&1 - -$val ../src/readpst -o output7 -d dumper ams.pst >out7.err 2>&1 - ../src/readpstlog -f I dumper >dumperams.log +$val ../src/readpst -cv -o output7 -d dumper sample_64.pst >out7.err 2>&1 + ../src/readpstlog -f I dumper >sample_64.log $val ../src/lspst ams.pst >out8.err 2>&1 ../src/readpstlog -f I lspst.debug >lspst.log diff -r f6db1f060a95 -r d4606d460daf src/libpst.c --- a/src/libpst.c Sun Jan 06 14:47:06 2008 -0800 +++ b/src/libpst.c Tue Jan 08 16:19:26 2008 -0800 @@ -29,8 +29,8 @@ #include "libpst.h" #include "timeconv.h" -#define INDEX_DEPTH 0x4C -#define SECOND_DEPTH 0x5C +//efine INDEX_DEPTH 0x4C +//efine SECOND_DEPTH 0x5C #define INDEX_TYPE32 0x0E #define INDEX_TYPE64 0x17 @@ -124,7 +124,7 @@ 0xed, 0x9a, 0x64, 0x3f, 0xc1, 0x6c, 0xf9, 0xec}; /*0xff*/ -void dump_desc(off_t off, int depth, int i, pst_descn *desc_rec ) { // {{{ +void dump_desc(off_t off, int depth, int i, pst_descn *desc_rec ) { //desc_rec->d_id = 0x0102030405060708; DEBUG_INDEX(("%08x [%i] Item(%#x) = [d_id = %#llx, desc_id = %#llx, " "list_id = %#llx, parent_id = %#x, u1 = %#x] %#x %p %p\n", @@ -192,6 +192,9 @@ DEBUG_RET(); return -1; } + else { + WARN(("switching to 64 bit format...\n")); + } } // read encryption setting @@ -511,22 +514,41 @@ return 1; } - -#define BLOCK_SIZE 516 // index blocks -#define DESC_BLOCK_SIZE 516 // descriptor blocks -#define ITEM_COUNT_OFFSET 0x1f0 // count byte -#define LEVEL_INDICATOR_OFFSET 0x1f3 // node or leaf -#define BACKLINK_OFFSET 0x1f8 // backlink u1 value -#define ITEM_SIZE 12 -#define DESC_SIZE 16 -#define INDEX_COUNT_MAX 41 // max active items -#define DESC_COUNT_MAX 31 // max active items +#define BLOCK_SIZE32 516 // index blocks +#define DESC_BLOCK_SIZE32 516 // descriptor blocks +#define ITEM_COUNT_OFFSET32 0x1f0 // count byte +#define LEVEL_INDICATOR_OFFSET32 0x1f3 // node or leaf +#define BACKLINK_OFFSET32 0x1f8 // backlink u1 value +#define ITEM_SIZE32 12 +#define DESC_SIZE32 16 +#define INDEX_COUNT_MAX32 41 // max active items +#define DESC_COUNT_MAX32 31 // max active items + +#define BLOCK_SIZE64 512 // index blocks +#define DESC_BLOCK_SIZE64 512 // descriptor blocks +#define ITEM_COUNT_OFFSET64 0x1e8 // count byte +#define LEVEL_INDICATOR_OFFSET64 0x1eb // node or leaf +#define BACKLINK_OFFSET64 0x1f8 // backlink u1 value +#define ITEM_SIZE64 24 +#define DESC_SIZE64 32 +#define INDEX_COUNT_MAX64 20 // max active items +#define DESC_COUNT_MAX64 15 // max active items + +#define BLOCK_SIZE ((do_read64) ? BLOCK_SIZE64 : BLOCK_SIZE32) +#define DESC_BLOCK_SIZE ((do_read64) ? DESC_BLOCK_SIZE64 : DESC_BLOCK_SIZE32) +#define ITEM_COUNT_OFFSET ((do_read64) ? ITEM_COUNT_OFFSET64 : ITEM_COUNT_OFFSET32) +#define LEVEL_INDICATOR_OFFSET ((do_read64) ? LEVEL_INDICATOR_OFFSET64 : LEVEL_INDICATOR_OFFSET32) +#define BACKLINK_OFFSET ((do_read64) ? BACKLINK_OFFSET64 : BACKLINK_OFFSET32) +#define ITEM_SIZE ((do_read64) ? ITEM_SIZE64 : ITEM_SIZE32) +#define DESC_SIZE ((do_read64) ? DESC_SIZE64 : DESC_SIZE32) +#define INDEX_COUNT_MAX ((do_read64) ? INDEX_COUNT_MAX64 : INDEX_COUNT_MAX32) +#define DESC_COUNT_MAX ((do_read64) ? DESC_COUNT_MAX64 : DESC_COUNT_MAX32) int _pst_decode_desc( pst_descn *desc, char *buf ) { int r; if (do_read64) { - DEBUG_INDEX(("Decoding desc64 ")); + DEBUG_INDEX(("Decoding desc64\n")); DEBUG_HEXDUMPC(buf, sizeof(pst_descn), 0x10); memcpy(desc, buf, sizeof(pst_descn)); LE64_CPU(desc->d_id); @@ -538,7 +560,7 @@ } else { pst_desc32 d32; - DEBUG_INDEX(("Decoding desc32 ")); + DEBUG_INDEX(("Decoding desc32\n")); DEBUG_HEXDUMPC(buf, sizeof(pst_desc32), 0x10); memcpy(&d32, buf, sizeof(pst_desc32)); LE32_CPU(d32.d_id); @@ -559,7 +581,7 @@ int _pst_decode_table( struct _pst_table_ptr_structn *table, char *buf ) { int r; if (do_read64) { - DEBUG_INDEX(("Decoding table64")); + DEBUG_INDEX(("Decoding table64\n")); DEBUG_HEXDUMPC(buf, sizeof(struct _pst_table_ptr_structn), 0x10); memcpy(table, buf, sizeof(struct _pst_table_ptr_structn)); LE64_CPU(table->start); @@ -569,7 +591,7 @@ } else { struct _pst_table_ptr_struct32 t32; - DEBUG_INDEX(("Decoding table32")); + DEBUG_INDEX(("Decoding table32\n")); DEBUG_HEXDUMPC(buf, sizeof( struct _pst_table_ptr_struct32), 0x10); memcpy(&t32, buf, sizeof(struct _pst_table_ptr_struct32)); LE32_CPU(t32.start); @@ -587,7 +609,7 @@ int _pst_decode_index( pst_index *index, char *buf ) { int r; if (do_read64) { - DEBUG_INDEX(("Decoding index64")); + DEBUG_INDEX(("Decoding index64\n")); DEBUG_HEXDUMPC(buf, sizeof(pst_index), 0x10); memcpy(index, buf, sizeof(pst_index)); LE64_CPU(index->id); @@ -598,7 +620,7 @@ r = sizeof(pst_index); } else { pst_index32 index32; - DEBUG_INDEX(("Decoding index32")); + DEBUG_INDEX(("Decoding index32\n")); DEBUG_HEXDUMPC(buf, sizeof(pst_index32), 0x10); memcpy(&index32, buf, sizeof(pst_index32)); LE32_CPU(index32->id); @@ -638,7 +660,7 @@ return -1; } bptr = buf; - DEBUG_HEXDUMPC(buf, BLOCK_SIZE, ITEM_SIZE); + DEBUG_HEXDUMPC(buf, BLOCK_SIZE, ITEM_SIZE32); item_count = (int)(unsigned)(buf[ITEM_COUNT_OFFSET]); if (item_count > INDEX_COUNT_MAX) { DEBUG_WARN(("Item count %i too large, max is %i\n", item_count, INDEX_COUNT_MAX)); @@ -737,7 +759,7 @@ struct cache_list_node { pst_desc_ll *ptr; /** only used for lost and found lists */ - uint32_t parent; + uint64_t parent; struct cache_list_node *next; struct cache_list_node *prev; }; @@ -750,7 +772,7 @@ /** add the d_ptr descriptor into the global tree */ -void record_descriptor(pst_file *pf, pst_desc_ll *d_ptr, uint32_t parent_id) { +void record_descriptor(pst_file *pf, pst_desc_ll *d_ptr, uint64_t parent_id) { struct cache_list_node *lostfound_ptr = NULL; struct cache_list_node *cache_ptr = NULL; pst_desc_ll *parent = NULL; @@ -873,7 +895,7 @@ } if (buf[LEVEL_INDICATOR_OFFSET] == '\0') { // this node contains leaf pointers - DEBUG_HEXDUMPC(buf, DESC_BLOCK_SIZE, 16); + DEBUG_HEXDUMPC(buf, DESC_BLOCK_SIZE, DESC_SIZE32); if (item_count > DESC_COUNT_MAX) { DEBUG_WARN(("Item count %i too large, max is %i\n", item_count, DESC_COUNT_MAX)); if (buf) free(buf); @@ -987,7 +1009,7 @@ } } else { // this node contains node pointers - DEBUG_HEXDUMPC(buf, DESC_BLOCK_SIZE, ITEM_SIZE); + DEBUG_HEXDUMPC(buf, DESC_BLOCK_SIZE, ITEM_SIZE32); if (item_count > INDEX_COUNT_MAX) { DEBUG_WARN(("Item count %i too large, max is %i\n", item_count, INDEX_COUNT_MAX)); if (buf) free(buf); @@ -996,7 +1018,7 @@ } x = 0; while (x < item_count) { - bptr+=_pst_decode_table(&table, bptr); + bptr += _pst_decode_table(&table, bptr); x++; if (table.start == 0) break; if (x < item_count) { @@ -1046,7 +1068,7 @@ } -void* _pst_parse_item(pst_file *pf, pst_desc_ll *d_ptr) { +pst_item* _pst_parse_item(pst_file *pf, pst_desc_ll *d_ptr) { pst_num_array * list; pst_index2_ll *id2_head = NULL; pst_index_ll *id_ptr = NULL; @@ -1591,14 +1613,14 @@ //need UTF-16 zero-termination vbset(strbuf, na_ptr->items[x]->data, na_ptr->items[x]->size); vbappend(strbuf, "\0\0", 2); - DEBUG_INDEX(("Iconv in: ")); + DEBUG_INDEX(("Iconv in:\n")); DEBUG_HEXDUMPC(strbuf->b, strbuf->dlen, 0x10); vb_utf16to8(unibuf, strbuf->b, strbuf->dlen); free(na_ptr->items[x]->data); na_ptr->items[x]->size = unibuf->dlen; na_ptr->items[x]->data = xmalloc(unibuf->dlen); memcpy(na_ptr->items[x]->data, unibuf->b, unibuf->dlen); - DEBUG_INDEX(("Iconv out: ")); + DEBUG_INDEX(("Iconv out:\n")); DEBUG_HEXDUMPC(na_ptr->items[x]->data, na_ptr->items[x]->size, 0x10); } if (na_ptr->items[x]->type == 0) na_ptr->items[x]->type = table_rec.ref_type; diff -r f6db1f060a95 -r d4606d460daf src/libpst.h --- a/src/libpst.h Sun Jan 06 14:47:06 2008 -0800 +++ b/src/libpst.h Tue Jan 08 16:19:26 2008 -0800 @@ -61,9 +61,9 @@ #ifdef _MSC_VER #include "windows.h" -#define int32_t int +#define int32_t int #define uint32_t unsigned int -#define int16_t short int +#define int16_t short int #define uint16_t unsigned short int #endif // _MSC_VER @@ -505,7 +505,7 @@ int32_t _pst_build_id_ptr(pst_file *pf, off_t offset, int32_t depth, int64_t linku1, uint64_t start_val, uint64_t end_val); int32_t _pst_build_desc_ptr (pst_file *pf, off_t offset, int32_t depth, int64_t linku1, uint64_t *high_id, uint64_t start_id, uint64_t end_val); pst_item* _pst_getItem(pst_file *pf, pst_desc_ll *d_ptr); -void * _pst_parse_item (pst_file *pf, pst_desc_ll *d_ptr); +pst_item* _pst_parse_item (pst_file *pf, pst_desc_ll *d_ptr); pst_num_array * _pst_parse_block(pst_file *pf, uint32_t block_id, pst_index2_ll *i2_head); int32_t _pst_process(pst_num_array *list, pst_item *item, pst_item_attach *attach); int32_t _pst_free_list(pst_num_array *list); diff -r f6db1f060a95 -r d4606d460daf xml/libpst.in --- a/xml/libpst.in Sun Jan 06 14:47:06 2008 -0800 +++ b/xml/libpst.in Tue Jan 08 16:19:26 2008 -0800 @@ -19,7 +19,7 @@ - 2008-01-06 + 2008-01-08 @@ -212,7 +212,7 @@ - 2008-01-06 + 2008-01-08 @@ -286,7 +286,7 @@ - 2008-01-06 + 2008-01-08 @@ -461,7 +461,7 @@ - 2008-01-06 + 2008-01-08 @@ -585,7 +585,7 @@ - 2008-01-06 + 2008-01-08 @@ -610,13 +610,16 @@ Each item in a .pst file is identified by two id values ID1 and ID2. There are two separate b-trees indexed by these ID1 and ID2 values. + Starting with Outlook 2003, the file format changed from one with 32 + bit pointers, to one with 64 bit pointers. We describe both formats + here. - - File Header + + 32 bit File Header - The file header is located at offset 0 in the .pst file. + The 32 bit file header is located at offset 0 in the .pst file. - We only support index type 0x0E and encryption type 0x01. + We only support index types 0x0e and 0x17, and encryption types + 0x00 and 0x01. Index type 0x0e is the older 32 bit Outlook format. + Index type 0x17 is the newer 64 bit Outlook format. Encryption + type 0x00 is no encryption, and type 0x01 is the only other supported + encryption type. offsetIndex1 is the file offset of the root of the @@ -678,10 +685,62 @@ - - Index 1 Node + + 64 bit File Header - The index1 b-tree nodes are 516 byte blocks with the following format. + The 64 bit file header is located at offset 0 in the .pst file. + + + + + + 32 bit Index 1 Node + + The 32 bit index1 b-tree nodes are 516 byte blocks with the + following format. - - Index 1 Leaf Node + + 64 bit Index 1 Node - The index1 b-tree leaf nodes are 516 byte blocks with the following format. + The 64 bit index1 b-tree nodes are 512 byte blocks with the + following format. + + + + The itemCount specifies the number of 24 byte records that + are active. The nodeLevel is non-zero for this style of nodes. + The leaf nodes have a different format. The backPointer must + match the backPointer from the triple that pointed to this node. + + + Each item in this node is a triple of (ID1, backPointer, offset) + where the offset points to the next deeper node in the tree, the + backPointer value must match the backPointer in that deeper node, + and ID1 is the lowest ID1 value in the subtree. + + + + + 32 bit Index 1 Leaf Node + + The 32 bit index1 b-tree leaf nodes are 516 byte blocks with the + following format. @@ -819,10 +949,84 @@ - - Index 2 Node + + 64 bit Index 1 Leaf Node - The index2 b-tree nodes are 516 byte blocks with the following format. + The 64 bit index1 b-tree leaf nodes are 512 byte blocks with the + following format. + + + + The itemCount specifies the number of 24 byte records that + are active. The nodeLevel is zero for these leaf nodes. + The backPointer must match the backPointer from the triple + that pointed to this node. + + + Each item in this node is a tuple of (ID1, offset, size, unknown) + The two low order bits of the ID1 value seem to be flags. I have + never seen a case with bit zero set. Bit one indicates that the + item is not encrypted. Note that references + to these ID1 values elsewhere may have the low order bit set (and + I don't know what that means), but when we do the search in this + tree we need to clear that bit so that we can find the correct item. + + + + + 32 bit Index 2 Node + + The 32 bit index2 b-tree nodes are 516 byte blocks with the + following format. - - Index 2 Leaf Node + + 64 bit Index 2 Node - The index2 b-tree leaf nodes are 516 byte blocks with the following format. + The 64 bit index2 b-tree nodes are 512 byte blocks with the + following format. + + + + The itemCount specifies the number of 24 byte records that + are active. The nodeLevel is non-zero for this style of nodes. + The leaf nodes have a different format. The backPointer must + match the backPointer from the triple that pointed to this node. + + + Each item in this node is a triple of (ID2, backPointer, offset) + where the offset points to the next deeper node in the tree, the + backPointer value must match the backPointer in that deeper node, + and ID2 is the lowest ID2 value in the subtree. + + + + + 32 bit Index 2 Leaf Node + + The 32 bit index2 b-tree leaf nodes are 516 byte blocks with the + following format. + + 64 bit Index 2 Leaf Node + + The 64 bit index2 b-tree leaf nodes are 512 byte blocks with the + following format. + + + + The itemCount specifies the number of 32 byte records that + are active. The nodeLevel is zero for these leaf nodes. + The backPointer must match the backPointer from the triple + that pointed to this node. + + + Each item in this node is a tuple of (ID2, DESC-ID1, LIST-ID1, PARENT-ID2) + + + Associated List Item Contains associations between id1 and id2 for the items controlled by the record. - In the above leaf node, we have a tuple of (0x61, 0x02a82c, 0x02a836, 0) + In the above 32 bit leaf node, we have a tuple of (0x61, 0x02a82c, 0x02a836, 0) 0x02a836 is the ID1 of the associated list, and we can lookup that ID1 value in the index1 b-tree to find the (offset,size) of the data in the .pst file.