Mercurial > libpst
comparison src/libpst.c @ 360:26c48ea9d896
From Jeffrey Morlan:
pst_build_id_ptr reads the Block BTree into a linked list, which
pst_getID does a linear scan through. For large PSTs that have
millions of blocks, this is extremely slow - almost all time is spent
in pst_getID. Since the BTree entries must be in order, this can be
dramatically improved by reading into an array and using binary
search.
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Wed, 06 Jul 2016 10:21:08 -0700 |
parents | a3e674fade6c |
children | 8700cc322c0c |
comparison
equal
deleted
inserted
replaced
359:a3e674fade6c | 360:26c48ea9d896 |
---|---|
266 static size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, pst_holder *h); | 266 static size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, pst_holder *h); |
267 static size_t pst_finish_cleanup_holder(pst_holder *h, size_t size); | 267 static size_t pst_finish_cleanup_holder(pst_holder *h, size_t size); |
268 static void pst_free_attach(pst_item_attach *attach); | 268 static void pst_free_attach(pst_item_attach *attach); |
269 static void pst_free_desc (pst_desc_tree *head); | 269 static void pst_free_desc (pst_desc_tree *head); |
270 static void pst_free_id2(pst_id2_tree * head); | 270 static void pst_free_id2(pst_id2_tree * head); |
271 static void pst_free_id (pst_index_ll *head); | |
272 static void pst_free_list(pst_mapi_object *list); | 271 static void pst_free_list(pst_mapi_object *list); |
273 static void pst_free_xattrib(pst_x_attrib_ll *x); | 272 static void pst_free_xattrib(pst_x_attrib_ll *x); |
274 static size_t pst_getAtPos(pst_file *pf, int64_t pos, void* buf, size_t size); | 273 static size_t pst_getAtPos(pst_file *pf, int64_t pos, void* buf, size_t size); |
275 static int pst_getBlockOffsetPointer(pst_file *pf, pst_id2_tree *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p); | 274 static int pst_getBlockOffsetPointer(pst_file *pf, pst_id2_tree *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p); |
276 static int pst_getBlockOffset(char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset *p); | 275 static int pst_getBlockOffset(char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset *p); |
402 DEBUG_WARN(("fclose returned non-zero value\n")); | 401 DEBUG_WARN(("fclose returned non-zero value\n")); |
403 } | 402 } |
404 // free the paths | 403 // free the paths |
405 free(pf->cwd); | 404 free(pf->cwd); |
406 free(pf->fname); | 405 free(pf->fname); |
407 // we must free the id linklist and the desc tree | 406 // we must free the id array and the desc tree |
408 pst_free_id(pf->i_head); | 407 free(pf->i_table); |
409 pst_free_desc(pf->d_head); | 408 pst_free_desc(pf->d_head); |
410 pst_free_xattrib(pf->x_head); | 409 pst_free_xattrib(pf->x_head); |
411 DEBUG_RET(); | 410 DEBUG_RET(); |
412 return 0; | 411 return 0; |
413 } | 412 } |
1041 if (buf) free(buf); | 1040 if (buf) free(buf); |
1042 DEBUG_RET(); | 1041 DEBUG_RET(); |
1043 return -1; | 1042 return -1; |
1044 } | 1043 } |
1045 old = index.id; | 1044 old = index.id; |
1046 i_ptr = (pst_index_ll*) pst_malloc(sizeof(pst_index_ll)); | 1045 if (pf->i_count == pf->i_capacity) { |
1046 pf->i_capacity += (pf->i_capacity >> 1) + 16; // arbitrary growth rate | |
1047 pf->i_table = pst_realloc(pf->i_table, pf->i_capacity * sizeof(pst_index_ll)); | |
1048 } | |
1049 i_ptr = &pf->i_table[pf->i_count++]; | |
1047 i_ptr->i_id = index.id; | 1050 i_ptr->i_id = index.id; |
1048 i_ptr->offset = index.offset; | 1051 i_ptr->offset = index.offset; |
1049 i_ptr->u1 = index.u1; | 1052 i_ptr->u1 = index.u1; |
1050 i_ptr->size = index.size; | 1053 i_ptr->size = index.size; |
1051 i_ptr->next = NULL; | |
1052 if (pf->i_tail) pf->i_tail->next = i_ptr; | |
1053 if (!pf->i_head) pf->i_head = i_ptr; | |
1054 pf->i_tail = i_ptr; | |
1055 } | 1054 } |
1056 } else { | 1055 } else { |
1057 // this node contains node pointers | 1056 // this node contains node pointers |
1058 x = 0; | 1057 x = 0; |
1059 while (x < item_count) { | 1058 while (x < item_count) { |
3200 static void pst_free_id2(pst_id2_tree * head) { | 3199 static void pst_free_id2(pst_id2_tree * head) { |
3201 pst_id2_tree *t; | 3200 pst_id2_tree *t; |
3202 DEBUG_ENT("pst_free_id2"); | 3201 DEBUG_ENT("pst_free_id2"); |
3203 while (head) { | 3202 while (head) { |
3204 pst_free_id2(head->child); | 3203 pst_free_id2(head->child); |
3205 t = head->next; | |
3206 free(head); | |
3207 head = t; | |
3208 } | |
3209 DEBUG_RET(); | |
3210 } | |
3211 | |
3212 | |
3213 static void pst_free_id (pst_index_ll *head) { | |
3214 pst_index_ll *t; | |
3215 DEBUG_ENT("pst_free_id"); | |
3216 while (head) { | |
3217 t = head->next; | 3204 t = head->next; |
3218 free(head); | 3205 free(head); |
3219 head = t; | 3206 head = t; |
3220 } | 3207 } |
3221 DEBUG_RET(); | 3208 DEBUG_RET(); |
3618 DEBUG_RET(); | 3605 DEBUG_RET(); |
3619 return 1; | 3606 return 1; |
3620 } | 3607 } |
3621 | 3608 |
3622 | 3609 |
3610 static int pst_getID_compare(const void *key, const void *entry) { | |
3611 uint64_t key_id = *(const uint64_t*)key; | |
3612 uint64_t entry_id = ((const pst_index_ll*)entry)->i_id; | |
3613 return (key_id > entry_id) - (key_id < entry_id); | |
3614 } | |
3615 | |
3616 | |
3623 /** */ | 3617 /** */ |
3624 pst_index_ll* pst_getID(pst_file* pf, uint64_t i_id) { | 3618 pst_index_ll* pst_getID(pst_file* pf, uint64_t i_id) { |
3625 pst_index_ll *ptr; | 3619 pst_index_ll *ptr; |
3626 DEBUG_ENT("pst_getID"); | 3620 DEBUG_ENT("pst_getID"); |
3627 if (i_id == 0) { | 3621 if (i_id == 0) { |
3632 //if (i_id & 1) DEBUG_INFO(("have odd id bit %#"PRIx64"\n", i_id)); | 3626 //if (i_id & 1) DEBUG_INFO(("have odd id bit %#"PRIx64"\n", i_id)); |
3633 //if (i_id & 2) DEBUG_INFO(("have two id bit %#"PRIx64"\n", i_id)); | 3627 //if (i_id & 2) DEBUG_INFO(("have two id bit %#"PRIx64"\n", i_id)); |
3634 i_id -= (i_id & 1); | 3628 i_id -= (i_id & 1); |
3635 | 3629 |
3636 DEBUG_INFO(("Trying to find %#"PRIx64"\n", i_id)); | 3630 DEBUG_INFO(("Trying to find %#"PRIx64"\n", i_id)); |
3637 ptr = pf->i_head; | 3631 ptr = bsearch(&i_id, pf->i_table, pf->i_count, sizeof *pf->i_table, pst_getID_compare); |
3638 while (ptr && (ptr->i_id != i_id)) { | |
3639 ptr = ptr->next; | |
3640 } | |
3641 if (ptr) {DEBUG_INFO(("Found Value %#"PRIx64"\n", i_id)); } | 3632 if (ptr) {DEBUG_INFO(("Found Value %#"PRIx64"\n", i_id)); } |
3642 else {DEBUG_INFO(("ERROR: Value %#"PRIx64" not found\n", i_id)); } | 3633 else {DEBUG_INFO(("ERROR: Value %#"PRIx64" not found\n", i_id)); } |
3643 DEBUG_RET(); | 3634 DEBUG_RET(); |
3644 return ptr; | 3635 return ptr; |
3645 } | 3636 } |