comparison src/libpst.c @ 360:26c48ea9d896

From Jeffrey Morlan: pst_build_id_ptr reads the Block BTree into a linked list, which pst_getID does a linear scan through. For large PSTs that have millions of blocks, this is extremely slow - almost all time is spent in pst_getID. Since the BTree entries must be in order, this can be dramatically improved by reading into an array and using binary search.
author Carl Byington <carl@five-ten-sg.com>
date Wed, 06 Jul 2016 10:21:08 -0700
parents a3e674fade6c
children 8700cc322c0c
comparison
equal deleted inserted replaced
359:a3e674fade6c 360:26c48ea9d896
266 static size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, pst_holder *h); 266 static size_t pst_ff_getID2data(pst_file *pf, pst_index_ll *ptr, pst_holder *h);
267 static size_t pst_finish_cleanup_holder(pst_holder *h, size_t size); 267 static size_t pst_finish_cleanup_holder(pst_holder *h, size_t size);
268 static void pst_free_attach(pst_item_attach *attach); 268 static void pst_free_attach(pst_item_attach *attach);
269 static void pst_free_desc (pst_desc_tree *head); 269 static void pst_free_desc (pst_desc_tree *head);
270 static void pst_free_id2(pst_id2_tree * head); 270 static void pst_free_id2(pst_id2_tree * head);
271 static void pst_free_id (pst_index_ll *head);
272 static void pst_free_list(pst_mapi_object *list); 271 static void pst_free_list(pst_mapi_object *list);
273 static void pst_free_xattrib(pst_x_attrib_ll *x); 272 static void pst_free_xattrib(pst_x_attrib_ll *x);
274 static size_t pst_getAtPos(pst_file *pf, int64_t pos, void* buf, size_t size); 273 static size_t pst_getAtPos(pst_file *pf, int64_t pos, void* buf, size_t size);
275 static int pst_getBlockOffsetPointer(pst_file *pf, pst_id2_tree *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p); 274 static int pst_getBlockOffsetPointer(pst_file *pf, pst_id2_tree *i2_head, pst_subblocks *subblocks, uint32_t offset, pst_block_offset_pointer *p);
276 static int pst_getBlockOffset(char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset *p); 275 static int pst_getBlockOffset(char *buf, size_t read_size, uint32_t i_offset, uint32_t offset, pst_block_offset *p);
402 DEBUG_WARN(("fclose returned non-zero value\n")); 401 DEBUG_WARN(("fclose returned non-zero value\n"));
403 } 402 }
404 // free the paths 403 // free the paths
405 free(pf->cwd); 404 free(pf->cwd);
406 free(pf->fname); 405 free(pf->fname);
407 // we must free the id linklist and the desc tree 406 // we must free the id array and the desc tree
408 pst_free_id(pf->i_head); 407 free(pf->i_table);
409 pst_free_desc(pf->d_head); 408 pst_free_desc(pf->d_head);
410 pst_free_xattrib(pf->x_head); 409 pst_free_xattrib(pf->x_head);
411 DEBUG_RET(); 410 DEBUG_RET();
412 return 0; 411 return 0;
413 } 412 }
1041 if (buf) free(buf); 1040 if (buf) free(buf);
1042 DEBUG_RET(); 1041 DEBUG_RET();
1043 return -1; 1042 return -1;
1044 } 1043 }
1045 old = index.id; 1044 old = index.id;
1046 i_ptr = (pst_index_ll*) pst_malloc(sizeof(pst_index_ll)); 1045 if (pf->i_count == pf->i_capacity) {
1046 pf->i_capacity += (pf->i_capacity >> 1) + 16; // arbitrary growth rate
1047 pf->i_table = pst_realloc(pf->i_table, pf->i_capacity * sizeof(pst_index_ll));
1048 }
1049 i_ptr = &pf->i_table[pf->i_count++];
1047 i_ptr->i_id = index.id; 1050 i_ptr->i_id = index.id;
1048 i_ptr->offset = index.offset; 1051 i_ptr->offset = index.offset;
1049 i_ptr->u1 = index.u1; 1052 i_ptr->u1 = index.u1;
1050 i_ptr->size = index.size; 1053 i_ptr->size = index.size;
1051 i_ptr->next = NULL;
1052 if (pf->i_tail) pf->i_tail->next = i_ptr;
1053 if (!pf->i_head) pf->i_head = i_ptr;
1054 pf->i_tail = i_ptr;
1055 } 1054 }
1056 } else { 1055 } else {
1057 // this node contains node pointers 1056 // this node contains node pointers
1058 x = 0; 1057 x = 0;
1059 while (x < item_count) { 1058 while (x < item_count) {
3200 static void pst_free_id2(pst_id2_tree * head) { 3199 static void pst_free_id2(pst_id2_tree * head) {
3201 pst_id2_tree *t; 3200 pst_id2_tree *t;
3202 DEBUG_ENT("pst_free_id2"); 3201 DEBUG_ENT("pst_free_id2");
3203 while (head) { 3202 while (head) {
3204 pst_free_id2(head->child); 3203 pst_free_id2(head->child);
3205 t = head->next;
3206 free(head);
3207 head = t;
3208 }
3209 DEBUG_RET();
3210 }
3211
3212
3213 static void pst_free_id (pst_index_ll *head) {
3214 pst_index_ll *t;
3215 DEBUG_ENT("pst_free_id");
3216 while (head) {
3217 t = head->next; 3204 t = head->next;
3218 free(head); 3205 free(head);
3219 head = t; 3206 head = t;
3220 } 3207 }
3221 DEBUG_RET(); 3208 DEBUG_RET();
3618 DEBUG_RET(); 3605 DEBUG_RET();
3619 return 1; 3606 return 1;
3620 } 3607 }
3621 3608
3622 3609
3610 static int pst_getID_compare(const void *key, const void *entry) {
3611 uint64_t key_id = *(const uint64_t*)key;
3612 uint64_t entry_id = ((const pst_index_ll*)entry)->i_id;
3613 return (key_id > entry_id) - (key_id < entry_id);
3614 }
3615
3616
3623 /** */ 3617 /** */
3624 pst_index_ll* pst_getID(pst_file* pf, uint64_t i_id) { 3618 pst_index_ll* pst_getID(pst_file* pf, uint64_t i_id) {
3625 pst_index_ll *ptr; 3619 pst_index_ll *ptr;
3626 DEBUG_ENT("pst_getID"); 3620 DEBUG_ENT("pst_getID");
3627 if (i_id == 0) { 3621 if (i_id == 0) {
3632 //if (i_id & 1) DEBUG_INFO(("have odd id bit %#"PRIx64"\n", i_id)); 3626 //if (i_id & 1) DEBUG_INFO(("have odd id bit %#"PRIx64"\n", i_id));
3633 //if (i_id & 2) DEBUG_INFO(("have two id bit %#"PRIx64"\n", i_id)); 3627 //if (i_id & 2) DEBUG_INFO(("have two id bit %#"PRIx64"\n", i_id));
3634 i_id -= (i_id & 1); 3628 i_id -= (i_id & 1);
3635 3629
3636 DEBUG_INFO(("Trying to find %#"PRIx64"\n", i_id)); 3630 DEBUG_INFO(("Trying to find %#"PRIx64"\n", i_id));
3637 ptr = pf->i_head; 3631 ptr = bsearch(&i_id, pf->i_table, pf->i_count, sizeof *pf->i_table, pst_getID_compare);
3638 while (ptr && (ptr->i_id != i_id)) {
3639 ptr = ptr->next;
3640 }
3641 if (ptr) {DEBUG_INFO(("Found Value %#"PRIx64"\n", i_id)); } 3632 if (ptr) {DEBUG_INFO(("Found Value %#"PRIx64"\n", i_id)); }
3642 else {DEBUG_INFO(("ERROR: Value %#"PRIx64" not found\n", i_id)); } 3633 else {DEBUG_INFO(("ERROR: Value %#"PRIx64" not found\n", i_id)); }
3643 DEBUG_RET(); 3634 DEBUG_RET();
3644 return ptr; 3635 return ptr;
3645 } 3636 }