diff src/dumpblocks.c @ 360:26c48ea9d896

From Jeffrey Morlan: pst_build_id_ptr reads the Block BTree into a linked list, which pst_getID then searches with a linear scan. For large PSTs that have millions of blocks, this is extremely slow - almost all of the time is spent in pst_getID. Since the BTree entries must be in order, lookups can be made dramatically faster by reading the entries into an array and using binary search.
author Carl Byington <carl@five-ten-sg.com>
date Wed, 06 Jul 2016 10:21:08 -0700
parents 201464dd356e
children
line wrap: on
line diff
--- a/src/dumpblocks.c	Wed Jul 06 10:20:12 2016 -0700
+++ b/src/dumpblocks.c	Wed Jul 06 10:21:08 2016 -0700
@@ -5,7 +5,7 @@
 int main(int argc, char* const* argv)
 {
     pst_file pstfile;
-    pst_index_ll *ptr;
+    size_t i;
     char *outdir = NULL, *file = NULL, *outname = NULL;
     char *buf = NULL;
     int c;
@@ -50,12 +50,11 @@
             exit(1);
         }
 
-    ptr = pstfile.i_head;
     outname = (char *) pst_malloc(OUT_BUF);
     printf("Saving blocks\n");
-    while (ptr != NULL) {
-        size_t c;
-        c = pst_ff_getIDblock_dec(&pstfile, ptr->i_id, &buf);
+    for (i = 0; i < pstfile.i_count; i++) {
+        pst_index_ll *ptr = &pstfile.i_table[i];
+        size_t c = pst_ff_getIDblock_dec(&pstfile, ptr->i_id, &buf);
         if (c) {
             snprintf(outname, OUT_BUF, "%#"PRIx64, ptr->i_id);
             if ((fp = fopen(outname, "wb")) == NULL) {
@@ -67,7 +66,6 @@
         } else {
             printf("Failed to read block i_id %#"PRIx64"\n", ptr->i_id);
         }
-        ptr = ptr->next;
     }
     pst_close(&pstfile);
     DEBUG_RET();