annotate src/dumpblocks.c @ 360:26c48ea9d896

From Jeffrey Morlan: pst_build_id_ptr reads the Block BTree into a linked list, which pst_getID does a linear scan through. For large PSTs that have millions of blocks, this is extremely slow - almost all time is spent in pst_getID. Since the BTree entries must be in order, this can be dramatically improved by reading into an array and using binary search.
author Carl Byington <carl@five-ten-sg.com>
date Wed, 06 Jul 2016 10:21:08 -0700
parents 201464dd356e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
122
bdb38b434c0a more changes from Fridrich Strba to avoid installing our config.h
Carl Byington <carl@five-ten-sg.com>
parents: 120
diff changeset
1 #include "define.h"
bdb38b434c0a more changes from Fridrich Strba to avoid installing our config.h
Carl Byington <carl@five-ten-sg.com>
parents: 120
diff changeset
2
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
3 #define OUT_BUF 20
122
bdb38b434c0a more changes from Fridrich Strba to avoid installing our config.h
Carl Byington <carl@five-ten-sg.com>
parents: 120
diff changeset
4
118
0f1492b7fe8b patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents: 87
diff changeset
5 int main(int argc, char* const* argv)
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
6 {
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
7 pst_file pstfile;
360
26c48ea9d896 From Jeffrey Morlan:
Carl Byington <carl@five-ten-sg.com>
parents: 298
diff changeset
8 size_t i;
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
9 char *outdir = NULL, *file = NULL, *outname = NULL;
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
10 char *buf = NULL;
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
11 int c;
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
12 FILE *fp;
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
13
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
14 while ((c = getopt(argc, argv, "o:")) != -1) {
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
15 switch (c) {
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
16 case 'o':
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
17 outdir = optarg;
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
18 break;
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
19 default:
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
20 printf("Unknown switch %c\n", c);
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
21 }
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
22 }
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
23 if (optind < argc) {
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
24 file = argv[optind];
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
25 } else {
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
26 printf("Usage: dumpblocks [options] pstfile\n");
77
87216aefc6df spelling fixup
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
27 printf("\tcopies the datablocks from the pst file into separate files\n");
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
28 printf("Options: \n");
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
29 printf("\t-o target\tSpecify the output directory\n");
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
30 exit(1);
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
31 }
202
2f38c4ce606f remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents: 188
diff changeset
32 DEBUG_INIT("dumpblocks.log", NULL);
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
33 DEBUG_ENT("main");
46
b2a7f2e0926a more fixes for 64 bit format
carl
parents: 16
diff changeset
34
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
35 printf("Opening file %s\n", file);
298
201464dd356e add default character set for items where the pst file does not specify a character set
Carl Byington <carl@five-ten-sg.com>
parents: 202
diff changeset
36 if (pst_open(&pstfile, file, NULL)) {
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
37 printf("Failed to open file %s\n", file);
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
38 exit(1);
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
39 }
46
b2a7f2e0926a more fixes for 64 bit format
carl
parents: 16
diff changeset
40
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
41 printf("Reading Indexes\n");
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
42 if (pst_load_index(&pstfile)) {
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
43 printf("Failed to load indexes in file %s\n", argv[1]);
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
44 exit(1);
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
45 }
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
46
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
47 if (outdir != NULL)
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
48 if (chdir(outdir)) {
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
49 printf("Failed to change into directory %s\n", outdir);
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
50 exit(1);
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
51 }
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
52
172
6954d315aaa8 move version-info into main configure.in, and set it properly.
Carl Byington <carl@five-ten-sg.com>
parents: 164
diff changeset
53 outname = (char *) pst_malloc(OUT_BUF);
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
54 printf("Saving blocks\n");
360
26c48ea9d896 From Jeffrey Morlan:
Carl Byington <carl@five-ten-sg.com>
parents: 298
diff changeset
55 for (i = 0; i < pstfile.i_count; i++) {
26c48ea9d896 From Jeffrey Morlan:
Carl Byington <carl@five-ten-sg.com>
parents: 298
diff changeset
56 pst_index_ll *ptr = &pstfile.i_table[i];
26c48ea9d896 From Jeffrey Morlan:
Carl Byington <carl@five-ten-sg.com>
parents: 298
diff changeset
57 size_t c = pst_ff_getIDblock_dec(&pstfile, ptr->i_id, &buf);
188
d588dafd03e8 prep for fedora build
Carl Byington <carl@five-ten-sg.com>
parents: 172
diff changeset
58 if (c) {
164
ab384fed78c5 Compensate for iconv conversion to utf-7 that produces strings that are not null terminated.
Carl Byington <carl@five-ten-sg.com>
parents: 129
diff changeset
59 snprintf(outname, OUT_BUF, "%#"PRIx64, ptr->i_id);
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
60 if ((fp = fopen(outname, "wb")) == NULL) {
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
61 printf("Failed to open file %s\n", outname);
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
62 continue;
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
63 }
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
64 pst_fwrite(buf, 1, c, fp);
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
65 fclose(fp);
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
66 } else {
164
ab384fed78c5 Compensate for iconv conversion to utf-7 that produces strings that are not null terminated.
Carl Byington <carl@five-ten-sg.com>
parents: 129
diff changeset
67 printf("Failed to read block i_id %#"PRIx64"\n", ptr->i_id);
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
68 }
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
69 }
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
70 pst_close(&pstfile);
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
71 DEBUG_RET();
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 59
diff changeset
72 return 0;
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
73 }