Mercurial > libpst
annotate src/lspst.c @ 355:d1f930be4711
From Jeffrey Morlan:
pst_build_id_ptr and pst_build_desc_ptr require that the first child
of a BTree page have the same starting ID as itself. This is not
required by the spec, and is not true in many real-world PSTs
(presumably, the original first child of the page got
deleted). Because of this, many emails are not being extracted from
these PSTs. It also triggers an infinite loop in lspst (a separate
bug, also fixed).
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Wed, 06 Jul 2016 10:12:22 -0700 |
parents | 201464dd356e |
children | ad7b880ad3d1 |
rev | line source |
---|---|
16 | 1 /*** |
2 * lspst.c | |
3 * Part of the LibPST project | |
4 * Author: Joe Nahmias <joe@nahmias.net> | |
5 * Based on readpst.c by David Smith <dave.s@earthcorp.com> | |
6 * | |
7 */ | |
8 | |
122
bdb38b434c0a
more changes from Fridrich Strba to avoid installing our config.h
Carl Byington <carl@five-ten-sg.com>
parents:
120
diff
changeset
|
9 #include "define.h" |
bdb38b434c0a
more changes from Fridrich Strba to avoid installing our config.h
Carl Byington <carl@five-ten-sg.com>
parents:
120
diff
changeset
|
10 |
/* Per-folder bookkeeping filled in while a folder's descriptor list is walked. */
struct file_ll {
    char    *dname;         /* heap copy of the folder's file_as string; freed by close_enter_dir() */
    int32_t  stored_count;  /* item count taken from the folder record, 0 when the item has no folder data */
    int32_t  item_count;    /* reset to 0 by create_enter_dir() */
    int32_t  skip_count;    /* incremented by process() for each entry it skips */
    int32_t  type;          /* item type of the folder; process() sets it from an item when it is still 0 */
};
43 | 18 |
19 | |
16 | 20 void canonicalize_filename(char *fname); |
21 void debug_print(char *fmt, ...); | |
118
0f1492b7fe8b
patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents:
110
diff
changeset
|
22 void usage(char *prog_name); |
0f1492b7fe8b
patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents:
110
diff
changeset
|
23 void version(); |
43 | 24 |
25 // global settings | |
26 pst_file pstfile; | |
27 | |
16 | 28 |
43 | 29 void create_enter_dir(struct file_ll* f, pst_item *item) |
30 { | |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
31 pst_convert_utf8(item, &item->file_as); |
167
40e9de445038
improve consistency checking when fetching items from the pst file.
Carl Byington <carl@five-ten-sg.com>
parents:
164
diff
changeset
|
32 f->item_count = 0; |
43 | 33 f->skip_count = 0; |
34 f->type = item->type; | |
167
40e9de445038
improve consistency checking when fetching items from the pst file.
Carl Byington <carl@five-ten-sg.com>
parents:
164
diff
changeset
|
35 f->stored_count = (item->folder) ? item->folder->item_count : 0; |
172
6954d315aaa8
move version-info into main configure.in, and set it properly.
Carl Byington <carl@five-ten-sg.com>
parents:
167
diff
changeset
|
36 f->dname = strdup(item->file_as.str); |
43 | 37 } |
16 | 38 |
39 | |
43 | 40 void close_enter_dir(struct file_ll *f) |
41 { | |
42 free(f->dname); | |
43 } | |
16 | 44 |
45 | |
186
0a4f7ecd7452
more cleanup of external names in the shared library
Carl Byington <carl@five-ten-sg.com>
parents:
172
diff
changeset
|
46 void process(pst_item *outeritem, pst_desc_tree *d_ptr) |
43 | 47 { |
48 struct file_ll ff; | |
49 pst_item *item = NULL; | |
211
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
50 char *result = NULL; |
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
51 size_t resultlen = 0; |
43 | 52 |
53 DEBUG_ENT("process"); | |
54 memset(&ff, 0, sizeof(ff)); | |
55 create_enter_dir(&ff, outeritem); | |
16 | 56 |
43 | 57 while (d_ptr) { |
58 if (!d_ptr->desc) { | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
59 DEBUG_WARN(("ERROR item's desc record is NULL\n")); |
43 | 60 ff.skip_count++; |
61 } | |
62 else { | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
63 DEBUG_INFO(("Desc Email ID %"PRIx64" [d_ptr->d_id = %"PRIx64"]\n", d_ptr->desc->i_id, d_ptr->d_id)); |
16 | 64 |
143
fdc58ad2c758
fix embedded rfc822 messages with attachments
Carl Byington <carl@five-ten-sg.com>
parents:
129
diff
changeset
|
65 item = pst_parse_item(&pstfile, d_ptr, NULL); |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
66 DEBUG_INFO(("About to process item @ %p.\n", item)); |
43 | 67 if (item) { |
68 if (item->message_store) { | |
69 // there should only be one message_store, and we have already done it | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
70 DIE(("A second message_store has been found. Sorry, this must be an error.\n")); |
43 | 71 } |
16 | 72 |
43 | 73 if (item->folder && d_ptr->child) { |
74 // if this is a folder, we want to recurse into it | |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
75 pst_convert_utf8(item, &item->file_as); |
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
76 printf("Folder \"%s\"\n", item->file_as.str); |
43 | 77 process(item, d_ptr->child); |
16 | 78 |
43 | 79 } else if (item->contact && (item->type == PST_TYPE_CONTACT)) { |
198
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
80 if (!ff.type) ff.type = item->type; |
43 | 81 // Process Contact item |
82 if (ff.type != PST_TYPE_CONTACT) { | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
83 DEBUG_INFO(("I have a contact, but the folder isn't a contacts folder. Processing anyway\n")); |
43 | 84 } |
85 printf("Contact"); | |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
86 if (item->contact->fullname.str) |
211
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
87 printf("\t%s", pst_rfc2426_escape(item->contact->fullname.str, &result, &resultlen)); |
43 | 88 printf("\n"); |
16 | 89 |
198
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
90 } else if (item->email && ((item->type == PST_TYPE_NOTE) || (item->type == PST_TYPE_SCHEDULE) || (item->type == PST_TYPE_REPORT))) { |
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
91 if (!ff.type) ff.type = item->type; |
43 | 92 // Process Email item |
198
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
93 if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT)) { |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
94 DEBUG_INFO(("I have an email, but the folder isn't an email folder. Processing anyway\n")); |
43 | 95 } |
96 printf("Email"); | |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
97 if (item->email->outlook_sender_name.str) |
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
98 printf("\tFrom: %s", item->email->outlook_sender_name.str); |
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
99 if (item->subject.str) |
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
100 printf("\tSubject: %s", item->subject.str); |
43 | 101 printf("\n"); |
16 | 102 |
43 | 103 } else if (item->journal && (item->type == PST_TYPE_JOURNAL)) { |
198
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
104 if (!ff.type) ff.type = item->type; |
43 | 105 // Process Journal item |
106 if (ff.type != PST_TYPE_JOURNAL) { | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
107 DEBUG_INFO(("I have a journal entry, but folder isn't specified as a journal type. Processing...\n")); |
43 | 108 } |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
109 if (item->subject.str) |
211
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
110 printf("Journal\t%s\n", pst_rfc2426_escape(item->subject.str, &result, &resultlen)); |
16 | 111 |
43 | 112 } else if (item->appointment && (item->type == PST_TYPE_APPOINTMENT)) { |
199
e3a46f66332b
more changes in recurrence decoding
Carl Byington <carl@five-ten-sg.com>
parents:
198
diff
changeset
|
113 char time_buffer[30]; |
198
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
114 if (!ff.type) ff.type = item->type; |
43 | 115 // Process Calendar Appointment item |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
116 DEBUG_INFO(("Processing Appointment Entry\n")); |
43 | 117 if (ff.type != PST_TYPE_APPOINTMENT) { |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
118 DEBUG_INFO(("I have an appointment, but folder isn't specified as an appointment type. Processing...\n")); |
43 | 119 } |
120 printf("Appointment"); | |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
121 if (item->subject.str) |
211
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
122 printf("\tSUMMARY: %s", pst_rfc2426_escape(item->subject.str, &result, &resultlen)); |
50 | 123 if (item->appointment->start) |
199
e3a46f66332b
more changes in recurrence decoding
Carl Byington <carl@five-ten-sg.com>
parents:
198
diff
changeset
|
124 printf("\tSTART: %s", pst_rfc2445_datetime_format(item->appointment->start, sizeof(time_buffer), time_buffer)); |
50 | 125 if (item->appointment->end) |
199
e3a46f66332b
more changes in recurrence decoding
Carl Byington <carl@five-ten-sg.com>
parents:
198
diff
changeset
|
126 printf("\tEND: %s", pst_rfc2445_datetime_format(item->appointment->end, sizeof(time_buffer), time_buffer)); |
50 | 127 printf("\tALL DAY: %s", (item->appointment->all_day==1 ? "Yes" : "No")); |
43 | 128 printf("\n"); |
16 | 129 |
43 | 130 } else { |
131 ff.skip_count++; | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
132 DEBUG_INFO(("Unknown item type. %i. Ascii1=\"%s\"\n", |
43 | 133 item->type, item->ascii_type)); |
134 } | |
46 | 135 pst_freeItem(item); |
43 | 136 } else { |
137 ff.skip_count++; | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
138 DEBUG_INFO(("A NULL item was seen\n")); |
43 | 139 } |
140 } | |
355 | 141 d_ptr = d_ptr->next; |
43 | 142 } |
143 close_enter_dir(&ff); | |
211
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
144 if (result) free(result); |
52 | 145 DEBUG_RET(); |
43 | 146 } |
16 | 147 |
43 | 148 |
118
0f1492b7fe8b
patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents:
110
diff
changeset
|
/*
 * Print the version banner followed by the command line help.
 *
 * prog_name - name to show in the "Usage:" line (main() passes argv[0]).
 */
void usage(char *prog_name) {
    DEBUG_ENT("usage");
    version();
    printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
    printf("OPTIONS:\n");
    // The debug log is plain text; the former hint about a binary log and a
    // separate reader tool no longer applies.
    printf("\t-d <filename> \t- Debug to file.\n");
    printf("\t-h\t- Help. This screen\n");
    printf("\t-V\t- Version. Display program version\n");
    DEBUG_RET();
}
159 | |
160 | |
118
0f1492b7fe8b
patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents:
110
diff
changeset
|
161 void version() { |
50 | 162 DEBUG_ENT("version"); |
163 printf("lspst / LibPST v%s\n", VERSION); | |
164 #if BYTE_ORDER == BIG_ENDIAN | |
165 printf("Big Endian implementation being used.\n"); | |
166 #elif BYTE_ORDER == LITTLE_ENDIAN | |
167 printf("Little Endian implementation being used.\n"); | |
168 #else | |
169 # error "Byte order not supported by this library" | |
170 #endif | |
171 DEBUG_RET(); | |
172 } | |
173 | |
174 | |
118
0f1492b7fe8b
patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents:
110
diff
changeset
|
175 int main(int argc, char* const* argv) { |
43 | 176 pst_item *item = NULL; |
186
0a4f7ecd7452
more cleanup of external names in the shared library
Carl Byington <carl@five-ten-sg.com>
parents:
172
diff
changeset
|
177 pst_desc_tree *d_ptr; |
43 | 178 char *temp = NULL; //temporary char pointer |
50 | 179 int c; |
43 | 180 char *d_log = NULL; |
16 | 181 |
50 | 182 while ((c = getopt(argc, argv, "d:hV"))!= -1) { |
183 switch (c) { | |
184 case 'd': | |
185 d_log = optarg; | |
186 break; | |
187 case 'h': | |
188 usage(argv[0]); | |
189 exit(0); | |
190 break; | |
191 case 'V': | |
192 version(); | |
193 exit(0); | |
194 break; | |
195 default: | |
196 usage(argv[0]); | |
197 exit(1); | |
198 break; | |
199 } | |
200 } | |
43 | 201 |
48 | 202 #ifdef DEBUG_ALL |
203 // force a log file | |
204 if (!d_log) d_log = "lspst.log"; | |
205 #endif // defined DEBUG_ALL | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
206 DEBUG_INIT(d_log, NULL); |
43 | 207 DEBUG_ENT("main"); |
208 | |
50 | 209 if (argc <= optind) { |
210 usage(argv[0]); | |
211 exit(2); | |
212 } | |
213 | |
43 | 214 // Open PST file |
298
201464dd356e
add default character set for items where the pst file does not specify a character set
Carl Byington <carl@five-ten-sg.com>
parents:
285
diff
changeset
|
215 if (pst_open(&pstfile, argv[optind], NULL)) DIE(("Error opening File\n")); |
16 | 216 |
43 | 217 // Load PST index |
218 if (pst_load_index(&pstfile)) DIE(("Index Error\n")); | |
219 | |
220 pst_load_extended_attributes(&pstfile); | |
16 | 221 |
43 | 222 d_ptr = pstfile.d_head; // first record is main record |
143
fdc58ad2c758
fix embedded rfc822 messages with attachments
Carl Byington <carl@five-ten-sg.com>
parents:
129
diff
changeset
|
223 item = pst_parse_item(&pstfile, d_ptr, NULL); |
43 | 224 if (!item || !item->message_store) { |
225 DEBUG_RET(); | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
226 DIE(("Could not get root record\n")); |
43 | 227 } |
16 | 228 |
43 | 229 // default the file_as to the same as the main filename if it doesn't exist |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
230 if (!item->file_as.str) { |
43 | 231 if (!(temp = strrchr(argv[1], '/'))) |
232 if (!(temp = strrchr(argv[1], '\\'))) | |
233 temp = argv[1]; | |
234 else | |
235 temp++; // get past the "\\" | |
236 else | |
237 temp++; // get past the "/" | |
172
6954d315aaa8
move version-info into main configure.in, and set it properly.
Carl Byington <carl@five-ten-sg.com>
parents:
167
diff
changeset
|
238 item->file_as.str = strdup(temp); |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
239 item->file_as.is_utf8 = 1; |
43 | 240 } |
16 | 241 |
43 | 242 d_ptr = pst_getTopOfFolders(&pstfile, item); |
243 if (!d_ptr) DIE(("Top of folders record not found. Cannot continue\n")); | |
16 | 244 |
43 | 245 process(item, d_ptr->child); // do the childred of TOPF |
46 | 246 pst_freeItem(item); |
43 | 247 pst_close(&pstfile); |
16 | 248 |
43 | 249 DEBUG_RET(); |
250 return 0; | |
16 | 251 } |
43 | 252 |
253 | |
// Put a filename into canonical form: every slash, backslash, or colon in
// fname is overwritten in place with an underscore. A NULL fname is ignored.
void canonicalize_filename(char *fname) {
    char *hit;

    DEBUG_ENT("canonicalize_filename");
    if (fname != NULL) {
        // the replaced character no longer matches, so searching again from
        // the same position finds the next offending character
        for (hit = strpbrk(fname, "/\\:"); hit != NULL; hit = strpbrk(hit, "/\\:")) {
            *hit = '_';
        }
    }
    DEBUG_RET();
}
43 | 266 |
267 |