Mercurial > libpst
annotate lspst.c @ 12:3f627519a92d stable-0-5-2
properly ignore (second block zero) errors. that will just drop some unknown attachments, but the rest of the data is still found
author | carl |
---|---|
date | Sat, 18 Feb 2006 09:56:00 -0800 |
parents | a818f3c2e589 |
children |
rev | line source |
---|---|
3 | 1 /*** |
2 * lspst.c | |
3 * Part of the LibPST project | |
4 * Author: Joe Nahmias <joe@nahmias.net> | |
5 * Based on readpst.c by David Smith <dave.s@earthcorp.com> | |
6 * | |
7 */ | |
8 | |
9 // header file includes {{{1 | |
10 #include <stdio.h> | |
11 #include <stdlib.h> | |
12 #include <time.h> | |
13 #include <string.h> | |
14 #include <ctype.h> | |
15 #include <errno.h> | |
16 | |
17 #include "libpst.h" | |
18 #include "define.h" | |
19 #include "timeconv.h" | |
20 // }}}1 | |
21 // struct file_ll {{{1 | |
/*
 * One entry per folder that main() is currently inside while walking the
 * descriptor tree. Entries are chained through `next`, newest first, so the
 * list behaves as a stack of open folders.
 */
struct file_ll {
    char *name;            /* file name for the folder; main() always stores "mbox" */
    char *dname;           /* folder name, copied from the item's file_as */
    FILE *output;          /* fclose()d on teardown when non-NULL; never opened in this file */
    int32_t stored_count;  /* item count taken from the folder record (folder->email_count) */
    int32_t email_count;   /* starts at 0; only reported through debug output here */
    int32_t skip_count;    /* bumped for every item main() could not list */
    int32_t type;          /* item->type of the folder this entry describes */
    struct file_ll *next;  /* entry of the enclosing folder; NULL for the outermost one */
};
32 // }}}1 | |
33 // Function Declarations {{{1 | |
34 void canonicalize_filename(char *fname); | |
35 int chr_count(char *str, char x); | |
36 void debug_print(char *fmt, ...); | |
37 char *rfc2426_escape(char *str); | |
38 char *rfc2445_datetime_format(FILETIME *ft); | |
39 // }}}1 | |
10
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
40 #ifndef DEBUG_MAIN |
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
41 #define DEBUG_MAIN(x) debug_print x; |
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
42 #endif |
3 | 43 // int main(int argc, char** argv) {{{1 |
44 int main(int argc, char** argv) { | |
45 | |
46 // declarations {{{2 | |
47 pst_item *item = NULL; | |
48 pst_file pstfile; | |
49 pst_desc_ll *d_ptr; | |
50 char *temp = NULL; //temporary char pointer | |
51 int skip_child = 0; | |
10
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
52 struct file_ll *f, *head; |
3 | 53 // }}}2 |
54 | |
55 if (argc <= 1) | |
56 DIE(("Missing PST filename.\n")); | |
57 | |
58 // Open PST file | |
59 if ( pst_open(&pstfile, argv[1], "r") ) | |
60 DIE(("Error opening File\n")); | |
61 // Load PST index | |
62 if ( pst_load_index(&pstfile) ) | |
63 DIE(("Index Error\n")); | |
64 pst_load_extended_attributes(&pstfile); | |
65 | |
66 d_ptr = pstfile.d_head; // first record is main record | |
67 if ((item = _pst_parse_item(&pstfile, d_ptr)) == NULL || item->message_store == NULL) { | |
68 DIE(("main: Could not get root record\n")); | |
69 } | |
10
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
70 |
3 | 71 // default the file_as to the same as the main filename if it doesn't exist |
72 if (item->file_as == NULL) { | |
73 if ((temp = strrchr(argv[1], '/')) == NULL) | |
74 if ((temp = strrchr(argv[1], '\\')) == NULL) | |
75 temp = argv[1]; | |
76 else | |
77 temp++; // get past the "\\" | |
78 else | |
79 temp++; // get past the "/" | |
80 item->file_as = (char*)xmalloc(strlen(temp)+1); | |
81 strcpy(item->file_as, temp); | |
82 } | |
83 fprintf(stderr, "item->file_as = '%s'.\n", item->file_as); | |
84 | |
85 // setup head file_ll | |
86 head = (struct file_ll*) malloc(sizeof(struct file_ll)); | |
87 memset(head, 0, sizeof(struct file_ll)); | |
88 head->email_count = 0; | |
89 head->skip_count = 0; | |
90 head->next = NULL; | |
91 head->name = "mbox"; | |
92 head->dname = (char*) malloc(strlen(item->file_as)+1); | |
93 strcpy(head->dname, item->file_as); | |
94 head->type = item->type; | |
95 DEBUG_MAIN(("head @ %p: name = '%s', dname = '%s', next = %p.\n", head, head->name, head->dname, head->next)); | |
96 | |
97 if ((d_ptr = pst_getTopOfFolders(&pstfile, item)) == NULL) { | |
98 DIE(("Top of folders record not found. Cannot continue\n")); | |
99 } | |
100 DEBUG_MAIN(("d_ptr(TOF) = %p.\n", d_ptr)); | |
101 | |
102 if (item){ | |
103 _pst_freeItem(item); | |
104 item = NULL; | |
105 } | |
106 | |
107 d_ptr = d_ptr->child; // do the children of TOPF | |
108 DEBUG_MAIN(("d_ptr(TOF->child) = %p.\n", d_ptr)); | |
109 | |
110 DEBUG_MAIN(("main: About to do email stuff\n")); | |
111 while (d_ptr != NULL) { | |
112 // Process d_ptr {{{2 | |
113 DEBUG_MAIN(("main: New item record, d_ptr = %p.\n", d_ptr)); | |
114 if (d_ptr->desc == NULL) { | |
115 DEBUG_WARN(("main: ERROR ?? item's desc record is NULL\n")); | |
116 f->skip_count++; | |
117 goto check_parent; | |
118 } | |
119 DEBUG_MAIN(("main: Desc Email ID %x [d_ptr->id = %x]\n", d_ptr->desc->id, d_ptr->id)); | |
120 | |
121 item = _pst_parse_item(&pstfile, d_ptr); | |
122 DEBUG_MAIN(("main: About to process item @ %p.\n", item)); | |
123 if (item != NULL) { | |
124 | |
125 // there should only be one message_store, and we have already | |
126 // done it | |
127 if (item->message_store != NULL) { | |
128 DIE(("ERROR(main): A second message_store has been found.\n")); | |
129 } | |
130 | |
131 if (item->folder != NULL) { | |
132 // Process Folder item {{{3 | |
133 // if this is a folder, we want to recurse into it | |
134 printf("Folder"); | |
135 if (item->file_as != NULL) | |
136 printf("\t%s/", item->file_as); | |
137 printf("\n"); | |
138 | |
139 DEBUG_MAIN(("main: I think I may try to go into folder \"%s\"\n", item->file_as)); | |
140 f = (struct file_ll*) malloc(sizeof(struct file_ll)); | |
141 memset(f, 0, sizeof(struct file_ll)); | |
142 f->next = head; | |
143 f->email_count = 0; | |
144 f->type = item->type; | |
145 f->stored_count = item->folder->email_count; | |
146 head = f; | |
147 f->name = "mbox"; | |
148 f->dname = (char*) xmalloc(strlen(item->file_as)+1); | |
149 strcpy(f->dname, item->file_as); | |
150 | |
151 DEBUG_MAIN(("main: f->name = %s\nitem->folder_name = %s\n", f->name, item->file_as)); | |
152 canonicalize_filename(f->name); | |
153 | |
154 if (d_ptr->child != NULL) { | |
155 d_ptr = d_ptr->child; | |
156 skip_child = 1; | |
157 } else { | |
158 DEBUG_MAIN(("main: Folder has NO children. Creating directory, and closing again\n")); | |
159 // printf("\tNo items to process in folder \"%s\", should have been %i\n", f->dname, f->stored_count); | |
160 head = f->next; | |
161 if (f->output != NULL) | |
162 fclose(f->output); | |
163 free(f->dname); | |
164 free(f->name); | |
165 free(f); | |
10
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
166 |
3 | 167 f = head; |
168 } | |
169 _pst_freeItem(item); | |
170 item = NULL; // just for the odd situations! | |
171 goto check_parent; | |
172 // }}}3 | |
173 } else if (item->contact != NULL) { | |
174 // Process Contact item {{{3 | |
175 if (f->type != PST_TYPE_CONTACT) { | |
176 DEBUG_MAIN(("main: I have a contact, but the folder isn't a contacts folder. " | |
177 "Will process anyway\n")); | |
178 } | |
179 if (item->type != PST_TYPE_CONTACT) { | |
180 DEBUG_MAIN(("main: I have an item that has contact info, but doesn't say that" | |
181 " it is a contact. Type is \"%s\"\n", item->ascii_type)); | |
182 DEBUG_MAIN(("main: Processing anyway\n")); | |
183 } | |
184 | |
185 printf("Contact"); | |
186 if (item->contact->fullname != NULL) | |
187 printf("\t%s", rfc2426_escape(item->contact->fullname)); | |
188 printf("\n"); | |
189 // }}}3 | |
190 } else if (item->email != NULL && | |
10
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
191 (item->type == PST_TYPE_NOTE || item->type == PST_TYPE_REPORT)) { |
3 | 192 // Process Email item {{{3 |
193 printf("Email"); | |
194 if (item->email->outlook_sender_name != NULL) | |
195 printf("\tFrom: %s", item->email->outlook_sender_name); | |
196 if (item->email->subject->subj != NULL) | |
197 printf("\tSubject: %s", item->email->subject->subj); | |
198 printf("\n"); | |
199 // }}}3 | |
200 } else if (item->type == PST_TYPE_JOURNAL) { | |
201 // Process Journal item {{{3 | |
202 if (f->type != PST_TYPE_JOURNAL) { | |
203 DEBUG_MAIN(("main: I have a journal entry, but folder isn't specified as a journal type. Processing...\n")); | |
204 } | |
205 | |
206 printf("Journal\t%s\n", rfc2426_escape(item->email->subject->subj)); | |
207 // }}}3 | |
208 } else if (item->type == PST_TYPE_APPOINTMENT) { | |
209 // Process Calendar Appointment item {{{3 | |
210 // deal with Calendar appointments | |
211 | |
212 DEBUG_MAIN(("main: Processing Appointment Entry\n")); | |
213 if (f->type != PST_TYPE_APPOINTMENT) { | |
214 DEBUG_MAIN(("main: I have an appointment, but folder isn't specified as an appointment type. Processing...\n")); | |
215 } | |
216 | |
217 printf("Appointment"); | |
218 if (item->email != NULL && item->email->subject != NULL) | |
219 printf("\tSUMMARY: %s", rfc2426_escape(item->email->subject->subj)); | |
220 if (item->appointment != NULL && item->appointment->start != NULL) | |
221 printf("\tSTART: %s", rfc2445_datetime_format(item->appointment->start)); | |
222 printf("\n"); | |
223 | |
224 // }}}3 | |
225 } else { | |
226 f->skip_count++; | |
227 DEBUG_MAIN(("main: Unknown item type. %i. Ascii1=\"%s\"\n", \ | |
228 item->type, item->ascii_type)); | |
229 } | |
230 } else { | |
231 f->skip_count++; | |
232 DEBUG_MAIN(("main: A NULL item was seen\n")); | |
233 } | |
234 | |
235 check_parent: | |
10
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
236 // _pst_freeItem(item); |
3 | 237 while (!skip_child && d_ptr->next == NULL && d_ptr->parent != NULL) { |
238 DEBUG_MAIN(("main: Going to Parent\n")); | |
239 head = f->next; | |
240 if (f->output != NULL) | |
241 fclose(f->output); | |
242 DEBUG_MAIN(("main: Email Count for folder %s is %i\n", f->dname, f->email_count)); | |
243 /* | |
244 printf("\t\"%s\" - %i items done, skipped %i, should have been %i\n", \ | |
245 f->dname, f->email_count, f->skip_count, f->stored_count); | |
246 */ | |
247 | |
248 free(f->name); | |
249 free(f->dname); | |
250 free(f); | |
251 f = head; | |
252 if (head == NULL) { //we can't go higher. Must be at start? | |
253 DEBUG_MAIN(("main: We are now trying to go above the highest level. We must be finished\n")); | |
254 break; //from main while loop | |
255 } | |
256 d_ptr = d_ptr->parent; | |
257 skip_child = 0; | |
258 } | |
259 | |
260 if (item != NULL) { | |
261 DEBUG_MAIN(("main: Freeing memory used by item\n")); | |
262 _pst_freeItem(item); | |
263 item = NULL; | |
264 } | |
265 | |
266 if (!skip_child) | |
267 d_ptr = d_ptr->next; | |
10
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
268 else |
3 | 269 skip_child = 0; |
10
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
270 |
3 | 271 if (d_ptr == NULL) { DEBUG_MAIN(("main: d_ptr is now NULL\n")); } |
272 | |
273 // }}}2 | |
274 } // end while(d_ptr != NULL) | |
275 DEBUG_MAIN(("main: Finished.\n")); | |
276 | |
277 // Cleanup {{{2 | |
278 pst_close(&pstfile); | |
279 while (f != NULL) { | |
280 if (f->output != NULL) | |
281 fclose(f->output); | |
282 free(f->name); | |
283 free(f->dname); | |
284 | |
285 head = f->next; | |
286 free(f); | |
287 f = head; | |
288 } | |
289 DEBUG_RET(); | |
290 // }}}2 | |
291 | |
292 return 0; | |
293 } | |
294 // }}}1 | |
295 // void canonicalize_filename(char *fname) {{{1 | |
10
a818f3c2e589
fix tree walk, we now use the item counts in the node blocks
carl
parents:
3
diff
changeset
|
296 // This function will make sure that a filename is in canonical form. That |
3 | 297 // is, it will replace any slashes, backslashes, or colons with underscores. |
/*
 * Rewrites fname in place, turning every '/', '\\' and ':' into '_'.
 * A NULL fname is accepted and left alone.
 */
void canonicalize_filename(char *fname) {
    char *hit;

    DEBUG_ENT("canonicalize_filename");
    if (fname != NULL) {
        // hop from one offending character to the next
        for (hit = strpbrk(fname, "/\\:"); hit != NULL; hit = strpbrk(hit + 1, "/\\:")) {
            *hit = '_';
        }
    }
    DEBUG_RET();
}
308 // }}}1 | |
309 // int chr_count(char *str, char x) {{{1 | |
/*
 * Counts how many times the character x occurs in the NUL-terminated
 * string str. Returns 0 when str is NULL.
 */
int chr_count(char *str, char x) {
    int hits = 0;
    const char *cursor;

    if (str == NULL) {
        return 0;
    }
    for (cursor = str; *cursor != '\0'; cursor++) {
        hits += (*cursor == x) ? 1 : 0;
    }
    return hits;
}
320 // }}}1 | |
321 // void debug_print(char *fmt, ...) {{{1 | |
/*
 * Minimal printf-style formatter that writes to stderr.
 *
 * Recognised conversions (the conversion letter is matched without regard
 * to case): %d / %i (int), %f (double), %s (char *), %p (void *) and
 * %x (int, printed as "%#010x"). Any other character following '%' is
 * written out literally, so "%%" yields a single '%'. Flags, widths and
 * precisions are not understood.
 *
 * Defensive behaviour:
 *   - a NULL fmt prints nothing;
 *   - a NULL argument for %s prints "(null)";
 *   - a lone '%' at the very end of fmt prints '%' and stops, so the scan
 *     never moves past the terminating NUL.
 */
void debug_print(char *fmt, ...) {
    // shamelessly stolen from minprintf() in K&R pg. 156
    va_list ap;
    char *p, *sval;
    void *pval;
    int ival;
    double dval;
    FILE *fp = stderr;

    if (fmt == NULL) {
        return;
    }

    va_start(ap, fmt);
    for (p = fmt; *p; p++) {
        if (*p != '%') {
            fputc(*p, fp);
            continue;
        }
        if (*++p == '\0') {
            // format ended right after '%': emit it and stop before the
            // loop increment can step over the terminator
            fputc('%', fp);
            break;
        }
        // <ctype.h> functions need a value representable as unsigned char
        switch (tolower((unsigned char) *p)) {
            case 'd': case 'i':
                ival = va_arg(ap, int);
                fprintf(fp, "%d", ival);
                break;
            case 'f':
                dval = va_arg(ap, double);
                fprintf(fp, "%f", dval);
                break;
            case 's':
                sval = va_arg(ap, char *);
                fputs(sval != NULL ? sval : "(null)", fp);
                break;
            case 'p':
                pval = va_arg(ap, void *);
                fprintf(fp, "%p", pval);
                break;
            case 'x':
                ival = va_arg(ap, int);
                // %x formats an unsigned value
                fprintf(fp, "%#010x", (unsigned int) ival);
                break;
            default:
                fputc(*p, fp);
                break;
        }
    }
    va_end(ap);
}
365 // }}}1 | |
366 // char *rfc2426_escape(char *str) {{{1 | |
/*
 * Escapes a string for use as an RFC 2426 (vCard) text value.
 *
 * ',', ';' and '\\' are prefixed with a backslash, a newline becomes the
 * two-character sequence backslash + 'n', and carriage returns are dropped.
 *
 * Returns:
 *   - NULL if str is NULL;
 *   - str itself (no copy) if it contains nothing that needs changing;
 *   - otherwise a pointer to a function-owned static buffer holding the
 *     escaped text. That buffer is reused and possibly moved by the next
 *     call, and must not be freed by the caller;
 *   - NULL if the buffer could not be grown (the previous buffer is kept).
 */
char *rfc2426_escape(char *str) {
    static char *buf = NULL;
    char *a, *b, *grown;
    int y, z;

    DEBUG_ENT("rfc2426_escape");
    if (str == NULL) {
        DEBUG_RET();
        return NULL;
    }

    // calculate space required to escape all the commas, semi-colons, backslashes, and newlines
    y = chr_count(str, ',') + chr_count(str, '\\') + chr_count(str, ';') + chr_count(str, '\n');
    // count how many carriage-returns we have to skip
    z = chr_count(str, '\r');

    if (y == 0 && z == 0) {
        // there isn't any work required
        DEBUG_RET();
        return str;
    }

    // grow through a temporary so a failed realloc does not lose buf
    grown = (char *) realloc(buf, strlen(str) + y - z + 1);
    if (grown == NULL) {
        DEBUG_RET();
        return NULL;
    }
    buf = grown;

    // b only moves when a byte is actually written; advancing it for a
    // skipped '\r' would leave holes in the output and overrun the buffer
    for (a = str, b = buf; *a != '\0'; ++a) {
        switch (*a) {
            case ',' : case '\\': case ';' :
                // insert backslash to escape
                *b++ = '\\';
                *b++ = *a;
                break;
            case '\n':
                // a line break is represented as backslash followed by 'n'
                *b++ = '\\';
                *b++ = 'n';
                break;
            case '\r':
                // skip
                break;
            default:
                *b++ = *a;
                break;
        }
    }
    *b = '\0'; // NUL-terminate the string

    DEBUG_RET();
    return buf;
}
3 | 408 // }}}1 |
409 // char *rfc2445_datetime_format(FILETIME *ft) {{{1 | |
410 char *rfc2445_datetime_format(FILETIME *ft) { | |
411 static char* buffer = NULL; | |
412 struct tm *stm = NULL; | |
413 DEBUG_ENT("rfc2445_datetime_format"); | |
414 if (buffer == NULL) | |
415 buffer = malloc(30); // should be enough | |
416 stm = fileTimeToStructTM(ft); | |
417 if (strftime(buffer, 30, "%Y%m%dT%H%M%SZ", stm)==0) { | |
418 DEBUG_INFO(("Problem occured formatting date\n")); | |
419 } | |
420 DEBUG_RET(); | |
421 return buffer; | |
422 } | |
423 // }}}1 | |
424 | |
425 // vim:sw=4 ts=4: | |
426 // vim600: set foldlevel=0 foldmethod=marker: |