Mercurial > libpst
comparison src/readpst.c @ 239:aa50c23a6935
patch from Lee Ayres to add file name extensions in separate mode; allow mixed item types in a folder in separate mode
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Mon, 14 Sep 2009 10:56:39 -0700 |
parents | 410b6422d65b |
children | 67b24d6a45d6 |
comparison
equal
deleted
inserted
replaced
238:410b6422d65b | 239:aa50c23a6935 |
---|---|
9 #include "lzfu.h" | 9 #include "lzfu.h" |
10 | 10 |
11 #define OUTPUT_TEMPLATE "%s" | 11 #define OUTPUT_TEMPLATE "%s" |
12 #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory" | 12 #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory" |
13 #define KMAIL_INDEX ".%s.index" | 13 #define KMAIL_INDEX ".%s.index" |
14 #define SEP_MAIL_FILE_TEMPLATE "%i" | 14 #define SEP_MAIL_FILE_TEMPLATE "%i%s" |
15 | 15 |
16 // max size of the c_time char*. It will store the date of the email | 16 // max size of the c_time char*. It will store the date of the email |
17 #define C_TIME_SIZE 500 | 17 #define C_TIME_SIZE 500 |
18 | 18 |
19 struct file_ll { | 19 struct file_ll { |
37 int close_kmail_dir(); | 37 int close_kmail_dir(); |
38 char* mk_recurse_dir(char* dir, int32_t folder_type); | 38 char* mk_recurse_dir(char* dir, int32_t folder_type); |
39 int close_recurse_dir(); | 39 int close_recurse_dir(); |
40 char* mk_separate_dir(char *dir); | 40 char* mk_separate_dir(char *dir); |
41 int close_separate_dir(); | 41 int close_separate_dir(); |
42 int mk_separate_file(struct file_ll *f); | 42 int mk_separate_file(struct file_ll *f, char *extension); |
43 char* my_stristr(char *haystack, char *needle); | 43 char* my_stristr(char *haystack, char *needle); |
44 void check_filename(char *fname); | 44 void check_filename(char *fname); |
45 void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst); | 45 void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst); |
46 void write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, char** extra_mime_headers); | 46 void write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, char** extra_mime_headers); |
47 void write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst); | 47 void write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst); |
79 // contains only one file which stores the emails in mbox format. | 79 // contains only one file which stores the emails in mbox format. |
80 #define MODE_RECURSE 2 | 80 #define MODE_RECURSE 2 |
81 | 81 |
82 // separate mode creates the same directory structure as recurse. The emails are stored in | 82 // separate mode creates the same directory structure as recurse. The emails are stored in |
83 // separate files, numbering from 1 upward. Attachments belonging to the emails are | 83 // separate files, numbering from 1 upward. Attachments belonging to the emails are |
84 // saved as email_no-filename (e.g. 1-samplefile.doc or 000001-Attachment2.zip) | 84 // saved as email_no-filename (e.g. 1-samplefile.doc or 1-Attachment2.zip) |
85 #define MODE_SEPARATE 3 | 85 #define MODE_SEPARATE 3 |
86 | 86 |
87 | 87 |
88 // Output Normal just prints the standard information about what is going on | 88 // Output Normal just prints the standard information about what is going on |
89 #define OUTPUT_NORMAL 0 | 89 #define OUTPUT_NORMAL 0 |
116 #define RTF_ATTACH_TYPE "application/rtf" | 116 #define RTF_ATTACH_TYPE "application/rtf" |
117 | 117 |
118 // global settings | 118 // global settings |
119 int mode = MODE_NORMAL; | 119 int mode = MODE_NORMAL; |
120 int mode_MH = 0; // a submode of MODE_SEPARATE | 120 int mode_MH = 0; // a submode of MODE_SEPARATE |
121 int mode_EX = 0; // a submode of MODE_SEPARATE | |
121 int mode_thunder = 0; // a submode of MODE_RECURSE | 122 int mode_thunder = 0; // a submode of MODE_RECURSE |
122 int output_mode = OUTPUT_NORMAL; | 123 int output_mode = OUTPUT_NORMAL; |
123 int contact_mode = CMODE_VCARD; | 124 int contact_mode = CMODE_VCARD; |
124 int deleted_mode = DMODE_EXCLUDE; | 125 int deleted_mode = DMODE_EXCLUDE; |
125 int output_type_mode = 0xff; // Default to all. | 126 int output_type_mode = 0xff; // Default to all. |
126 int contact_mode_specified = 0; | 127 int contact_mode_specified = 0; |
127 int overwrite = 0; | 128 int overwrite = 0; |
128 int save_rtf_body = 1; | 129 int save_rtf_body = 1; |
130 int file_name_len = 10; // enough room for MODE_SEPARATE file name | |
129 pst_file pstfile; | 131 pst_file pstfile; |
130 regex_t meta_charset_pattern; | 132 regex_t meta_charset_pattern; |
131 | 133 |
132 int number_processors = 1; // number of cpus we have | 134 int number_processors = 1; // number of cpus we have |
133 int max_children = 0; // based on number of cpus and command line args | 135 int max_children = 0; // based on number of cpus and command line args |
279 ff.skip_count++; | 281 ff.skip_count++; |
280 DEBUG_INFO(("skipping contact: not in output type list\n")); | 282 DEBUG_INFO(("skipping contact: not in output type list\n")); |
281 } | 283 } |
282 else { | 284 else { |
283 if (!ff.type) ff.type = item->type; | 285 if (!ff.type) ff.type = item->type; |
284 if (ff.type != PST_TYPE_CONTACT) { | 286 if ((ff.type != PST_TYPE_CONTACT) && (mode != MODE_SEPARATE)) { |
285 ff.skip_count++; | 287 ff.skip_count++; |
286 DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. Skipping it\n", ff.type)); | 288 DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. Skipping it\n", ff.type)); |
287 } | 289 } |
288 else { | 290 else { |
289 ff.item_count++; | 291 ff.item_count++; |
290 if (mode == MODE_SEPARATE) mk_separate_file(&ff); | 292 if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".vcf" : ""); |
291 if (contact_mode == CMODE_VCARD) { | 293 if (contact_mode == CMODE_VCARD) { |
292 pst_convert_utf8_null(item, &item->comment); | 294 pst_convert_utf8_null(item, &item->comment); |
293 write_vcard(ff.output, item, item->contact, item->comment.str); | 295 write_vcard(ff.output, item, item->contact, item->comment.str); |
294 } | 296 } |
295 else { | 297 else { |
306 ff.skip_count++; | 308 ff.skip_count++; |
307 DEBUG_INFO(("skipping email: not in output type list\n")); | 309 DEBUG_INFO(("skipping email: not in output type list\n")); |
308 } | 310 } |
309 else { | 311 else { |
310 if (!ff.type) ff.type = item->type; | 312 if (!ff.type) ff.type = item->type; |
311 if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT)) { | 313 if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT) && (mode != MODE_SEPARATE)) { |
312 ff.skip_count++; | 314 ff.skip_count++; |
313 DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type)); | 315 DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type)); |
314 } | 316 } |
315 else { | 317 else { |
316 char *extra_mime_headers = NULL; | 318 char *extra_mime_headers = NULL; |
317 ff.item_count++; | 319 ff.item_count++; |
318 if (mode == MODE_SEPARATE) mk_separate_file(&ff); | 320 if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".eml" : ""); |
319 write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers); | 321 write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers); |
320 } | 322 } |
321 } | 323 } |
322 | 324 |
323 } else if (item->journal && (item->type == PST_TYPE_JOURNAL)) { | 325 } else if (item->journal && (item->type == PST_TYPE_JOURNAL)) { |
326 ff.skip_count++; | 328 ff.skip_count++; |
327 DEBUG_INFO(("skipping journal entry: not in output type list\n")); | 329 DEBUG_INFO(("skipping journal entry: not in output type list\n")); |
328 } | 330 } |
329 else { | 331 else { |
330 if (!ff.type) ff.type = item->type; | 332 if (!ff.type) ff.type = item->type; |
331 if (ff.type != PST_TYPE_JOURNAL) { | 333 if ((ff.type != PST_TYPE_JOURNAL) && (mode != MODE_SEPARATE)) { |
332 ff.skip_count++; | 334 ff.skip_count++; |
333 DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type)); | 335 DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type)); |
334 } | 336 } |
335 else { | 337 else { |
336 ff.item_count++; | 338 ff.item_count++; |
337 if (mode == MODE_SEPARATE) mk_separate_file(&ff); | 339 if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : ""); |
338 write_journal(ff.output, item); | 340 write_journal(ff.output, item); |
339 fprintf(ff.output, "\n"); | 341 fprintf(ff.output, "\n"); |
340 } | 342 } |
341 } | 343 } |
342 | 344 |
346 ff.skip_count++; | 348 ff.skip_count++; |
347 DEBUG_INFO(("skipping appointment: not in output type list\n")); | 349 DEBUG_INFO(("skipping appointment: not in output type list\n")); |
348 } | 350 } |
349 else { | 351 else { |
350 if (!ff.type) ff.type = item->type; | 352 if (!ff.type) ff.type = item->type; |
351 if (ff.type != PST_TYPE_APPOINTMENT) { | 353 if ((ff.type != PST_TYPE_APPOINTMENT) && (mode != MODE_SEPARATE)) { |
352 ff.skip_count++; | 354 ff.skip_count++; |
353 DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. Skipping it\n", ff.type)); | 355 DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. Skipping it\n", ff.type)); |
354 } | 356 } |
355 else { | 357 else { |
356 ff.item_count++; | 358 ff.item_count++; |
357 if (mode == MODE_SEPARATE) mk_separate_file(&ff); | 359 if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : ""); |
358 write_appointment(ff.output, item, 0); | 360 write_schedule_part_data(ff.output, item, NULL, NULL); |
359 fprintf(ff.output, "\n"); | 361 fprintf(ff.output, "\n"); |
360 } | 362 } |
361 } | 363 } |
362 | 364 |
363 } else if (item->message_store) { | 365 } else if (item->message_store) { |
394 printf("cannot compile regex pattern to find content charset in html bodies\n"); | 396 printf("cannot compile regex pattern to find content charset in html bodies\n"); |
395 exit(3); | 397 exit(3); |
396 } | 398 } |
397 | 399 |
398 // command-line option handling | 400 // command-line option handling |
399 while ((c = getopt(argc, argv, "bc:Dd:hj:kMo:qrSt:uVw"))!= -1) { | 401 while ((c = getopt(argc, argv, "bc:Dd:ehj:kMo:qrSt:uVw"))!= -1) { |
400 switch (c) { | 402 switch (c) { |
401 case 'b': | 403 case 'b': |
402 save_rtf_body = 0; | 404 save_rtf_body = 0; |
403 break; | 405 break; |
404 case 'c': | 406 case 'c': |
433 mode = MODE_KMAIL; | 435 mode = MODE_KMAIL; |
434 break; | 436 break; |
435 case 'M': | 437 case 'M': |
436 mode = MODE_SEPARATE; | 438 mode = MODE_SEPARATE; |
437 mode_MH = 1; | 439 mode_MH = 1; |
440 mode_EX = 0; | |
441 break; | |
442 case 'e': | |
443 mode = MODE_SEPARATE; | |
444 mode_MH = 1; | |
445 mode_EX = 1; | |
446 file_name_len = 14; | |
438 break; | 447 break; |
439 case 'o': | 448 case 'o': |
440 output_dir = optarg; | 449 output_dir = optarg; |
441 break; | 450 break; |
442 case 'q': | 451 case 'q': |
447 mode_thunder = 0; | 456 mode_thunder = 0; |
448 break; | 457 break; |
449 case 'S': | 458 case 'S': |
450 mode = MODE_SEPARATE; | 459 mode = MODE_SEPARATE; |
451 mode_MH = 0; | 460 mode_MH = 0; |
461 mode_EX = 0; | |
452 break; | 462 break; |
453 case 't': | 463 case 't': |
454 // email, appointment, contact, other | 464 // email, appointment, contact, other |
455 if (!optarg) { | 465 if (!optarg) { |
456 usage(); | 466 usage(); |
640 version(); | 650 version(); |
641 printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name); | 651 printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name); |
642 printf("OPTIONS:\n"); | 652 printf("OPTIONS:\n"); |
643 printf("\t-V\t- Version. Display program version\n"); | 653 printf("\t-V\t- Version. Display program version\n"); |
644 printf("\t-D\t- Include deleted items in output\n"); | 654 printf("\t-D\t- Include deleted items in output\n"); |
645 printf("\t-M\t- MH. Write emails in the MH format\n"); | 655 printf("\t-M\t- Write emails in the MH (rfc822) format\n"); |
646 printf("\t-S\t- Separate. Write emails in the separate format\n"); | 656 printf("\t-S\t- Separate. Write emails in the separate format\n"); |
647 printf("\t-b\t- Don't save RTF-Body attachments\n"); | 657 printf("\t-b\t- Don't save RTF-Body attachments\n"); |
648 printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n"); | 658 printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n"); |
649 printf("\t-d <filename> \t- Debug to file.\n"); | 659 printf("\t-d <filename> \t- Debug to file.\n"); |
660 printf("\t-e\t- As with -M, but include extensions on output files\n"); | |
650 printf("\t-h\t- Help. This screen\n"); | 661 printf("\t-h\t- Help. This screen\n"); |
651 printf("\t-j <integer>\t- Number of parallel jobs to run\n"); | 662 printf("\t-j <integer>\t- Number of parallel jobs to run\n"); |
652 printf("\t-k\t- KMail. Output in kmail format\n"); | 663 printf("\t-k\t- KMail. Output in kmail format\n"); |
653 printf("\t-o <dirname>\t- Output directory to write files to. CWD is changed *after* opening pst file\n"); | 664 printf("\t-o <dirname>\t- Output directory to write files to. CWD is changed *after* opening pst file\n"); |
654 printf("\t-q\t- Quiet. Only print error messages\n"); | 665 printf("\t-q\t- Quiet. Only print error messages\n"); |
796 DEBUG_ENT("mk_separate_dir"); | 807 DEBUG_ENT("mk_separate_dir"); |
797 do { | 808 do { |
798 if (y == 0) | 809 if (y == 0) |
799 snprintf(dir_name, dirsize, "%s", dir); | 810 snprintf(dir_name, dirsize, "%s", dir); |
800 else | 811 else |
801 snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y); // enough for 9 digits allocated above | 812 snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y, ""); // enough for 9 digits allocated above |
802 | 813 |
803 check_filename(dir_name); | 814 check_filename(dir_name); |
804 DEBUG_INFO(("about to try creating %s\n", dir_name)); | 815 DEBUG_INFO(("about to try creating %s\n", dir_name)); |
805 if (D_MKDIR(dir_name)) { | 816 if (D_MKDIR(dir_name)) { |
806 if (errno != EEXIST) { // if there is an error, and it doesn't already exist | 817 if (errno != EEXIST) { // if there is an error, and it doesn't already exist |
856 DEBUG_RET(); | 867 DEBUG_RET(); |
857 return 0; | 868 return 0; |
858 } | 869 } |
859 | 870 |
860 | 871 |
861 int mk_separate_file(struct file_ll *f) { | 872 int mk_separate_file(struct file_ll *f, char *extension) { |
862 const int name_offset = 1; | 873 const int name_offset = 1; |
863 DEBUG_ENT("mk_separate_file"); | 874 DEBUG_ENT("mk_separate_file"); |
864 DEBUG_INFO(("opening next file to save email\n")); | 875 DEBUG_INFO(("opening next file to save email\n")); |
865 if (f->item_count > 999999999) { // bigger than nine 9's | 876 if (f->item_count > 999999999) { // bigger than nine 9's |
866 DIE(("mk_separate_file: The number of emails in this folder has become too high to handle\n")); | 877 DIE(("mk_separate_file: The number of emails in this folder has become too high to handle\n")); |
867 } | 878 } |
868 sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count + name_offset); | 879 sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count + name_offset, extension); |
869 if (f->output) fclose(f->output); | 880 if (f->output) fclose(f->output); |
870 f->output = NULL; | 881 f->output = NULL; |
871 check_filename(f->name); | 882 check_filename(f->name); |
872 if (!(f->output = fopen(f->name, "w"))) { | 883 if (!(f->output = fopen(f->name, "w"))) { |
873 DIE(("mk_separate_file: Cannot open file to save email \"%s\"\n", f->name)); | 884 DIE(("mk_separate_file: Cannot open file to save email \"%s\"\n", f->name)); |
1263 void write_schedule_part_data(FILE* f_output, pst_item* item, const char* sender, const char* method) | 1274 void write_schedule_part_data(FILE* f_output, pst_item* item, const char* sender, const char* method) |
1264 { | 1275 { |
1265 fprintf(f_output, "BEGIN:VCALENDAR\n"); | 1276 fprintf(f_output, "BEGIN:VCALENDAR\n"); |
1266 fprintf(f_output, "VERSION:2.0\n"); | 1277 fprintf(f_output, "VERSION:2.0\n"); |
1267 fprintf(f_output, "PRODID:LibPST v%s\n", VERSION); | 1278 fprintf(f_output, "PRODID:LibPST v%s\n", VERSION); |
1268 fprintf(f_output, "METHOD:%s\n", method); | 1279 if (method) fprintf(f_output, "METHOD:%s\n", method); |
1269 fprintf(f_output, "BEGIN:VEVENT\n"); | 1280 fprintf(f_output, "BEGIN:VEVENT\n"); |
1270 fprintf(f_output, "ORGANIZER;CN=\"%s\":MAILTO:%s\n", item->email->outlook_sender_name.str, sender); | 1281 if (sender) fprintf(f_output, "ORGANIZER;CN=\"%s\":MAILTO:%s\n", item->email->outlook_sender_name.str, sender); |
1271 write_appointment(f_output, item, 1); | 1282 write_appointment(f_output, item, 1); |
1272 fprintf(f_output, "END:VCALENDAR\n"); | 1283 fprintf(f_output, "END:VCALENDAR\n"); |
1273 } | 1284 } |
1274 | 1285 |
1275 | 1286 |
1910 fclose(type_file); | 1921 fclose(type_file); |
1911 } | 1922 } |
1912 } else if (mode == MODE_SEPARATE) { | 1923 } else if (mode == MODE_SEPARATE) { |
1913 // do similar stuff to recurse here. | 1924 // do similar stuff to recurse here. |
1914 mk_separate_dir(item->file_as.str); | 1925 mk_separate_dir(item->file_as.str); |
1915 f->name = (char*) pst_malloc(10); | 1926 f->name = (char*) pst_malloc(file_name_len); |
1916 memset(f->name, 0, 10); | 1927 memset(f->name, 0, file_name_len); |
1917 } else { | 1928 } else { |
1918 f->name = (char*) pst_malloc(strlen(item->file_as.str)+strlen(OUTPUT_TEMPLATE)+1); | 1929 f->name = (char*) pst_malloc(strlen(item->file_as.str)+strlen(OUTPUT_TEMPLATE)+1); |
1919 sprintf(f->name, OUTPUT_TEMPLATE, item->file_as.str); | 1930 sprintf(f->name, OUTPUT_TEMPLATE, item->file_as.str); |
1920 } | 1931 } |
1921 | 1932 |