comparison src/readpst.c @ 239:aa50c23a6935

patch from Lee Ayres to add file name extensions in separate mode; allow mixed item types in a folder in separate mode
author Carl Byington <carl@five-ten-sg.com>
date Mon, 14 Sep 2009 10:56:39 -0700
parents 410b6422d65b
children 67b24d6a45d6
comparison
equal deleted inserted replaced
238:410b6422d65b 239:aa50c23a6935
9 #include "lzfu.h" 9 #include "lzfu.h"
10 10
11 #define OUTPUT_TEMPLATE "%s" 11 #define OUTPUT_TEMPLATE "%s"
12 #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory" 12 #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory"
13 #define KMAIL_INDEX ".%s.index" 13 #define KMAIL_INDEX ".%s.index"
14 #define SEP_MAIL_FILE_TEMPLATE "%i" 14 #define SEP_MAIL_FILE_TEMPLATE "%i%s"
15 15
16 // max size of the c_time char*. It will store the date of the email 16 // max size of the c_time char*. It will store the date of the email
17 #define C_TIME_SIZE 500 17 #define C_TIME_SIZE 500
18 18
19 struct file_ll { 19 struct file_ll {
37 int close_kmail_dir(); 37 int close_kmail_dir();
38 char* mk_recurse_dir(char* dir, int32_t folder_type); 38 char* mk_recurse_dir(char* dir, int32_t folder_type);
39 int close_recurse_dir(); 39 int close_recurse_dir();
40 char* mk_separate_dir(char *dir); 40 char* mk_separate_dir(char *dir);
41 int close_separate_dir(); 41 int close_separate_dir();
42 int mk_separate_file(struct file_ll *f); 42 int mk_separate_file(struct file_ll *f, char *extension);
43 char* my_stristr(char *haystack, char *needle); 43 char* my_stristr(char *haystack, char *needle);
44 void check_filename(char *fname); 44 void check_filename(char *fname);
45 void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst); 45 void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst);
46 void write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, char** extra_mime_headers); 46 void write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, char** extra_mime_headers);
47 void write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst); 47 void write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst);
79 // contains only one file which stores the emails in mbox format. 79 // contains only one file which stores the emails in mbox format.
80 #define MODE_RECURSE 2 80 #define MODE_RECURSE 2
81 81
82 // separate mode creates the same directory structure as recurse. The emails are stored in 82 // separate mode creates the same directory structure as recurse. The emails are stored in
83 // separate files, numbering from 1 upward. Attachments belonging to the emails are 83 // separate files, numbering from 1 upward. Attachments belonging to the emails are
84 // saved as email_no-filename (e.g. 1-samplefile.doc or 000001-Attachment2.zip) 84 // saved as email_no-filename (e.g. 1-samplefile.doc or 1-Attachment2.zip)
85 #define MODE_SEPARATE 3 85 #define MODE_SEPARATE 3
86 86
87 87
88 // Output Normal just prints the standard information about what is going on 88 // Output Normal just prints the standard information about what is going on
89 #define OUTPUT_NORMAL 0 89 #define OUTPUT_NORMAL 0
116 #define RTF_ATTACH_TYPE "application/rtf" 116 #define RTF_ATTACH_TYPE "application/rtf"
117 117
118 // global settings 118 // global settings
119 int mode = MODE_NORMAL; 119 int mode = MODE_NORMAL;
120 int mode_MH = 0; // a submode of MODE_SEPARATE 120 int mode_MH = 0; // a submode of MODE_SEPARATE
121 int mode_EX = 0; // a submode of MODE_SEPARATE
121 int mode_thunder = 0; // a submode of MODE_RECURSE 122 int mode_thunder = 0; // a submode of MODE_RECURSE
122 int output_mode = OUTPUT_NORMAL; 123 int output_mode = OUTPUT_NORMAL;
123 int contact_mode = CMODE_VCARD; 124 int contact_mode = CMODE_VCARD;
124 int deleted_mode = DMODE_EXCLUDE; 125 int deleted_mode = DMODE_EXCLUDE;
125 int output_type_mode = 0xff; // Default to all. 126 int output_type_mode = 0xff; // Default to all.
126 int contact_mode_specified = 0; 127 int contact_mode_specified = 0;
127 int overwrite = 0; 128 int overwrite = 0;
128 int save_rtf_body = 1; 129 int save_rtf_body = 1;
130 int file_name_len = 10; // enough room for MODE_SEPARATE file name
129 pst_file pstfile; 131 pst_file pstfile;
130 regex_t meta_charset_pattern; 132 regex_t meta_charset_pattern;
131 133
132 int number_processors = 1; // number of cpus we have 134 int number_processors = 1; // number of cpus we have
133 int max_children = 0; // based on number of cpus and command line args 135 int max_children = 0; // based on number of cpus and command line args
279 ff.skip_count++; 281 ff.skip_count++;
280 DEBUG_INFO(("skipping contact: not in output type list\n")); 282 DEBUG_INFO(("skipping contact: not in output type list\n"));
281 } 283 }
282 else { 284 else {
283 if (!ff.type) ff.type = item->type; 285 if (!ff.type) ff.type = item->type;
284 if (ff.type != PST_TYPE_CONTACT) { 286 if ((ff.type != PST_TYPE_CONTACT) && (mode != MODE_SEPARATE)) {
285 ff.skip_count++; 287 ff.skip_count++;
286 DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. Skipping it\n", ff.type)); 288 DEBUG_INFO(("I have a contact, but the folder type %"PRIi32" isn't a contacts folder. Skipping it\n", ff.type));
287 } 289 }
288 else { 290 else {
289 ff.item_count++; 291 ff.item_count++;
290 if (mode == MODE_SEPARATE) mk_separate_file(&ff); 292 if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".vcf" : "");
291 if (contact_mode == CMODE_VCARD) { 293 if (contact_mode == CMODE_VCARD) {
292 pst_convert_utf8_null(item, &item->comment); 294 pst_convert_utf8_null(item, &item->comment);
293 write_vcard(ff.output, item, item->contact, item->comment.str); 295 write_vcard(ff.output, item, item->contact, item->comment.str);
294 } 296 }
295 else { 297 else {
306 ff.skip_count++; 308 ff.skip_count++;
307 DEBUG_INFO(("skipping email: not in output type list\n")); 309 DEBUG_INFO(("skipping email: not in output type list\n"));
308 } 310 }
309 else { 311 else {
310 if (!ff.type) ff.type = item->type; 312 if (!ff.type) ff.type = item->type;
311 if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT)) { 313 if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT) && (mode != MODE_SEPARATE)) {
312 ff.skip_count++; 314 ff.skip_count++;
313 DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type)); 315 DEBUG_INFO(("I have an email type %"PRIi32", but the folder type %"PRIi32" isn't an email folder. Skipping it\n", item->type, ff.type));
314 } 316 }
315 else { 317 else {
316 char *extra_mime_headers = NULL; 318 char *extra_mime_headers = NULL;
317 ff.item_count++; 319 ff.item_count++;
318 if (mode == MODE_SEPARATE) mk_separate_file(&ff); 320 if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".eml" : "");
319 write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers); 321 write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers);
320 } 322 }
321 } 323 }
322 324
323 } else if (item->journal && (item->type == PST_TYPE_JOURNAL)) { 325 } else if (item->journal && (item->type == PST_TYPE_JOURNAL)) {
326 ff.skip_count++; 328 ff.skip_count++;
327 DEBUG_INFO(("skipping journal entry: not in output type list\n")); 329 DEBUG_INFO(("skipping journal entry: not in output type list\n"));
328 } 330 }
329 else { 331 else {
330 if (!ff.type) ff.type = item->type; 332 if (!ff.type) ff.type = item->type;
331 if (ff.type != PST_TYPE_JOURNAL) { 333 if ((ff.type != PST_TYPE_JOURNAL) && (mode != MODE_SEPARATE)) {
332 ff.skip_count++; 334 ff.skip_count++;
333 DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type)); 335 DEBUG_INFO(("I have a journal entry, but the folder type %"PRIi32" isn't a journal folder. Skipping it\n", ff.type));
334 } 336 }
335 else { 337 else {
336 ff.item_count++; 338 ff.item_count++;
337 if (mode == MODE_SEPARATE) mk_separate_file(&ff); 339 if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : "");
338 write_journal(ff.output, item); 340 write_journal(ff.output, item);
339 fprintf(ff.output, "\n"); 341 fprintf(ff.output, "\n");
340 } 342 }
341 } 343 }
342 344
346 ff.skip_count++; 348 ff.skip_count++;
347 DEBUG_INFO(("skipping appointment: not in output type list\n")); 349 DEBUG_INFO(("skipping appointment: not in output type list\n"));
348 } 350 }
349 else { 351 else {
350 if (!ff.type) ff.type = item->type; 352 if (!ff.type) ff.type = item->type;
351 if (ff.type != PST_TYPE_APPOINTMENT) { 353 if ((ff.type != PST_TYPE_APPOINTMENT) && (mode != MODE_SEPARATE)) {
352 ff.skip_count++; 354 ff.skip_count++;
353 DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. Skipping it\n", ff.type)); 355 DEBUG_INFO(("I have an appointment, but the folder type %"PRIi32" isn't an appointment folder. Skipping it\n", ff.type));
354 } 356 }
355 else { 357 else {
356 ff.item_count++; 358 ff.item_count++;
357 if (mode == MODE_SEPARATE) mk_separate_file(&ff); 359 if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : "");
358 write_appointment(ff.output, item, 0); 360 write_schedule_part_data(ff.output, item, NULL, NULL);
359 fprintf(ff.output, "\n"); 361 fprintf(ff.output, "\n");
360 } 362 }
361 } 363 }
362 364
363 } else if (item->message_store) { 365 } else if (item->message_store) {
394 printf("cannot compile regex pattern to find content charset in html bodies\n"); 396 printf("cannot compile regex pattern to find content charset in html bodies\n");
395 exit(3); 397 exit(3);
396 } 398 }
397 399
398 // command-line option handling 400 // command-line option handling
399 while ((c = getopt(argc, argv, "bc:Dd:hj:kMo:qrSt:uVw"))!= -1) { 401 while ((c = getopt(argc, argv, "bc:Dd:ehj:kMo:qrSt:uVw"))!= -1) {
400 switch (c) { 402 switch (c) {
401 case 'b': 403 case 'b':
402 save_rtf_body = 0; 404 save_rtf_body = 0;
403 break; 405 break;
404 case 'c': 406 case 'c':
433 mode = MODE_KMAIL; 435 mode = MODE_KMAIL;
434 break; 436 break;
435 case 'M': 437 case 'M':
436 mode = MODE_SEPARATE; 438 mode = MODE_SEPARATE;
437 mode_MH = 1; 439 mode_MH = 1;
440 mode_EX = 0;
441 break;
442 case 'e':
443 mode = MODE_SEPARATE;
444 mode_MH = 1;
445 mode_EX = 1;
446 file_name_len = 14;
438 break; 447 break;
439 case 'o': 448 case 'o':
440 output_dir = optarg; 449 output_dir = optarg;
441 break; 450 break;
442 case 'q': 451 case 'q':
447 mode_thunder = 0; 456 mode_thunder = 0;
448 break; 457 break;
449 case 'S': 458 case 'S':
450 mode = MODE_SEPARATE; 459 mode = MODE_SEPARATE;
451 mode_MH = 0; 460 mode_MH = 0;
461 mode_EX = 0;
452 break; 462 break;
453 case 't': 463 case 't':
454 // email, appointment, contact, other 464 // email, appointment, contact, other
455 if (!optarg) { 465 if (!optarg) {
456 usage(); 466 usage();
640 version(); 650 version();
641 printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name); 651 printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
642 printf("OPTIONS:\n"); 652 printf("OPTIONS:\n");
643 printf("\t-V\t- Version. Display program version\n"); 653 printf("\t-V\t- Version. Display program version\n");
644 printf("\t-D\t- Include deleted items in output\n"); 654 printf("\t-D\t- Include deleted items in output\n");
645 printf("\t-M\t- MH. Write emails in the MH format\n"); 655 printf("\t-M\t- Write emails in the MH (rfc822) format\n");
646 printf("\t-S\t- Separate. Write emails in the separate format\n"); 656 printf("\t-S\t- Separate. Write emails in the separate format\n");
647 printf("\t-b\t- Don't save RTF-Body attachments\n"); 657 printf("\t-b\t- Don't save RTF-Body attachments\n");
648 printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n"); 658 printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n");
649 printf("\t-d <filename> \t- Debug to file.\n"); 659 printf("\t-d <filename> \t- Debug to file.\n");
660 printf("\t-e\t- As with -M, but include extensions on output files\n");
650 printf("\t-h\t- Help. This screen\n"); 661 printf("\t-h\t- Help. This screen\n");
651 printf("\t-j <integer>\t- Number of parallel jobs to run\n"); 662 printf("\t-j <integer>\t- Number of parallel jobs to run\n");
652 printf("\t-k\t- KMail. Output in kmail format\n"); 663 printf("\t-k\t- KMail. Output in kmail format\n");
653 printf("\t-o <dirname>\t- Output directory to write files to. CWD is changed *after* opening pst file\n"); 664 printf("\t-o <dirname>\t- Output directory to write files to. CWD is changed *after* opening pst file\n");
654 printf("\t-q\t- Quiet. Only print error messages\n"); 665 printf("\t-q\t- Quiet. Only print error messages\n");
796 DEBUG_ENT("mk_separate_dir"); 807 DEBUG_ENT("mk_separate_dir");
797 do { 808 do {
798 if (y == 0) 809 if (y == 0)
799 snprintf(dir_name, dirsize, "%s", dir); 810 snprintf(dir_name, dirsize, "%s", dir);
800 else 811 else
801 snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y); // enough for 9 digits allocated above 812 snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y, ""); // enough for 9 digits allocated above
802 813
803 check_filename(dir_name); 814 check_filename(dir_name);
804 DEBUG_INFO(("about to try creating %s\n", dir_name)); 815 DEBUG_INFO(("about to try creating %s\n", dir_name));
805 if (D_MKDIR(dir_name)) { 816 if (D_MKDIR(dir_name)) {
806 if (errno != EEXIST) { // if there is an error, and it doesn't already exist 817 if (errno != EEXIST) { // if there is an error, and it doesn't already exist
856 DEBUG_RET(); 867 DEBUG_RET();
857 return 0; 868 return 0;
858 } 869 }
859 870
860 871
861 int mk_separate_file(struct file_ll *f) { 872 int mk_separate_file(struct file_ll *f, char *extension) {
862 const int name_offset = 1; 873 const int name_offset = 1;
863 DEBUG_ENT("mk_separate_file"); 874 DEBUG_ENT("mk_separate_file");
864 DEBUG_INFO(("opening next file to save email\n")); 875 DEBUG_INFO(("opening next file to save email\n"));
865 if (f->item_count > 999999999) { // bigger than nine 9's 876 if (f->item_count > 999999999) { // bigger than nine 9's
866 DIE(("mk_separate_file: The number of emails in this folder has become too high to handle\n")); 877 DIE(("mk_separate_file: The number of emails in this folder has become too high to handle\n"));
867 } 878 }
868 sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count + name_offset); 879 sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count + name_offset, extension);
869 if (f->output) fclose(f->output); 880 if (f->output) fclose(f->output);
870 f->output = NULL; 881 f->output = NULL;
871 check_filename(f->name); 882 check_filename(f->name);
872 if (!(f->output = fopen(f->name, "w"))) { 883 if (!(f->output = fopen(f->name, "w"))) {
873 DIE(("mk_separate_file: Cannot open file to save email \"%s\"\n", f->name)); 884 DIE(("mk_separate_file: Cannot open file to save email \"%s\"\n", f->name));
1263 void write_schedule_part_data(FILE* f_output, pst_item* item, const char* sender, const char* method) 1274 void write_schedule_part_data(FILE* f_output, pst_item* item, const char* sender, const char* method)
1264 { 1275 {
1265 fprintf(f_output, "BEGIN:VCALENDAR\n"); 1276 fprintf(f_output, "BEGIN:VCALENDAR\n");
1266 fprintf(f_output, "VERSION:2.0\n"); 1277 fprintf(f_output, "VERSION:2.0\n");
1267 fprintf(f_output, "PRODID:LibPST v%s\n", VERSION); 1278 fprintf(f_output, "PRODID:LibPST v%s\n", VERSION);
1268 fprintf(f_output, "METHOD:%s\n", method); 1279 if (method) fprintf(f_output, "METHOD:%s\n", method);
1269 fprintf(f_output, "BEGIN:VEVENT\n"); 1280 fprintf(f_output, "BEGIN:VEVENT\n");
1270 fprintf(f_output, "ORGANIZER;CN=\"%s\":MAILTO:%s\n", item->email->outlook_sender_name.str, sender); 1281 if (sender) fprintf(f_output, "ORGANIZER;CN=\"%s\":MAILTO:%s\n", item->email->outlook_sender_name.str, sender);
1271 write_appointment(f_output, item, 1); 1282 write_appointment(f_output, item, 1);
1272 fprintf(f_output, "END:VCALENDAR\n"); 1283 fprintf(f_output, "END:VCALENDAR\n");
1273 } 1284 }
1274 1285
1275 1286
1910 fclose(type_file); 1921 fclose(type_file);
1911 } 1922 }
1912 } else if (mode == MODE_SEPARATE) { 1923 } else if (mode == MODE_SEPARATE) {
1913 // do similar stuff to recurse here. 1924 // do similar stuff to recurse here.
1914 mk_separate_dir(item->file_as.str); 1925 mk_separate_dir(item->file_as.str);
1915 f->name = (char*) pst_malloc(10); 1926 f->name = (char*) pst_malloc(file_name_len);
1916 memset(f->name, 0, 10); 1927 memset(f->name, 0, file_name_len);
1917 } else { 1928 } else {
1918 f->name = (char*) pst_malloc(strlen(item->file_as.str)+strlen(OUTPUT_TEMPLATE)+1); 1929 f->name = (char*) pst_malloc(strlen(item->file_as.str)+strlen(OUTPUT_TEMPLATE)+1);
1919 sprintf(f->name, OUTPUT_TEMPLATE, item->file_as.str); 1930 sprintf(f->name, OUTPUT_TEMPLATE, item->file_as.str);
1920 } 1931 }
1921 1932