Mercurial > libpst
comparison src/readpst.c @ 121:8399ef94c11b
strip and regenerate all MIME headers to avoid duplicates.
do a better job of making unique MIME boundaries.
only use base64 coding when strictly necessary.
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Sun, 01 Feb 2009 19:21:47 -0800 |
parents | 6395ced2b8b2 |
children | bdb38b434c0a |
comparison
equal
deleted
inserted
replaced
120:6395ced2b8b2 | 121:8399ef94c11b |
---|---|
10 #include "timeconv.h" | 10 #include "timeconv.h" |
11 #include "lzfu.h" | 11 #include "lzfu.h" |
12 #include "libstrfunc.h" | 12 #include "libstrfunc.h" |
13 #include "vbuf.h" | 13 #include "vbuf.h" |
14 | 14 |
15 #ifdef HAVE_REGEX_H | |
16 #include <regex.h> | |
17 #endif | |
18 | |
15 #define OUTPUT_TEMPLATE "%s" | 19 #define OUTPUT_TEMPLATE "%s" |
16 #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory" | 20 #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory" |
17 #define KMAIL_INDEX ".%s.index" | 21 #define KMAIL_INDEX ".%s.index" |
18 #define SEP_MAIL_FILE_TEMPLATE "%i" /* "%09i" */ | 22 #define SEP_MAIL_FILE_TEMPLATE "%i" /* "%09i" */ |
19 | 23 |
30 int32_t type; | 34 int32_t type; |
31 }; | 35 }; |
32 | 36 |
33 void process(pst_item *outeritem, pst_desc_ll *d_ptr); | 37 void process(pst_item *outeritem, pst_desc_ll *d_ptr); |
34 void write_email_body(FILE *f, char *body); | 38 void write_email_body(FILE *f, char *body); |
35 char* removeCR (char *c); | 39 void removeCR(char *c); |
36 void usage(); | 40 void usage(); |
37 void version(); | 41 void version(); |
38 char* mk_kmail_dir(char*); | 42 char* mk_kmail_dir(char*); |
39 int close_kmail_dir(); | 43 int close_kmail_dir(); |
40 char* mk_recurse_dir(char*); | 44 char* mk_recurse_dir(char*); |
42 char* mk_separate_dir(char *dir); | 46 char* mk_separate_dir(char *dir); |
43 int close_separate_dir(); | 47 int close_separate_dir(); |
44 int mk_separate_file(struct file_ll *f); | 48 int mk_separate_file(struct file_ll *f); |
45 char* my_stristr(char *haystack, char *needle); | 49 char* my_stristr(char *haystack, char *needle); |
46 void check_filename(char *fname); | 50 void check_filename(char *fname); |
47 char* skip_header_prologue(char *headers); | |
48 void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst); | 51 void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst); |
49 void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char boundary[], pst_file* pst); | 52 void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char *boundary, pst_file* pst); |
53 void header_has_field(char *header, char *field, int *flag); | |
54 char* header_get_field(char *header, char *field); | |
55 void header_strip_field(char *header, char *field); | |
56 int test_base64(char *body); | |
57 void find_html_charset(char *html, char *charset, size_t charsetlen); | |
58 void write_body_part(FILE* f_output, char *body, char *mime, char *charset, char *boundary); | |
50 void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf); | 59 void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf); |
51 void write_vcard(FILE* f_output, pst_item_contact* contact, char comment[]); | 60 void write_vcard(FILE* f_output, pst_item_contact* contact, char comment[]); |
52 void write_appointment(FILE* f_output, pst_item_appointment* appointment, | 61 void write_appointment(FILE* f_output, pst_item_appointment* appointment, |
53 pst_item_email* email, FILETIME* create_date, FILETIME* modify_date); | 62 pst_item_email* email, FILETIME* create_date, FILETIME* modify_date); |
54 void create_enter_dir(struct file_ll* f, pst_item *item); | 63 void create_enter_dir(struct file_ll* f, pst_item *item); |
109 int contact_mode = CMODE_VCARD; | 118 int contact_mode = CMODE_VCARD; |
110 int deleted_mode = DMODE_EXCLUDE; | 119 int deleted_mode = DMODE_EXCLUDE; |
111 int overwrite = 0; | 120 int overwrite = 0; |
112 int save_rtf_body = 1; | 121 int save_rtf_body = 1; |
113 pst_file pstfile; | 122 pst_file pstfile; |
114 | 123 regex_t meta_charset_pattern; |
115 | 124 |
116 | 125 |
117 void process(pst_item *outeritem, pst_desc_ll *d_ptr) | 126 void process(pst_item *outeritem, pst_desc_ll *d_ptr) |
118 { | 127 { |
119 struct file_ll ff; | 128 struct file_ll ff; |
226 char *d_log = NULL; | 235 char *d_log = NULL; |
227 int c,x; | 236 int c,x; |
228 char *temp = NULL; //temporary char pointer | 237 char *temp = NULL; //temporary char pointer |
229 prog_name = argv[0]; | 238 prog_name = argv[0]; |
230 | 239 |
240 time_t now = time(NULL); | |
241 srand((unsigned)now); | |
242 | |
243 if (regcomp(&meta_charset_pattern, "<meta[^>]*content=\"[^>]*charset=([^>\";]*)[\";]", REG_ICASE | REG_EXTENDED)) { | |
244 printf("cannot compile regex pattern\n"); | |
245 exit(3); | |
246 } | |
247 | |
231 // command-line option handling | 248 // command-line option handling |
232 while ((c = getopt(argc, argv, "bCc:Dd:hko:qrSMVw"))!= -1) { | 249 while ((c = getopt(argc, argv, "bCc:Dd:hko:qrSMVw"))!= -1) { |
233 switch (c) { | 250 switch (c) { |
234 case 'b': | 251 case 'b': |
235 save_rtf_body = 0; | 252 save_rtf_body = 0; |
375 | 392 |
376 process(item, d_ptr->child); // do the children of TOPF | 393 process(item, d_ptr->child); // do the children of TOPF |
377 pst_freeItem(item); | 394 pst_freeItem(item); |
378 pst_close(&pstfile); | 395 pst_close(&pstfile); |
379 DEBUG_RET(); | 396 DEBUG_RET(); |
397 regfree(&meta_charset_pattern); | |
380 return 0; | 398 return 0; |
381 } | 399 } |
382 | 400 |
383 | 401 |
384 void write_email_body(FILE *f, char *body) { | 402 void write_email_body(FILE *f, char *body) { |
397 pst_fwrite(body, strlen(body), 1, f); | 415 pst_fwrite(body, strlen(body), 1, f); |
398 DEBUG_RET(); | 416 DEBUG_RET(); |
399 } | 417 } |
400 | 418 |
401 | 419 |
402 char *removeCR (char *c) { | 420 void removeCR (char *c) { |
403 // converts /r/n to /n | 421 // converts \r\n to \n |
404 char *a, *b; | 422 char *a, *b; |
405 DEBUG_ENT("removeCR"); | 423 DEBUG_ENT("removeCR"); |
406 a = b = c; | 424 a = b = c; |
407 while (*a != '\0') { | 425 while (*a != '\0') { |
408 *b = *a; | 426 *b = *a; |
409 if (*a != '\r') | 427 if (*a != '\r') b++; |
410 b++; | |
411 a++; | 428 a++; |
412 } | 429 } |
413 *b = '\0'; | 430 *b = '\0'; |
414 DEBUG_RET(); | 431 DEBUG_RET(); |
415 return c; | |
416 } | 432 } |
417 | 433 |
418 | 434 |
419 void usage() { | 435 void usage() { |
420 DEBUG_ENT("usage"); | 436 DEBUG_ENT("usage"); |
640 | 656 |
641 | 657 |
642 char *my_stristr(char *haystack, char *needle) { | 658 char *my_stristr(char *haystack, char *needle) { |
643 // my_stristr varies from strstr in that its searches are case-insensitive | 659 // my_stristr varies from strstr in that its searches are case-insensitive |
644 char *x=haystack, *y=needle, *z = NULL; | 660 char *x=haystack, *y=needle, *z = NULL; |
645 DEBUG_ENT("my_stristr"); | |
646 if (!haystack || !needle) { | 661 if (!haystack || !needle) { |
647 DEBUG_RET(); | |
648 return NULL; | 662 return NULL; |
649 } | 663 } |
650 while (*y != '\0' && *x != '\0') { | 664 while (*y != '\0' && *x != '\0') { |
651 if (tolower(*y) == tolower(*x)) { | 665 if (tolower(*y) == tolower(*x)) { |
652 // move y on one | 666 // move y on one |
658 y = needle; // reset y to the beginning of the needle | 672 y = needle; // reset y to the beginning of the needle |
659 z = NULL; // reset the haystack storage point | 673 z = NULL; // reset the haystack storage point |
660 } | 674 } |
661 x++; // advance the search in the haystack | 675 x++; // advance the search in the haystack |
662 } | 676 } |
663 DEBUG_RET(); | |
664 // If the haystack ended before our search finished, it's not a match. | 677 // If the haystack ended before our search finished, it's not a match. |
665 if (*y != '\0') return NULL; | 678 if (*y != '\0') return NULL; |
666 return z; | 679 return z; |
667 } | 680 } |
668 | 681 |
677 while ((t = strpbrk(t, "/\\:"))) { | 690 while ((t = strpbrk(t, "/\\:"))) { |
678 // while there are characters in the second string that we don't want | 691 // while there are characters in the second string that we don't want |
679 *t = '_'; //replace them with an underscore | 692 *t = '_'; //replace them with an underscore |
680 } | 693 } |
681 DEBUG_RET(); | 694 DEBUG_RET(); |
682 } | |
683 | |
684 | |
685 // The sole purpose of this function is to bypass the pseudo-header prologue | |
686 // that Microsoft Outlook inserts at the beginning of the internet email | |
687 // headers for emails stored in their "Personal Folders" files. | |
688 char *skip_header_prologue(char *headers) { | |
689 const char *bad = "Microsoft Mail Internet Headers"; | |
690 if (strncmp(headers, bad, strlen(bad)) == 0) { | |
691 // Found the offensive header prologue | |
692 char *pc = strchr(headers, '\n'); | |
693 return pc + 1; | |
694 } | |
695 return headers; | |
696 } | 695 } |
697 | 696 |
698 | 697 |
699 void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst) | 698 void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst) |
700 { | 699 { |
741 if (temp) free(temp); | 740 if (temp) free(temp); |
742 DEBUG_RET(); | 741 DEBUG_RET(); |
743 } | 742 } |
744 | 743 |
745 | 744 |
746 void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char boundary[], pst_file* pst) | 745 void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char *boundary, pst_file* pst) |
747 { | 746 { |
747 char *attach_filename; | |
748 char *enc = NULL; // base64 encoded attachment | 748 char *enc = NULL; // base64 encoded attachment |
749 DEBUG_ENT("write_inline_attachment"); | 749 DEBUG_ENT("write_inline_attachment"); |
750 DEBUG_EMAIL(("Attachment Size is %i\n", current_attach->size)); | 750 DEBUG_EMAIL(("Attachment Size is %i\n", current_attach->size)); |
751 DEBUG_EMAIL(("Attachment Pointer is %p\n", current_attach->data)); | 751 DEBUG_EMAIL(("Attachment Pointer is %p\n", current_attach->data)); |
752 if (current_attach->data) { | 752 if (current_attach->data) { |
755 DEBUG_EMAIL(("ERROR base64_encode returned NULL. Must have failed\n")); | 755 DEBUG_EMAIL(("ERROR base64_encode returned NULL. Must have failed\n")); |
756 DEBUG_RET(); | 756 DEBUG_RET(); |
757 return; | 757 return; |
758 } | 758 } |
759 } | 759 } |
760 if (boundary) { | 760 |
761 char *attach_filename; | 761 fprintf(f_output, "\n--%s\n", boundary); |
762 fprintf(f_output, "\n--%s\n", boundary); | 762 if (!current_attach->mimetype) { |
763 if (!current_attach->mimetype) { | 763 fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT); |
764 fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT); | 764 } else { |
765 } else { | 765 fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype); |
766 fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype); | 766 } |
767 } | 767 fprintf(f_output, "Content-Transfer-Encoding: base64\n"); |
768 fprintf(f_output, "Content-Transfer-Encoding: base64\n"); | 768 // If there is a long filename (filename2) use that, otherwise |
769 // If there is a long filename (filename2) use that, otherwise | 769 // use the 8.3 filename (filename1) |
770 // use the 8.3 filename (filename1) | 770 if (current_attach->filename2) { |
771 if (current_attach->filename2) { | 771 attach_filename = current_attach->filename2; |
772 attach_filename = current_attach->filename2; | 772 } else { |
773 } else { | 773 attach_filename = current_attach->filename1; |
774 attach_filename = current_attach->filename1; | 774 } |
775 } | 775 if (!attach_filename) { |
776 if (!attach_filename) { | 776 fprintf(f_output, "Content-Disposition: inline\n\n"); |
777 fprintf(f_output, "Content-Disposition: inline\n\n"); | 777 } else { |
778 } else { | 778 fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", attach_filename); |
779 fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", attach_filename); | 779 } |
780 } | 780 |
781 } | |
782 if (current_attach->data) { | 781 if (current_attach->data) { |
783 pst_fwrite(enc, 1, strlen(enc), f_output); | 782 pst_fwrite(enc, 1, strlen(enc), f_output); |
784 DEBUG_EMAIL(("Attachment Size after encoding is %i\n", strlen(enc))); | 783 DEBUG_EMAIL(("Attachment Size after encoding is %i\n", strlen(enc))); |
785 free(enc); // caught by valgrind | 784 free(enc); // caught by valgrind |
786 } else { | 785 } else { |
789 fprintf(f_output, "\n\n"); | 788 fprintf(f_output, "\n\n"); |
790 DEBUG_RET(); | 789 DEBUG_RET(); |
791 } | 790 } |
792 | 791 |
793 | 792 |
793 void header_has_field(char *header, char *field, int *flag) | |
794 { | |
795 if (my_stristr(header, field) || (strncasecmp(header, field+1, strlen(field)-1) == 0)) { | |
796 DEBUG_EMAIL(("header block has %s header\n", field+1)); | |
797 *flag = 1; | |
798 } | |
799 } | |
800 | |
801 | |
802 char* header_get_field(char *header, char *field) | |
803 { | |
804 char *t = my_stristr(header, field); | |
805 if (!t && (strncasecmp(header, field+1, strlen(field)-1) == 0)) t = header; | |
806 return t; | |
807 } | |
808 | |
809 | |
810 void header_strip_field(char *header, char *field) | |
811 { | |
812 char *e; | |
813 char *t = header_get_field(header, field); | |
814 if (t) { | |
815 e = strchr(t+1, '\n'); | |
816 while (e && ((e[1] == ' ') || (e[1] == '\t'))) { | |
817 e = strchr(e+1, '\n'); | |
818 } | |
819 if (e) { | |
820 if (t == header) e++; // if *t is not \n, we don't want to keep the \n at *e either. | |
821 while (*e != '\0') { | |
822 *t = *e; | |
823 t++; | |
824 e++; | |
825 } | |
826 *t = '\0'; | |
827 } | |
828 else { | |
829 // this was the last header field, truncate the headers | |
830 *t = '\0'; | |
831 } | |
832 } | |
833 } | |
834 | |
835 | |
836 int test_base64(char *body) | |
837 { | |
838 int b64 = 0; | |
839 uint8_t *b = (uint8_t *)body; | |
840 while (*b != 0) { | |
841 if ((*b < 32) && (*b != 9) && (*b != 10)) { | |
842 DEBUG_EMAIL(("found base64 byte %d\n", (int)*b)); | |
843 DEBUG_HEXDUMPC(body, strlen(body), 0x10); | |
844 b64 = 1; | |
845 break; | |
846 } | |
847 b++; | |
848 } | |
849 return b64; | |
850 } | |
851 | |
852 | |
853 void find_html_charset(char *html, char *charset, size_t charsetlen) | |
854 { | |
855 const int index = 1; | |
856 const int nmatch = index+1; | |
857 regmatch_t match[nmatch]; | |
858 int rc = regexec(&meta_charset_pattern, html, nmatch, match, 0); | |
859 if (rc == 0) { | |
860 int s = match[index].rm_so; | |
861 int e = match[index].rm_eo; | |
862 if (s != -1) { | |
863 char save = html[e]; | |
864 html[e] = '\0'; | |
865 snprintf(charset, charsetlen, "%s", html+s); // copy the html charset | |
866 html[e] = save; | |
867 DEBUG_EMAIL(("charset %s from html text\n", charset)); | |
868 } | |
869 else { | |
870 DEBUG_EMAIL(("matching %d %d %d %d", match[0].rm_so, match[0].rm_eo, match[1].rm_so, match[1].rm_eo)); | |
871 DEBUG_HEXDUMPC(html, strlen(html), 0x10); | |
872 } | |
873 } | |
874 else { | |
875 DEBUG_EMAIL(("regexec returns %d\n", rc)); | |
876 } | |
877 } | |
878 | |
879 | |
880 void write_body_part(FILE* f_output, char *body, char *mime, char *charset, char *boundary) | |
881 { | |
882 char *needfree = NULL; | |
883 if (strcasecmp("utf-8", charset)) { | |
884 // try to convert to the specified charset since it is not utf-8 | |
885 size_t rc; | |
886 DEBUG_EMAIL(("Convert %s utf-8 to %s\n", mime, charset)); | |
887 vbuf *newer = vballoc(2); | |
888 rc = vb_utf8to8bit(newer, body, strlen(body) + 1, charset); | |
889 if (rc == (size_t)-1) { | |
890 // unable to convert, maybe it is already in that character set | |
891 free(newer->b); | |
892 DEBUG_EMAIL(("Failed to convert %s utf-8 to %s\n", mime, charset)); | |
893 } | |
894 else { | |
895 needfree = body = newer->b; | |
896 } | |
897 free(newer); | |
898 } | |
899 removeCR(body); | |
900 int base64 = test_base64(body); | |
901 fprintf(f_output, "\n--%s\n", boundary); | |
902 fprintf(f_output, "Content-Type: %s; charset=\"%s\"\n", mime, charset); | |
903 if (base64) fprintf(f_output, "Content-Transfer-Encoding: base64\n"); | |
904 fprintf(f_output, "\n"); | |
905 if (base64) { | |
906 char *enc = base64_encode(body, strlen(body)); | |
907 if (enc) { | |
908 write_email_body(f_output, enc); | |
909 fprintf(f_output, "\n"); | |
910 free(enc); | |
911 } | |
912 } | |
913 else { | |
914 write_email_body(f_output, body); | |
915 } | |
916 if (needfree) free(needfree); | |
917 } | |
918 | |
919 | |
794 void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf) | 920 void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf) |
795 { | 921 { |
796 char *boundary = NULL; // the boundary marker between multipart sections | 922 char boundary[60]; |
797 int boundary_created = 0; // we have not (yet) created a new boundary | 923 char body_charset[60]; |
798 char *temp = NULL; | 924 char *temp = NULL; |
799 int attach_num, base64_body = 0; | 925 int attach_num; |
800 time_t em_time; | 926 time_t em_time; |
801 char *c_time; | 927 char *c_time; |
802 pst_item_attach* current_attach; | 928 pst_item_attach* current_attach; |
803 int has_from, has_subject, has_to, has_cc, has_bcc, has_date; | 929 int has_from, has_subject, has_to, has_cc, has_bcc, has_date; |
804 has_from = has_subject = has_to = has_cc = has_bcc = has_date = 0; | 930 has_from = has_subject = has_to = has_cc = has_bcc = has_date = 0; |
805 DEBUG_ENT("write_normal_email"); | 931 DEBUG_ENT("write_normal_email"); |
932 | |
933 // setup default body character set | |
934 snprintf(body_charset, sizeof(body_charset), "%s", (item->email->body_charset) ? item->email->body_charset : "utf-8"); | |
806 | 935 |
807 // convert the sent date if it exists, or set it to a fixed date | 936 // convert the sent date if it exists, or set it to a fixed date |
808 if (item->email->sent_date) { | 937 if (item->email->sent_date) { |
809 em_time = fileTimeToUnixTime(item->email->sent_date, 0); | 938 em_time = fileTimeToUnixTime(item->email->sent_date, 0); |
810 c_time = ctime(&em_time); | 939 c_time = ctime(&em_time); |
813 else | 942 else |
814 c_time = "Fri Dec 28 12:06:21 2001"; | 943 c_time = "Fri Dec 28 12:06:21 2001"; |
815 } else | 944 } else |
816 c_time= "Fri Dec 28 12:06:21 2001"; | 945 c_time= "Fri Dec 28 12:06:21 2001"; |
817 | 946 |
818 // we will always look at the header to discover some stuff | 947 // create our MIME boundary here. |
948 snprintf(boundary, sizeof(boundary), "--boundary-LibPST-iamunique-%i_-_-", rand()); | |
949 | |
950 // we will always look at the headers to discover some stuff | |
819 if (item->email->header ) { | 951 if (item->email->header ) { |
820 char *b1, *b2; | 952 char *t; |
821 // see if there is a boundary variable there | |
822 // this search MUST be made case insensitive (DONE). | |
823 // Also, we should check to find out if we are looking | |
824 // at the boundary associated with Content-Type, and that | |
825 // the content type really is multipart | |
826 | |
827 removeCR(item->email->header); | 953 removeCR(item->email->header); |
828 | 954 |
829 if ((b2 = my_stristr(item->email->header, "boundary="))) { | 955 // some of the headers we get from the file are not properly defined. |
830 int len; | 956 // they can contain some email stuff too. We will cut off the header |
831 b2 += strlen("boundary="); // move boundary to first char of marker | 957 // when we see a \n\n |
832 | 958 temp = strstr(item->email->header, "\n\n"); |
833 if (*b2 == '"') { | 959 if (temp) { |
834 b2++; | 960 temp[1] = '\0'; // stop after first \n |
835 b1 = strchr(b2, '"'); // find terminating quote | 961 DEBUG_EMAIL(("Found body text in header %s\n", temp+2)); |
836 } else { | 962 } |
837 b1 = b2; | 963 |
838 while (isgraph(*b1)) // find first char that isn't part of boundary | 964 // Check if the headers have all the necessary fields |
839 b1++; | 965 header_has_field(item->email->header, "\nFrom: ", &has_from); |
966 header_has_field(item->email->header, "\nTo: ", &has_to); | |
967 header_has_field(item->email->header, "\nSubject: ", &has_subject); | |
968 header_has_field(item->email->header, "\nDate: ", &has_date); | |
969 header_has_field(item->email->header, "\nCC: ", &has_cc); | |
970 header_has_field(item->email->header, "\nBCC: ", &has_bcc); | |
971 | |
972 // look for charset in Content-Type header | |
973 t = header_get_field(item->email->header, "\nContent-Type: "); | |
974 if (t) { | |
975 // assume charset= will be on the first line, rather than on a continuation line | |
976 t++; | |
977 char *n = strchr(t, '\n'); | |
978 char *s = my_stristr(t, "; charset="); | |
979 if (n && s && (s < n)) { | |
980 char *e; | |
981 char save; | |
982 s += 10; // skip over charset= | |
983 if (*s == '"') { | |
984 s++; | |
985 e = strchr(s, '"'); | |
986 } | |
987 else { | |
988 e = strchr(s, ';'); | |
989 } | |
990 if (!e || (e > n)) e = n; // use the trailing lf as terminator if nothing better | |
991 save = *e; | |
992 *e = '\0'; | |
993 snprintf(body_charset, sizeof(body_charset), "%s", s); // copy the charset to our buffer | |
994 *e = save; | |
995 DEBUG_EMAIL(("body charset %s from headers\n", body_charset)); | |
840 } | 996 } |
841 len = b1 - b2; | 997 } |
842 boundary = malloc(len+1); //malloc that length | 998 |
843 strncpy(boundary, b2, len); // copy boundary to another variable | 999 // Strip out the mime headers and some others that we don't want to emit |
844 boundary[len] = '\0'; | 1000 header_strip_field(item->email->header, "\nMicrosoft Mail Internet Headers"); |
845 b1 = b2 = boundary; | 1001 header_strip_field(item->email->header, "\nMIME-Version: "); |
846 while (*b2 != '\0') { // remove any CRs and Tabs | 1002 header_strip_field(item->email->header, "\nContent-Type: "); |
847 if (*b2 != '\n' && *b2 != '\r' && *b2 != '\t') { | 1003 header_strip_field(item->email->header, "\nContent-Transfer-Encoding: "); |
848 *b1 = *b2; | 1004 header_strip_field(item->email->header, "\nContent-class: "); |
849 b1++; | 1005 header_strip_field(item->email->header, "\nX-MimeOLE: "); |
850 } | |
851 b2++; | |
852 } | |
853 *b1 = '\0'; | |
854 | |
855 DEBUG_EMAIL(("Found boundary of - %s\n", boundary)); | |
856 } else { | |
857 DEBUG_EMAIL(("boundary not found in header\n")); | |
858 } | |
859 | |
860 // also possible to set 7bit encoding detection here. | |
861 if ((b2 = my_stristr(item->email->header, "Content-Transfer-Encoding:"))) { | |
862 if ((b2 = strchr(b2, ':'))) { | |
863 b2++; // skip to the : at the end of the string | |
864 | |
865 while (*b2 == ' ' || *b2 == '\t') | |
866 b2++; | |
867 if (pst_strincmp(b2, "base64", 6)==0) { | |
868 DEBUG_EMAIL(("body is base64 encoded\n")); | |
869 base64_body = 1; | |
870 } | |
871 } else { | |
872 DEBUG_WARN(("found a ':' during the my_stristr, but not after that..\n")); | |
873 } | |
874 } | |
875 | |
876 // Check if the header block has all the necessary headers. | |
877 if (my_stristr(item->email->header, "\nFrom:") || (strncasecmp(item->email->header, "From: ", 6) == 0) || my_stristr(item->email->header, "\nX-From:")) { | |
878 DEBUG_EMAIL(("header block has From header\n")); | |
879 has_from = 1; | |
880 } | |
881 if (my_stristr(item->email->header, "\nTo:") || (strncasecmp(item->email->header, "To: ", 4) == 0)) { | |
882 DEBUG_EMAIL(("header block has To header\n")); | |
883 has_to = 1; | |
884 } | |
885 if (my_stristr(item->email->header, "\nSubject:") || (strncasecmp(item->email->header, "Subject: ", 9) == 0)) { | |
886 DEBUG_EMAIL(("header block has Subject header\n")); | |
887 has_subject = 1; | |
888 } | |
889 if (my_stristr(item->email->header, "\nDate:") || (strncasecmp(item->email->header, "Date: ", 6) == 0)) { | |
890 DEBUG_EMAIL(("header block has Date header\n")); | |
891 has_date = 1; | |
892 } | |
893 if (my_stristr(item->email->header, "\nCC:") || (strncasecmp(item->email->header, "CC: ", 4) == 0)) { | |
894 DEBUG_EMAIL(("header block has CC header\n")); | |
895 has_cc = 1; | |
896 } | |
897 if (my_stristr(item->email->header, "\nBCC:") || (strncasecmp(item->email->header, "BCC: ", 5) == 0)) { | |
898 DEBUG_EMAIL(("header block has BCC header\n")); | |
899 has_bcc = 1; | |
900 } | |
901 } | |
902 | |
903 if (!boundary && (item->attach || (item->email->body && item->email->htmlbody) | |
904 || item->email->rtf_compressed || item->email->encrypted_body | |
905 || item->email->encrypted_htmlbody)) { | |
906 // we need to create a boundary here. | |
907 DEBUG_EMAIL(("must create own boundary. oh dear.\n")); | |
908 boundary = malloc(50 * sizeof(char)); // allow 50 chars for boundary | |
909 boundary[0] = '\0'; | |
910 sprintf(boundary, "--boundary-LibPST-iamunique-%i_-_-", rand()); | |
911 DEBUG_EMAIL(("created boundary is %s\n", boundary)); | |
912 boundary_created = 1; | |
913 } | 1006 } |
914 | 1007 |
915 DEBUG_EMAIL(("About to print Header\n")); | 1008 DEBUG_EMAIL(("About to print Header\n")); |
916 | 1009 |
917 if (item && item->email && item->email->subject && item->email->subject->subj) { | 1010 if (item && item->email && item->email->subject && item->email->subject->subj) { |
918 DEBUG_EMAIL(("item->email->subject->subj = %s\n", item->email->subject->subj)); | 1011 DEBUG_EMAIL(("item->email->subject->subj = %s\n", item->email->subject->subj)); |
919 } | 1012 } |
920 | 1013 |
921 if (item->email->header) { | 1014 if (item->email->header) { |
922 int len; | 1015 int len; |
923 char *soh = NULL; // real start of headers. | 1016 char *soh = item->email->header; |
924 | 1017 |
925 // some of the headers we get from the file are not properly defined. | |
926 // they can contain some email stuff too. We will cut off the header | |
927 // when we see a \n\n or \r\n\r\n | |
928 removeCR(item->email->header); | |
929 temp = strstr(item->email->header, "\n\n"); | |
930 | |
931 if (temp) { | |
932 DEBUG_EMAIL(("Found body text in header\n")); | |
933 temp[1] = '\0'; // stop after first \n | |
934 } | |
935 | |
936 // Write out any fields that weren't included in the header. | |
937 if (!has_from) { | |
938 temp = item->email->outlook_sender; | |
939 if (!temp) temp = ""; | |
940 fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, temp); | |
941 } | |
942 | |
943 if (!has_subject) { | |
944 if (item->email->subject && item->email->subject->subj) { | |
945 fprintf(f_output, "Subject: %s\n", item->email->subject->subj); | |
946 } else { | |
947 fprintf(f_output, "Subject: \n"); | |
948 } | |
949 } | |
950 | |
951 if (!has_to && item->email->sentto_address) { | |
952 fprintf(f_output, "To: %s\n", item->email->sentto_address); | |
953 } | |
954 | |
955 if (!has_cc && item->email->cc_address) { | |
956 fprintf(f_output, "Cc: %s\n", item->email->cc_address); | |
957 } | |
958 | |
959 if (!has_bcc && item->email->bcc_address) { | |
960 fprintf(f_output, "Bcc: %s\n", item->email->bcc_address); | |
961 } | |
962 | |
963 if (!has_date && item->email->sent_date) { | |
964 char c_time[C_TIME_SIZE]; | |
965 strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time)); | |
966 fprintf(f_output, "Date: %s\n", c_time); | |
967 } | |
968 | |
969 // Now, write out the header... | |
970 soh = skip_header_prologue(item->email->header); | |
971 if (mode != MODE_SEPARATE) { | 1018 if (mode != MODE_SEPARATE) { |
972 // don't put rubbish in if we are doing separate | 1019 // don't put rubbish in if we are doing separate |
973 if (strncmp(soh, "X-From_: ", 9) == 0 ) { | 1020 if (strncmp(soh, "X-From_: ", 9) == 0 ) { |
974 fputs("From ", f_output); | 1021 fputs("From ", f_output); |
975 soh += 9; | 1022 soh += 9; |
976 } else | 1023 } else |
977 fprintf(f_output, "From \"%s\" %s\n", item->email->outlook_sender_name, c_time); | 1024 fprintf(f_output, "From \"%s\" %s\n", item->email->outlook_sender_name, c_time); |
978 } | 1025 } |
1026 | |
1027 // make sure the headers end with a \n | |
979 fprintf(f_output, "%s", soh); | 1028 fprintf(f_output, "%s", soh); |
980 len = strlen(soh); | 1029 len = strlen(soh); |
981 if (!len || (soh[len-1] != '\n')) fprintf(f_output, "\n"); | 1030 if (!len || (soh[len-1] != '\n')) fprintf(f_output, "\n"); |
982 | 1031 |
983 } else { | 1032 } else { |
989 } else { | 1038 } else { |
990 temp = "(readpst_null)"; | 1039 temp = "(readpst_null)"; |
991 } | 1040 } |
992 fprintf(f_output, "From \"%s\" %s\n", temp, c_time); | 1041 fprintf(f_output, "From \"%s\" %s\n", temp, c_time); |
993 } | 1042 } |
994 | 1043 } |
1044 | |
1045 // create required header fields that are not already written | |
1046 if (!has_from) { | |
995 temp = item->email->outlook_sender; | 1047 temp = item->email->outlook_sender; |
996 if (!temp) temp = ""; | 1048 if (!temp) temp = ""; |
997 fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, temp); | 1049 fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, temp); |
998 | 1050 } |
1051 | |
1052 if (!has_subject) { | |
999 if (item->email->subject && item->email->subject->subj) { | 1053 if (item->email->subject && item->email->subject->subj) { |
1000 fprintf(f_output, "Subject: %s\n", item->email->subject->subj); | 1054 fprintf(f_output, "Subject: %s\n", item->email->subject->subj); |
1001 } else { | 1055 } else { |
1002 fprintf(f_output, "Subject: \n"); | 1056 fprintf(f_output, "Subject: \n"); |
1003 } | 1057 } |
1004 | 1058 } |
1005 if (item->email->sentto_address) { | 1059 |
1006 fprintf(f_output, "To: %s\n", item->email->sentto_address); | 1060 if (!has_to && item->email->sentto_address) { |
1007 } | 1061 fprintf(f_output, "To: %s\n", item->email->sentto_address); |
1008 | 1062 } |
1009 if (item->email->cc_address) { | 1063 |
1010 fprintf(f_output, "Cc: %s\n", item->email->cc_address); | 1064 if (!has_cc && item->email->cc_address) { |
1011 } | 1065 fprintf(f_output, "Cc: %s\n", item->email->cc_address); |
1012 | 1066 } |
1013 if (item->email->sent_date) { | 1067 |
1014 char c_time[C_TIME_SIZE]; | 1068 if (!has_bcc && item->email->bcc_address) { |
1015 strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time)); | 1069 fprintf(f_output, "Bcc: %s\n", item->email->bcc_address); |
1016 fprintf(f_output, "Date: %s\n", c_time); | 1070 } |
1017 } | 1071 |
1018 } | 1072 if (!has_date && item->email->sent_date) { |
1019 | 1073 char c_time[C_TIME_SIZE]; |
1074 strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time)); | |
1075 fprintf(f_output, "Date: %s\n", c_time); | |
1076 } | |
1077 | |
1078 // add our own mime headers | |
1020 fprintf(f_output, "MIME-Version: 1.0\n"); | 1079 fprintf(f_output, "MIME-Version: 1.0\n"); |
1021 if (boundary && boundary_created) { | 1080 if (item->attach || (item->email->rtf_compressed && save_rtf) |
1022 // if we created the boundary, then it has NOT already been printed | 1081 || item->email->encrypted_body |
1023 // in the headers above. | 1082 || item->email->encrypted_htmlbody) { |
1024 if (item->attach) { | 1083 // use multipart/mixed if we have attachments |
1025 // write the boundary stuff if we have attachments | 1084 fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); |
1026 fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); | 1085 } else { |
1027 } else { | 1086 // else use multipart/alternative |
1028 // else we have multipart/alternative then tell it so | 1087 fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary); |
1029 fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary); | 1088 } |
1030 } | 1089 fprintf(f_output, "\n"); // end of headers, start of body |
1031 } | 1090 |
1032 fprintf(f_output, "\n"); // start the body | 1091 // now dump the body parts |
1033 DEBUG_EMAIL(("About to print Body\n")); | |
1034 | |
1035 if (item->email->body) { | 1092 if (item->email->body) { |
1036 if (boundary) { | 1093 write_body_part(f_output, item->email->body, "text/plain", body_charset, boundary); |
1037 // try to find the charset for this body part | |
1038 const char *def = "utf-8"; | |
1039 // it seems that if (item->email->body_charset) is set, then | |
1040 // we actually have utf8 plain body text. If that is not set | |
1041 // we have plain body text in an 8 bit charset specified in | |
1042 // the headers. | |
1043 char *c = my_stristr(item->email->header, "\nContent-Type:"); | |
1044 if (c) { | |
1045 c++; | |
1046 char *n = my_stristr(c, "\n"); // termination on the content type | |
1047 if (n) { | |
1048 char *s = my_stristr(c, "; charset="); | |
1049 if (s && (s < n)) { | |
1050 char *e; | |
1051 s += 10; // skip over charset= | |
1052 if (*s == '"') { | |
1053 s++; | |
1054 e = my_stristr(s, "\""); | |
1055 } | |
1056 else { | |
1057 e = my_stristr(s, ";"); | |
1058 } | |
1059 if (!e || (e > n)) e = n; // use the trailing lf as terminator if nothing better | |
1060 *e = '\0'; // corrupt the header, but we have already printed it | |
1061 def = s; | |
1062 DEBUG_EMAIL(("body charset %s from headers\n", def)); | |
1063 } | |
1064 } | |
1065 } | |
1066 fprintf(f_output, "\n--%s\n", boundary); | |
1067 fprintf(f_output, "Content-Type: text/plain; charset=\"%s\"\n", def); | |
1068 if (base64_body) | |
1069 fprintf(f_output, "Content-Transfer-Encoding: base64\n"); | |
1070 fprintf(f_output, "\n"); | |
1071 } | |
1072 else if (item->email->body_charset && (strcasecmp("utf-8",item->email->body_charset))) { | |
1073 // try to convert to the specified charset since it is not utf-8 | |
1074 size_t rc; | |
1075 DEBUG_EMAIL(("Convert plain text utf-8 to %s\n", item->email->body_charset)); | |
1076 vbuf *newer = vballoc(2); | |
1077 rc = vb_utf8to8bit(newer, item->email->body, strlen(item->email->body) + 1, item->email->body_charset); | |
1078 if (rc == (size_t)-1) { | |
1079 free(newer->b); | |
1080 DEBUG_EMAIL(("Failed to convert plain text utf-8 to %s\n", item->email->body_charset)); | |
1081 } | |
1082 else { | |
1083 // unable to convert, maybe it is already in that character set | |
1084 free(item->email->body); | |
1085 item->email->body = newer->b; | |
1086 } | |
1087 free(newer); | |
1088 } | |
1089 removeCR(item->email->body); | |
1090 if (base64_body) { | |
1091 char *enc = base64_encode(item->email->body, strlen(item->email->body)); | |
1092 if (enc) { | |
1093 write_email_body(f_output, enc); | |
1094 free(enc); | |
1095 } | |
1096 } | |
1097 else { | |
1098 write_email_body(f_output, item->email->body); | |
1099 } | |
1100 } | 1094 } |
1101 | 1095 |
1102 if (item->email->htmlbody) { | 1096 if (item->email->htmlbody) { |
1103 if (boundary) { | 1097 find_html_charset(item->email->htmlbody, body_charset, sizeof(body_charset)); |
1104 const char *def = "utf-8"; | 1098 write_body_part(f_output, item->email->htmlbody, "text/html", body_charset, boundary); |
1105 if (item->email->body_charset) def = item->email->body_charset; | |
1106 fprintf(f_output, "\n--%s\n", boundary); | |
1107 fprintf(f_output, "Content-Type: text/html; charset=\"%s\"\n", def); | |
1108 if (base64_body) fprintf(f_output, "Content-Transfer-Encoding: base64\n"); | |
1109 fprintf(f_output, "\n"); | |
1110 } | |
1111 removeCR(item->email->htmlbody); | |
1112 if (base64_body) { | |
1113 char *enc = base64_encode(item->email->htmlbody, strlen(item->email->htmlbody)); | |
1114 if (enc) { | |
1115 write_email_body(f_output, enc); | |
1116 free(enc); | |
1117 } | |
1118 } | |
1119 else { | |
1120 write_email_body(f_output, item->email->htmlbody); | |
1121 } | |
1122 } | 1099 } |
1123 | 1100 |
1124 if (item->email->rtf_compressed && save_rtf) { | 1101 if (item->email->rtf_compressed && save_rtf) { |
1125 //int32_t tester; | |
1126 DEBUG_EMAIL(("Adding RTF body as attachment\n")); | 1102 DEBUG_EMAIL(("Adding RTF body as attachment\n")); |
1127 current_attach = (pst_item_attach*)xmalloc(sizeof(pst_item_attach)); | 1103 current_attach = (pst_item_attach*)xmalloc(sizeof(pst_item_attach)); |
1128 memset(current_attach, 0, sizeof(pst_item_attach)); | 1104 memset(current_attach, 0, sizeof(pst_item_attach)); |
1129 current_attach->next = item->attach; | 1105 current_attach->next = item->attach; |
1130 item->attach = current_attach; | 1106 item->attach = current_attach; |
1131 current_attach->data = lzfu_decompress(item->email->rtf_compressed, item->email->rtf_compressed_size, &current_attach->size); | 1107 current_attach->data = lzfu_decompress(item->email->rtf_compressed, item->email->rtf_compressed_size, &current_attach->size); |
1132 current_attach->filename2 = xmalloc(strlen(RTF_ATTACH_NAME)+2); | 1108 current_attach->filename2 = xmalloc(strlen(RTF_ATTACH_NAME)+2); |
1133 strcpy(current_attach->filename2, RTF_ATTACH_NAME); | 1109 strcpy(current_attach->filename2, RTF_ATTACH_NAME); |
1134 current_attach->mimetype = xmalloc(strlen(RTF_ATTACH_TYPE)+2); | 1110 current_attach->mimetype = xmalloc(strlen(RTF_ATTACH_TYPE)+2); |
1135 strcpy(current_attach->mimetype, RTF_ATTACH_TYPE); | 1111 strcpy(current_attach->mimetype, RTF_ATTACH_TYPE); |
1136 //memcpy(&tester, item->email->rtf_compressed+sizeof(int32_t), sizeof(int32_t)); | |
1137 //LE32_CPU(tester); | |
1138 //printf("lz produced %d bytes, rtf claims %d bytes\n", current_attach->size, tester); | |
1139 } | 1112 } |
1140 | 1113 |
1141 if (item->email->encrypted_body || item->email->encrypted_htmlbody) { | 1114 if (item->email->encrypted_body || item->email->encrypted_htmlbody) { |
1142 // if either the body or htmlbody is encrypted, add them as attachments | 1115 // if either the body or htmlbody is encrypted, add them as attachments |
1143 if (item->email->encrypted_body) { | 1116 if (item->email->encrypted_body) { |
1162 item->email->encrypted_htmlbody = NULL; | 1135 item->email->encrypted_htmlbody = NULL; |
1163 } | 1136 } |
1164 write_email_body(f_output, "The body of this email is encrypted. This isn't supported yet, but the body is now an attachment\n"); | 1137 write_email_body(f_output, "The body of this email is encrypted. This isn't supported yet, but the body is now an attachment\n"); |
1165 } | 1138 } |
1166 | 1139 |
1167 // attachments | 1140 // other attachments |
1168 attach_num = 0; | 1141 attach_num = 0; |
1169 for (current_attach = item->attach; current_attach; current_attach = current_attach->next) { | 1142 for (current_attach = item->attach; current_attach; current_attach = current_attach->next) { |
1170 DEBUG_EMAIL(("Attempting Attachment encoding\n")); | 1143 DEBUG_EMAIL(("Attempting Attachment encoding\n")); |
1171 if (!current_attach->data) { | 1144 if (!current_attach->data) { |
1172 DEBUG_EMAIL(("Data of attachment is NULL!. Size is supposed to be %i\n", current_attach->size)); | 1145 DEBUG_EMAIL(("Data of attachment is NULL!. Size is supposed to be %i\n", current_attach->size)); |
1176 else | 1149 else |
1177 write_inline_attachment(f_output, current_attach, boundary, pst); | 1150 write_inline_attachment(f_output, current_attach, boundary, pst); |
1178 } | 1151 } |
1179 if (mode != MODE_SEPARATE) { /* do not add a boundary after the last attachment for mode_MH */ | 1152 if (mode != MODE_SEPARATE) { /* do not add a boundary after the last attachment for mode_MH */ |
1180 DEBUG_EMAIL(("Writing buffer between emails\n")); | 1153 DEBUG_EMAIL(("Writing buffer between emails\n")); |
1181 if (boundary) fprintf(f_output, "\n--%s--\n", boundary); | 1154 fprintf(f_output, "\n--%s--\n", boundary); |
1182 fprintf(f_output, "\n\n"); | 1155 fprintf(f_output, "\n\n"); |
1183 } | 1156 } |
1184 if (boundary) free (boundary); | |
1185 DEBUG_RET(); | 1157 DEBUG_RET(); |
1186 } | 1158 } |
1187 | 1159 |
1188 | 1160 |
1189 void write_vcard(FILE* f_output, pst_item_contact* contact, char comment[]) | 1161 void write_vcard(FILE* f_output, pst_item_contact* contact, char comment[]) |