comparison src/readpst.c @ 121:8399ef94c11b

Strip and regenerate all MIME headers to avoid duplicates. Do a better job of making unique MIME boundaries. Only use base64 coding when strictly necessary.
author Carl Byington <carl@five-ten-sg.com>
date Sun, 01 Feb 2009 19:21:47 -0800
parents 6395ced2b8b2
children bdb38b434c0a
comparison
equal deleted inserted replaced
120:6395ced2b8b2 121:8399ef94c11b
10 #include "timeconv.h" 10 #include "timeconv.h"
11 #include "lzfu.h" 11 #include "lzfu.h"
12 #include "libstrfunc.h" 12 #include "libstrfunc.h"
13 #include "vbuf.h" 13 #include "vbuf.h"
14 14
15 #ifdef HAVE_REGEX_H
16 #include <regex.h>
17 #endif
18
15 #define OUTPUT_TEMPLATE "%s" 19 #define OUTPUT_TEMPLATE "%s"
16 #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory" 20 #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory"
17 #define KMAIL_INDEX ".%s.index" 21 #define KMAIL_INDEX ".%s.index"
18 #define SEP_MAIL_FILE_TEMPLATE "%i" /* "%09i" */ 22 #define SEP_MAIL_FILE_TEMPLATE "%i" /* "%09i" */
19 23
30 int32_t type; 34 int32_t type;
31 }; 35 };
32 36
33 void process(pst_item *outeritem, pst_desc_ll *d_ptr); 37 void process(pst_item *outeritem, pst_desc_ll *d_ptr);
34 void write_email_body(FILE *f, char *body); 38 void write_email_body(FILE *f, char *body);
35 char* removeCR (char *c); 39 void removeCR(char *c);
36 void usage(); 40 void usage();
37 void version(); 41 void version();
38 char* mk_kmail_dir(char*); 42 char* mk_kmail_dir(char*);
39 int close_kmail_dir(); 43 int close_kmail_dir();
40 char* mk_recurse_dir(char*); 44 char* mk_recurse_dir(char*);
42 char* mk_separate_dir(char *dir); 46 char* mk_separate_dir(char *dir);
43 int close_separate_dir(); 47 int close_separate_dir();
44 int mk_separate_file(struct file_ll *f); 48 int mk_separate_file(struct file_ll *f);
45 char* my_stristr(char *haystack, char *needle); 49 char* my_stristr(char *haystack, char *needle);
46 void check_filename(char *fname); 50 void check_filename(char *fname);
47 char* skip_header_prologue(char *headers);
48 void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst); 51 void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst);
49 void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char boundary[], pst_file* pst); 52 void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char *boundary, pst_file* pst);
53 void header_has_field(char *header, char *field, int *flag);
54 char* header_get_field(char *header, char *field);
55 void header_strip_field(char *header, char *field);
56 int test_base64(char *body);
57 void find_html_charset(char *html, char *charset, size_t charsetlen);
58 void write_body_part(FILE* f_output, char *body, char *mime, char *charset, char *boundary);
50 void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf); 59 void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf);
51 void write_vcard(FILE* f_output, pst_item_contact* contact, char comment[]); 60 void write_vcard(FILE* f_output, pst_item_contact* contact, char comment[]);
52 void write_appointment(FILE* f_output, pst_item_appointment* appointment, 61 void write_appointment(FILE* f_output, pst_item_appointment* appointment,
53 pst_item_email* email, FILETIME* create_date, FILETIME* modify_date); 62 pst_item_email* email, FILETIME* create_date, FILETIME* modify_date);
54 void create_enter_dir(struct file_ll* f, pst_item *item); 63 void create_enter_dir(struct file_ll* f, pst_item *item);
109 int contact_mode = CMODE_VCARD; 118 int contact_mode = CMODE_VCARD;
110 int deleted_mode = DMODE_EXCLUDE; 119 int deleted_mode = DMODE_EXCLUDE;
111 int overwrite = 0; 120 int overwrite = 0;
112 int save_rtf_body = 1; 121 int save_rtf_body = 1;
113 pst_file pstfile; 122 pst_file pstfile;
114 123 regex_t meta_charset_pattern;
115 124
116 125
117 void process(pst_item *outeritem, pst_desc_ll *d_ptr) 126 void process(pst_item *outeritem, pst_desc_ll *d_ptr)
118 { 127 {
119 struct file_ll ff; 128 struct file_ll ff;
226 char *d_log = NULL; 235 char *d_log = NULL;
227 int c,x; 236 int c,x;
228 char *temp = NULL; //temporary char pointer 237 char *temp = NULL; //temporary char pointer
229 prog_name = argv[0]; 238 prog_name = argv[0];
230 239
240 time_t now = time(NULL);
241 srand((unsigned)now);
242
243 if (regcomp(&meta_charset_pattern, "<meta[^>]*content=\"[^>]*charset=([^>\";]*)[\";]", REG_ICASE | REG_EXTENDED)) {
244 printf("cannot compile regex pattern\n");
245 exit(3);
246 }
247
231 // command-line option handling 248 // command-line option handling
232 while ((c = getopt(argc, argv, "bCc:Dd:hko:qrSMVw"))!= -1) { 249 while ((c = getopt(argc, argv, "bCc:Dd:hko:qrSMVw"))!= -1) {
233 switch (c) { 250 switch (c) {
234 case 'b': 251 case 'b':
235 save_rtf_body = 0; 252 save_rtf_body = 0;
375 392
376 process(item, d_ptr->child); // do the children of TOPF 393 process(item, d_ptr->child); // do the children of TOPF
377 pst_freeItem(item); 394 pst_freeItem(item);
378 pst_close(&pstfile); 395 pst_close(&pstfile);
379 DEBUG_RET(); 396 DEBUG_RET();
397 regfree(&meta_charset_pattern);
380 return 0; 398 return 0;
381 } 399 }
382 400
383 401
384 void write_email_body(FILE *f, char *body) { 402 void write_email_body(FILE *f, char *body) {
397 pst_fwrite(body, strlen(body), 1, f); 415 pst_fwrite(body, strlen(body), 1, f);
398 DEBUG_RET(); 416 DEBUG_RET();
399 } 417 }
400 418
401 419
402 char *removeCR (char *c) { 420 void removeCR (char *c) {
403 // converts /r/n to /n 421 // converts \r\n to \n
404 char *a, *b; 422 char *a, *b;
405 DEBUG_ENT("removeCR"); 423 DEBUG_ENT("removeCR");
406 a = b = c; 424 a = b = c;
407 while (*a != '\0') { 425 while (*a != '\0') {
408 *b = *a; 426 *b = *a;
409 if (*a != '\r') 427 if (*a != '\r') b++;
410 b++;
411 a++; 428 a++;
412 } 429 }
413 *b = '\0'; 430 *b = '\0';
414 DEBUG_RET(); 431 DEBUG_RET();
415 return c;
416 } 432 }
417 433
418 434
419 void usage() { 435 void usage() {
420 DEBUG_ENT("usage"); 436 DEBUG_ENT("usage");
640 656
641 657
642 char *my_stristr(char *haystack, char *needle) { 658 char *my_stristr(char *haystack, char *needle) {
643 // my_stristr varies from strstr in that its searches are case-insensitive 659 // my_stristr varies from strstr in that its searches are case-insensitive
644 char *x=haystack, *y=needle, *z = NULL; 660 char *x=haystack, *y=needle, *z = NULL;
645 DEBUG_ENT("my_stristr");
646 if (!haystack || !needle) { 661 if (!haystack || !needle) {
647 DEBUG_RET();
648 return NULL; 662 return NULL;
649 } 663 }
650 while (*y != '\0' && *x != '\0') { 664 while (*y != '\0' && *x != '\0') {
651 if (tolower(*y) == tolower(*x)) { 665 if (tolower(*y) == tolower(*x)) {
652 // move y on one 666 // move y on one
658 y = needle; // reset y to the beginning of the needle 672 y = needle; // reset y to the beginning of the needle
659 z = NULL; // reset the haystack storage point 673 z = NULL; // reset the haystack storage point
660 } 674 }
661 x++; // advance the search in the haystack 675 x++; // advance the search in the haystack
662 } 676 }
663 DEBUG_RET();
664 // If the haystack ended before our search finished, it's not a match. 677 // If the haystack ended before our search finished, it's not a match.
665 if (*y != '\0') return NULL; 678 if (*y != '\0') return NULL;
666 return z; 679 return z;
667 } 680 }
668 681
677 while ((t = strpbrk(t, "/\\:"))) { 690 while ((t = strpbrk(t, "/\\:"))) {
678 // while there are characters in the second string that we don't want 691 // while there are characters in the second string that we don't want
679 *t = '_'; //replace them with an underscore 692 *t = '_'; //replace them with an underscore
680 } 693 }
681 DEBUG_RET(); 694 DEBUG_RET();
682 }
683
684
685 // The sole purpose of this function is to bypass the pseudo-header prologue
686 // that Microsoft Outlook inserts at the beginning of the internet email
687 // headers for emails stored in their "Personal Folders" files.
688 char *skip_header_prologue(char *headers) {
689 const char *bad = "Microsoft Mail Internet Headers";
690 if (strncmp(headers, bad, strlen(bad)) == 0) {
691 // Found the offensive header prologue
692 char *pc = strchr(headers, '\n');
693 return pc + 1;
694 }
695 return headers;
696 } 695 }
697 696
698 697
699 void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst) 698 void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst)
700 { 699 {
741 if (temp) free(temp); 740 if (temp) free(temp);
742 DEBUG_RET(); 741 DEBUG_RET();
743 } 742 }
744 743
745 744
746 void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char boundary[], pst_file* pst) 745 void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char *boundary, pst_file* pst)
747 { 746 {
747 char *attach_filename;
748 char *enc = NULL; // base64 encoded attachment 748 char *enc = NULL; // base64 encoded attachment
749 DEBUG_ENT("write_inline_attachment"); 749 DEBUG_ENT("write_inline_attachment");
750 DEBUG_EMAIL(("Attachment Size is %i\n", current_attach->size)); 750 DEBUG_EMAIL(("Attachment Size is %i\n", current_attach->size));
751 DEBUG_EMAIL(("Attachment Pointer is %p\n", current_attach->data)); 751 DEBUG_EMAIL(("Attachment Pointer is %p\n", current_attach->data));
752 if (current_attach->data) { 752 if (current_attach->data) {
755 DEBUG_EMAIL(("ERROR base64_encode returned NULL. Must have failed\n")); 755 DEBUG_EMAIL(("ERROR base64_encode returned NULL. Must have failed\n"));
756 DEBUG_RET(); 756 DEBUG_RET();
757 return; 757 return;
758 } 758 }
759 } 759 }
760 if (boundary) { 760
761 char *attach_filename; 761 fprintf(f_output, "\n--%s\n", boundary);
762 fprintf(f_output, "\n--%s\n", boundary); 762 if (!current_attach->mimetype) {
763 if (!current_attach->mimetype) { 763 fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT);
764 fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT); 764 } else {
765 } else { 765 fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype);
766 fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype); 766 }
767 } 767 fprintf(f_output, "Content-Transfer-Encoding: base64\n");
768 fprintf(f_output, "Content-Transfer-Encoding: base64\n"); 768 // If there is a long filename (filename2) use that, otherwise
769 // If there is a long filename (filename2) use that, otherwise 769 // use the 8.3 filename (filename1)
770 // use the 8.3 filename (filename1) 770 if (current_attach->filename2) {
771 if (current_attach->filename2) { 771 attach_filename = current_attach->filename2;
772 attach_filename = current_attach->filename2; 772 } else {
773 } else { 773 attach_filename = current_attach->filename1;
774 attach_filename = current_attach->filename1; 774 }
775 } 775 if (!attach_filename) {
776 if (!attach_filename) { 776 fprintf(f_output, "Content-Disposition: inline\n\n");
777 fprintf(f_output, "Content-Disposition: inline\n\n"); 777 } else {
778 } else { 778 fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", attach_filename);
779 fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", attach_filename); 779 }
780 } 780
781 }
782 if (current_attach->data) { 781 if (current_attach->data) {
783 pst_fwrite(enc, 1, strlen(enc), f_output); 782 pst_fwrite(enc, 1, strlen(enc), f_output);
784 DEBUG_EMAIL(("Attachment Size after encoding is %i\n", strlen(enc))); 783 DEBUG_EMAIL(("Attachment Size after encoding is %i\n", strlen(enc)));
785 free(enc); // caught by valgrind 784 free(enc); // caught by valgrind
786 } else { 785 } else {
789 fprintf(f_output, "\n\n"); 788 fprintf(f_output, "\n\n");
790 DEBUG_RET(); 789 DEBUG_RET();
791 } 790 }
792 791
793 792
/*
 * Record whether a header block contains a given field.
 *
 * header - NUL-terminated header text to search.
 * field  - field name to look for; the callers in this file pass it with a
 *          leading '\n' (e.g. "\nFrom: ") so a hit in the middle of the
 *          block is anchored to the start of a line.
 * flag   - set to 1 when the field is present; left untouched otherwise.
 *
 * The first line of the block has no preceding '\n', so it is checked
 * separately against the field name with its first character skipped.
 */
void header_has_field(char *header, char *field, int *flag)
{
    int present = (my_stristr(header, field) != NULL);
    if (!present) {
        // not found mid-block: compare the very start of the block instead
        present = (strncasecmp(header, field+1, strlen(field)-1) == 0);
    }
    if (!present) return;

    DEBUG_EMAIL(("header block has %s header\n", field+1));
    *flag = 1;
}
800
801
/*
 * Locate a field inside a header block.
 *
 * header - NUL-terminated header text to search.
 * field  - field name; the callers in this file pass it with a leading '\n'.
 *
 * Returns a pointer to the case-insensitive match found by my_stristr()
 * (which then points at the first character of field, i.e. the '\n' for
 * those callers), or `header` itself when the block starts with the field
 * name minus its first character, or NULL when neither test succeeds.
 */
char* header_get_field(char *header, char *field)
{
    char *hit = my_stristr(header, field);
    if (hit) return hit;

    // the first line has no preceding newline, so test it without field[0]
    if (strncasecmp(header, field+1, strlen(field)-1) == 0) return header;

    return NULL;
}
808
809
/*
 * Remove every occurrence of a field from a header block, in place.
 *
 * header - NUL-terminated, writable header text; it is shortened in place.
 * field  - field name to remove; the callers in this file pass it with a
 *          leading '\n' (e.g. "\nContent-Type: ").
 *
 * A field extends from its match position up to the next '\n' that is not
 * followed by a space or tab, so folded continuation lines are removed
 * together with the first line.
 *
 * All occurrences are removed, not just the first: the caller regenerates
 * these fields itself, so any surviving copy would end up duplicated in the
 * output.
 */
void header_strip_field(char *header, char *field)
{
    char *t;

    // The *t test guarantees progress: every pass below shortens the string,
    // and a match sitting on the terminator has nothing left to remove.
    while ((t = header_get_field(header, field)) && (*t != '\0')) {
        // find the newline that ends this field, skipping folded lines
        char *e = strchr(t+1, '\n');
        while (e && ((e[1] == ' ') || (e[1] == '\t'))) {
            e = strchr(e+1, '\n');
        }

        if (!e) {
            // this was the last header field, truncate the headers
            *t = '\0';
            continue;
        }

        // A match at the very start is taken not to begin with '\n', so the
        // '\n' at *e is dropped as well; otherwise the '\n' at *t is
        // overwritten by the one at *e, which then separates the neighbours.
        if (t == header) e++;

        // regions overlap, so memmove; +1 carries the terminator along
        memmove(t, e, strlen(e) + 1);
    }
}
834
835
/*
 * Decide whether a body has to be emitted base64 encoded.
 *
 * body - NUL-terminated text to inspect.
 *
 * Returns 1 as soon as a byte below 32 other than tab (9) or line feed (10)
 * is seen, and 0 when the whole string contains no such byte. Bytes with
 * the high bit set do not trigger encoding.
 */
int test_base64(char *body)
{
    const uint8_t *p;

    for (p = (const uint8_t *)body; *p != 0; p++) {
        int plain = (*p >= 32) || (*p == 9) || (*p == 10);
        if (plain) continue;

        DEBUG_EMAIL(("found base64 byte %d\n", (int)*p));
        DEBUG_HEXDUMPC(body, strlen(body), 0x10);
        return 1;
    }
    return 0;
}
851
852
853 void find_html_charset(char *html, char *charset, size_t charsetlen)
854 {
855 const int index = 1;
856 const int nmatch = index+1;
857 regmatch_t match[nmatch];
858 int rc = regexec(&meta_charset_pattern, html, nmatch, match, 0);
859 if (rc == 0) {
860 int s = match[index].rm_so;
861 int e = match[index].rm_eo;
862 if (s != -1) {
863 char save = html[e];
864 html[e] = '\0';
865 snprintf(charset, charsetlen, "%s", html+s); // copy the html charset
866 html[e] = save;
867 DEBUG_EMAIL(("charset %s from html text\n", charset));
868 }
869 else {
870 DEBUG_EMAIL(("matching %d %d %d %d", match[0].rm_so, match[0].rm_eo, match[1].rm_so, match[1].rm_eo));
871 DEBUG_HEXDUMPC(html, strlen(html), 0x10);
872 }
873 }
874 else {
875 DEBUG_EMAIL(("regexec returns %d\n", rc));
876 }
877 }
878
879
880 void write_body_part(FILE* f_output, char *body, char *mime, char *charset, char *boundary)
881 {
882 char *needfree = NULL;
883 if (strcasecmp("utf-8", charset)) {
884 // try to convert to the specified charset since it is not utf-8
885 size_t rc;
886 DEBUG_EMAIL(("Convert %s utf-8 to %s\n", mime, charset));
887 vbuf *newer = vballoc(2);
888 rc = vb_utf8to8bit(newer, body, strlen(body) + 1, charset);
889 if (rc == (size_t)-1) {
890 // unable to convert, maybe it is already in that character set
891 free(newer->b);
892 DEBUG_EMAIL(("Failed to convert %s utf-8 to %s\n", mime, charset));
893 }
894 else {
895 needfree = body = newer->b;
896 }
897 free(newer);
898 }
899 removeCR(body);
900 int base64 = test_base64(body);
901 fprintf(f_output, "\n--%s\n", boundary);
902 fprintf(f_output, "Content-Type: %s; charset=\"%s\"\n", mime, charset);
903 if (base64) fprintf(f_output, "Content-Transfer-Encoding: base64\n");
904 fprintf(f_output, "\n");
905 if (base64) {
906 char *enc = base64_encode(body, strlen(body));
907 if (enc) {
908 write_email_body(f_output, enc);
909 fprintf(f_output, "\n");
910 free(enc);
911 }
912 }
913 else {
914 write_email_body(f_output, body);
915 }
916 if (needfree) free(needfree);
917 }
918
919
794 void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf) 920 void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf)
795 { 921 {
796 char *boundary = NULL; // the boundary marker between multipart sections 922 char boundary[60];
797 int boundary_created = 0; // we have not (yet) created a new boundary 923 char body_charset[60];
798 char *temp = NULL; 924 char *temp = NULL;
799 int attach_num, base64_body = 0; 925 int attach_num;
800 time_t em_time; 926 time_t em_time;
801 char *c_time; 927 char *c_time;
802 pst_item_attach* current_attach; 928 pst_item_attach* current_attach;
803 int has_from, has_subject, has_to, has_cc, has_bcc, has_date; 929 int has_from, has_subject, has_to, has_cc, has_bcc, has_date;
804 has_from = has_subject = has_to = has_cc = has_bcc = has_date = 0; 930 has_from = has_subject = has_to = has_cc = has_bcc = has_date = 0;
805 DEBUG_ENT("write_normal_email"); 931 DEBUG_ENT("write_normal_email");
932
933 // setup default body character set
934 snprintf(body_charset, sizeof(body_charset), "%s", (item->email->body_charset) ? item->email->body_charset : "utf-8");
806 935
807 // convert the sent date if it exists, or set it to a fixed date 936 // convert the sent date if it exists, or set it to a fixed date
808 if (item->email->sent_date) { 937 if (item->email->sent_date) {
809 em_time = fileTimeToUnixTime(item->email->sent_date, 0); 938 em_time = fileTimeToUnixTime(item->email->sent_date, 0);
810 c_time = ctime(&em_time); 939 c_time = ctime(&em_time);
813 else 942 else
814 c_time = "Fri Dec 28 12:06:21 2001"; 943 c_time = "Fri Dec 28 12:06:21 2001";
815 } else 944 } else
816 c_time= "Fri Dec 28 12:06:21 2001"; 945 c_time= "Fri Dec 28 12:06:21 2001";
817 946
818 // we will always look at the header to discover some stuff 947 // create our MIME boundary here.
948 snprintf(boundary, sizeof(boundary), "--boundary-LibPST-iamunique-%i_-_-", rand());
949
950 // we will always look at the headers to discover some stuff
819 if (item->email->header ) { 951 if (item->email->header ) {
820 char *b1, *b2; 952 char *t;
821 // see if there is a boundary variable there
822 // this search MUST be made case insensitive (DONE).
823 // Also, we should check to find out if we are looking
824 // at the boundary associated with Content-Type, and that
825 // the content type really is multipart
826
827 removeCR(item->email->header); 953 removeCR(item->email->header);
828 954
829 if ((b2 = my_stristr(item->email->header, "boundary="))) { 955 // some of the headers we get from the file are not properly defined.
830 int len; 956 // they can contain some email stuff too. We will cut off the header
831 b2 += strlen("boundary="); // move boundary to first char of marker 957 // when we see a \n\n
832 958 temp = strstr(item->email->header, "\n\n");
833 if (*b2 == '"') { 959 if (temp) {
834 b2++; 960 temp[1] = '\0'; // stop after first \n
835 b1 = strchr(b2, '"'); // find terminating quote 961 DEBUG_EMAIL(("Found body text in header %s\n", temp+2));
836 } else { 962 }
837 b1 = b2; 963
838 while (isgraph(*b1)) // find first char that isn't part of boundary 964 // Check if the headers have all the necessary fields
839 b1++; 965 header_has_field(item->email->header, "\nFrom: ", &has_from);
966 header_has_field(item->email->header, "\nTo: ", &has_to);
967 header_has_field(item->email->header, "\nSubject: ", &has_subject);
968 header_has_field(item->email->header, "\nDate: ", &has_date);
969 header_has_field(item->email->header, "\nCC: ", &has_cc);
970 header_has_field(item->email->header, "\nBCC: ", &has_bcc);
971
972 // look for charset in Content-Type header
973 t = header_get_field(item->email->header, "\nContent-Type: ");
974 if (t) {
975 // assume charset= will be on the first line, rather than on a continuation line
976 t++;
977 char *n = strchr(t, '\n');
978 char *s = my_stristr(t, "; charset=");
979 if (n && s && (s < n)) {
980 char *e;
981 char save;
982 s += 10; // skip over charset=
983 if (*s == '"') {
984 s++;
985 e = strchr(s, '"');
986 }
987 else {
988 e = strchr(s, ';');
989 }
990 if (!e || (e > n)) e = n; // use the trailing lf as terminator if nothing better
991 save = *e;
992 *e = '\0';
993 snprintf(body_charset, sizeof(body_charset), "%s", s); // copy the charset to our buffer
994 *e = save;
995 DEBUG_EMAIL(("body charset %s from headers\n", body_charset));
840 } 996 }
841 len = b1 - b2; 997 }
842 boundary = malloc(len+1); //malloc that length 998
843 strncpy(boundary, b2, len); // copy boundary to another variable 999 // Strip out the mime headers and some others that we don't want to emit
844 boundary[len] = '\0'; 1000 header_strip_field(item->email->header, "\nMicrosoft Mail Internet Headers");
845 b1 = b2 = boundary; 1001 header_strip_field(item->email->header, "\nMIME-Version: ");
846 while (*b2 != '\0') { // remove any CRs and Tabs 1002 header_strip_field(item->email->header, "\nContent-Type: ");
847 if (*b2 != '\n' && *b2 != '\r' && *b2 != '\t') { 1003 header_strip_field(item->email->header, "\nContent-Transfer-Encoding: ");
848 *b1 = *b2; 1004 header_strip_field(item->email->header, "\nContent-class: ");
849 b1++; 1005 header_strip_field(item->email->header, "\nX-MimeOLE: ");
850 }
851 b2++;
852 }
853 *b1 = '\0';
854
855 DEBUG_EMAIL(("Found boundary of - %s\n", boundary));
856 } else {
857 DEBUG_EMAIL(("boundary not found in header\n"));
858 }
859
860 // also possible to set 7bit encoding detection here.
861 if ((b2 = my_stristr(item->email->header, "Content-Transfer-Encoding:"))) {
862 if ((b2 = strchr(b2, ':'))) {
863 b2++; // skip to the : at the end of the string
864
865 while (*b2 == ' ' || *b2 == '\t')
866 b2++;
867 if (pst_strincmp(b2, "base64", 6)==0) {
868 DEBUG_EMAIL(("body is base64 encoded\n"));
869 base64_body = 1;
870 }
871 } else {
872 DEBUG_WARN(("found a ':' during the my_stristr, but not after that..\n"));
873 }
874 }
875
876 // Check if the header block has all the necessary headers.
877 if (my_stristr(item->email->header, "\nFrom:") || (strncasecmp(item->email->header, "From: ", 6) == 0) || my_stristr(item->email->header, "\nX-From:")) {
878 DEBUG_EMAIL(("header block has From header\n"));
879 has_from = 1;
880 }
881 if (my_stristr(item->email->header, "\nTo:") || (strncasecmp(item->email->header, "To: ", 4) == 0)) {
882 DEBUG_EMAIL(("header block has To header\n"));
883 has_to = 1;
884 }
885 if (my_stristr(item->email->header, "\nSubject:") || (strncasecmp(item->email->header, "Subject: ", 9) == 0)) {
886 DEBUG_EMAIL(("header block has Subject header\n"));
887 has_subject = 1;
888 }
889 if (my_stristr(item->email->header, "\nDate:") || (strncasecmp(item->email->header, "Date: ", 6) == 0)) {
890 DEBUG_EMAIL(("header block has Date header\n"));
891 has_date = 1;
892 }
893 if (my_stristr(item->email->header, "\nCC:") || (strncasecmp(item->email->header, "CC: ", 4) == 0)) {
894 DEBUG_EMAIL(("header block has CC header\n"));
895 has_cc = 1;
896 }
897 if (my_stristr(item->email->header, "\nBCC:") || (strncasecmp(item->email->header, "BCC: ", 5) == 0)) {
898 DEBUG_EMAIL(("header block has BCC header\n"));
899 has_bcc = 1;
900 }
901 }
902
903 if (!boundary && (item->attach || (item->email->body && item->email->htmlbody)
904 || item->email->rtf_compressed || item->email->encrypted_body
905 || item->email->encrypted_htmlbody)) {
906 // we need to create a boundary here.
907 DEBUG_EMAIL(("must create own boundary. oh dear.\n"));
908 boundary = malloc(50 * sizeof(char)); // allow 50 chars for boundary
909 boundary[0] = '\0';
910 sprintf(boundary, "--boundary-LibPST-iamunique-%i_-_-", rand());
911 DEBUG_EMAIL(("created boundary is %s\n", boundary));
912 boundary_created = 1;
913 } 1006 }
914 1007
915 DEBUG_EMAIL(("About to print Header\n")); 1008 DEBUG_EMAIL(("About to print Header\n"));
916 1009
917 if (item && item->email && item->email->subject && item->email->subject->subj) { 1010 if (item && item->email && item->email->subject && item->email->subject->subj) {
918 DEBUG_EMAIL(("item->email->subject->subj = %s\n", item->email->subject->subj)); 1011 DEBUG_EMAIL(("item->email->subject->subj = %s\n", item->email->subject->subj));
919 } 1012 }
920 1013
921 if (item->email->header) { 1014 if (item->email->header) {
922 int len; 1015 int len;
923 char *soh = NULL; // real start of headers. 1016 char *soh = item->email->header;
924 1017
925 // some of the headers we get from the file are not properly defined.
926 // they can contain some email stuff too. We will cut off the header
927 // when we see a \n\n or \r\n\r\n
928 removeCR(item->email->header);
929 temp = strstr(item->email->header, "\n\n");
930
931 if (temp) {
932 DEBUG_EMAIL(("Found body text in header\n"));
933 temp[1] = '\0'; // stop after first \n
934 }
935
936 // Write out any fields that weren't included in the header.
937 if (!has_from) {
938 temp = item->email->outlook_sender;
939 if (!temp) temp = "";
940 fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, temp);
941 }
942
943 if (!has_subject) {
944 if (item->email->subject && item->email->subject->subj) {
945 fprintf(f_output, "Subject: %s\n", item->email->subject->subj);
946 } else {
947 fprintf(f_output, "Subject: \n");
948 }
949 }
950
951 if (!has_to && item->email->sentto_address) {
952 fprintf(f_output, "To: %s\n", item->email->sentto_address);
953 }
954
955 if (!has_cc && item->email->cc_address) {
956 fprintf(f_output, "Cc: %s\n", item->email->cc_address);
957 }
958
959 if (!has_bcc && item->email->bcc_address) {
960 fprintf(f_output, "Bcc: %s\n", item->email->bcc_address);
961 }
962
963 if (!has_date && item->email->sent_date) {
964 char c_time[C_TIME_SIZE];
965 strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time));
966 fprintf(f_output, "Date: %s\n", c_time);
967 }
968
969 // Now, write out the header...
970 soh = skip_header_prologue(item->email->header);
971 if (mode != MODE_SEPARATE) { 1018 if (mode != MODE_SEPARATE) {
972 // don't put rubbish in if we are doing separate 1019 // don't put rubbish in if we are doing separate
973 if (strncmp(soh, "X-From_: ", 9) == 0 ) { 1020 if (strncmp(soh, "X-From_: ", 9) == 0 ) {
974 fputs("From ", f_output); 1021 fputs("From ", f_output);
975 soh += 9; 1022 soh += 9;
976 } else 1023 } else
977 fprintf(f_output, "From \"%s\" %s\n", item->email->outlook_sender_name, c_time); 1024 fprintf(f_output, "From \"%s\" %s\n", item->email->outlook_sender_name, c_time);
978 } 1025 }
1026
1027 // make sure the headers end with a \n
979 fprintf(f_output, "%s", soh); 1028 fprintf(f_output, "%s", soh);
980 len = strlen(soh); 1029 len = strlen(soh);
981 if (!len || (soh[len-1] != '\n')) fprintf(f_output, "\n"); 1030 if (!len || (soh[len-1] != '\n')) fprintf(f_output, "\n");
982 1031
983 } else { 1032 } else {
989 } else { 1038 } else {
990 temp = "(readpst_null)"; 1039 temp = "(readpst_null)";
991 } 1040 }
992 fprintf(f_output, "From \"%s\" %s\n", temp, c_time); 1041 fprintf(f_output, "From \"%s\" %s\n", temp, c_time);
993 } 1042 }
994 1043 }
1044
1045 // create required header fields that are not already written
1046 if (!has_from) {
995 temp = item->email->outlook_sender; 1047 temp = item->email->outlook_sender;
996 if (!temp) temp = ""; 1048 if (!temp) temp = "";
997 fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, temp); 1049 fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, temp);
998 1050 }
1051
1052 if (!has_subject) {
999 if (item->email->subject && item->email->subject->subj) { 1053 if (item->email->subject && item->email->subject->subj) {
1000 fprintf(f_output, "Subject: %s\n", item->email->subject->subj); 1054 fprintf(f_output, "Subject: %s\n", item->email->subject->subj);
1001 } else { 1055 } else {
1002 fprintf(f_output, "Subject: \n"); 1056 fprintf(f_output, "Subject: \n");
1003 } 1057 }
1004 1058 }
1005 if (item->email->sentto_address) { 1059
1006 fprintf(f_output, "To: %s\n", item->email->sentto_address); 1060 if (!has_to && item->email->sentto_address) {
1007 } 1061 fprintf(f_output, "To: %s\n", item->email->sentto_address);
1008 1062 }
1009 if (item->email->cc_address) { 1063
1010 fprintf(f_output, "Cc: %s\n", item->email->cc_address); 1064 if (!has_cc && item->email->cc_address) {
1011 } 1065 fprintf(f_output, "Cc: %s\n", item->email->cc_address);
1012 1066 }
1013 if (item->email->sent_date) { 1067
1014 char c_time[C_TIME_SIZE]; 1068 if (!has_bcc && item->email->bcc_address) {
1015 strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time)); 1069 fprintf(f_output, "Bcc: %s\n", item->email->bcc_address);
1016 fprintf(f_output, "Date: %s\n", c_time); 1070 }
1017 } 1071
1018 } 1072 if (!has_date && item->email->sent_date) {
1019 1073 char c_time[C_TIME_SIZE];
1074 strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time));
1075 fprintf(f_output, "Date: %s\n", c_time);
1076 }
1077
1078 // add our own mime headers
1020 fprintf(f_output, "MIME-Version: 1.0\n"); 1079 fprintf(f_output, "MIME-Version: 1.0\n");
1021 if (boundary && boundary_created) { 1080 if (item->attach || (item->email->rtf_compressed && save_rtf)
1022 // if we created the boundary, then it has NOT already been printed 1081 || item->email->encrypted_body
1023 // in the headers above. 1082 || item->email->encrypted_htmlbody) {
1024 if (item->attach) { 1083 // use multipart/mixed if we have attachments
1025 // write the boundary stuff if we have attachments 1084 fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary);
1026 fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); 1085 } else {
1027 } else { 1086 // else use multipart/alternative
1028 // else we have multipart/alternative then tell it so 1087 fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary);
1029 fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary); 1088 }
1030 } 1089 fprintf(f_output, "\n"); // end of headers, start of body
1031 } 1090
1032 fprintf(f_output, "\n"); // start the body 1091 // now dump the body parts
1033 DEBUG_EMAIL(("About to print Body\n"));
1034
1035 if (item->email->body) { 1092 if (item->email->body) {
1036 if (boundary) { 1093 write_body_part(f_output, item->email->body, "text/plain", body_charset, boundary);
1037 // try to find the charset for this body part
1038 const char *def = "utf-8";
1039 // it seems that if (item->email->body_charset) is set, then
1040 // we actually have utf8 plain body text. If that is not set
1041 // we have plain body text in an 8 bit charset specified in
1042 // the headers.
1043 char *c = my_stristr(item->email->header, "\nContent-Type:");
1044 if (c) {
1045 c++;
1046 char *n = my_stristr(c, "\n"); // termination on the content type
1047 if (n) {
1048 char *s = my_stristr(c, "; charset=");
1049 if (s && (s < n)) {
1050 char *e;
1051 s += 10; // skip over charset=
1052 if (*s == '"') {
1053 s++;
1054 e = my_stristr(s, "\"");
1055 }
1056 else {
1057 e = my_stristr(s, ";");
1058 }
1059 if (!e || (e > n)) e = n; // use the trailing lf as terminator if nothing better
1060 *e = '\0'; // corrupt the header, but we have already printed it
1061 def = s;
1062 DEBUG_EMAIL(("body charset %s from headers\n", def));
1063 }
1064 }
1065 }
1066 fprintf(f_output, "\n--%s\n", boundary);
1067 fprintf(f_output, "Content-Type: text/plain; charset=\"%s\"\n", def);
1068 if (base64_body)
1069 fprintf(f_output, "Content-Transfer-Encoding: base64\n");
1070 fprintf(f_output, "\n");
1071 }
1072 else if (item->email->body_charset && (strcasecmp("utf-8",item->email->body_charset))) {
1073 // try to convert to the specified charset since it is not utf-8
1074 size_t rc;
1075 DEBUG_EMAIL(("Convert plain text utf-8 to %s\n", item->email->body_charset));
1076 vbuf *newer = vballoc(2);
1077 rc = vb_utf8to8bit(newer, item->email->body, strlen(item->email->body) + 1, item->email->body_charset);
1078 if (rc == (size_t)-1) {
1079 free(newer->b);
1080 DEBUG_EMAIL(("Failed to convert plain text utf-8 to %s\n", item->email->body_charset));
1081 }
1082 else {
1083 // unable to convert, maybe it is already in that character set
1084 free(item->email->body);
1085 item->email->body = newer->b;
1086 }
1087 free(newer);
1088 }
1089 removeCR(item->email->body);
1090 if (base64_body) {
1091 char *enc = base64_encode(item->email->body, strlen(item->email->body));
1092 if (enc) {
1093 write_email_body(f_output, enc);
1094 free(enc);
1095 }
1096 }
1097 else {
1098 write_email_body(f_output, item->email->body);
1099 }
1100 } 1094 }
1101 1095
1102 if (item->email->htmlbody) { 1096 if (item->email->htmlbody) {
1103 if (boundary) { 1097 find_html_charset(item->email->htmlbody, body_charset, sizeof(body_charset));
1104 const char *def = "utf-8"; 1098 write_body_part(f_output, item->email->htmlbody, "text/html", body_charset, boundary);
1105 if (item->email->body_charset) def = item->email->body_charset;
1106 fprintf(f_output, "\n--%s\n", boundary);
1107 fprintf(f_output, "Content-Type: text/html; charset=\"%s\"\n", def);
1108 if (base64_body) fprintf(f_output, "Content-Transfer-Encoding: base64\n");
1109 fprintf(f_output, "\n");
1110 }
1111 removeCR(item->email->htmlbody);
1112 if (base64_body) {
1113 char *enc = base64_encode(item->email->htmlbody, strlen(item->email->htmlbody));
1114 if (enc) {
1115 write_email_body(f_output, enc);
1116 free(enc);
1117 }
1118 }
1119 else {
1120 write_email_body(f_output, item->email->htmlbody);
1121 }
1122 } 1099 }
1123 1100
1124 if (item->email->rtf_compressed && save_rtf) { 1101 if (item->email->rtf_compressed && save_rtf) {
1125 //int32_t tester;
1126 DEBUG_EMAIL(("Adding RTF body as attachment\n")); 1102 DEBUG_EMAIL(("Adding RTF body as attachment\n"));
1127 current_attach = (pst_item_attach*)xmalloc(sizeof(pst_item_attach)); 1103 current_attach = (pst_item_attach*)xmalloc(sizeof(pst_item_attach));
1128 memset(current_attach, 0, sizeof(pst_item_attach)); 1104 memset(current_attach, 0, sizeof(pst_item_attach));
1129 current_attach->next = item->attach; 1105 current_attach->next = item->attach;
1130 item->attach = current_attach; 1106 item->attach = current_attach;
1131 current_attach->data = lzfu_decompress(item->email->rtf_compressed, item->email->rtf_compressed_size, &current_attach->size); 1107 current_attach->data = lzfu_decompress(item->email->rtf_compressed, item->email->rtf_compressed_size, &current_attach->size);
1132 current_attach->filename2 = xmalloc(strlen(RTF_ATTACH_NAME)+2); 1108 current_attach->filename2 = xmalloc(strlen(RTF_ATTACH_NAME)+2);
1133 strcpy(current_attach->filename2, RTF_ATTACH_NAME); 1109 strcpy(current_attach->filename2, RTF_ATTACH_NAME);
1134 current_attach->mimetype = xmalloc(strlen(RTF_ATTACH_TYPE)+2); 1110 current_attach->mimetype = xmalloc(strlen(RTF_ATTACH_TYPE)+2);
1135 strcpy(current_attach->mimetype, RTF_ATTACH_TYPE); 1111 strcpy(current_attach->mimetype, RTF_ATTACH_TYPE);
1136 //memcpy(&tester, item->email->rtf_compressed+sizeof(int32_t), sizeof(int32_t));
1137 //LE32_CPU(tester);
1138 //printf("lz produced %d bytes, rtf claims %d bytes\n", current_attach->size, tester);
1139 } 1112 }
1140 1113
1141 if (item->email->encrypted_body || item->email->encrypted_htmlbody) { 1114 if (item->email->encrypted_body || item->email->encrypted_htmlbody) {
1142 // if either the body or htmlbody is encrypted, add them as attachments 1115 // if either the body or htmlbody is encrypted, add them as attachments
1143 if (item->email->encrypted_body) { 1116 if (item->email->encrypted_body) {
1162 item->email->encrypted_htmlbody = NULL; 1135 item->email->encrypted_htmlbody = NULL;
1163 } 1136 }
1164 write_email_body(f_output, "The body of this email is encrypted. This isn't supported yet, but the body is now an attachment\n"); 1137 write_email_body(f_output, "The body of this email is encrypted. This isn't supported yet, but the body is now an attachment\n");
1165 } 1138 }
1166 1139
1167 // attachments 1140 // other attachments
1168 attach_num = 0; 1141 attach_num = 0;
1169 for (current_attach = item->attach; current_attach; current_attach = current_attach->next) { 1142 for (current_attach = item->attach; current_attach; current_attach = current_attach->next) {
1170 DEBUG_EMAIL(("Attempting Attachment encoding\n")); 1143 DEBUG_EMAIL(("Attempting Attachment encoding\n"));
1171 if (!current_attach->data) { 1144 if (!current_attach->data) {
1172 DEBUG_EMAIL(("Data of attachment is NULL!. Size is supposed to be %i\n", current_attach->size)); 1145 DEBUG_EMAIL(("Data of attachment is NULL!. Size is supposed to be %i\n", current_attach->size));
1176 else 1149 else
1177 write_inline_attachment(f_output, current_attach, boundary, pst); 1150 write_inline_attachment(f_output, current_attach, boundary, pst);
1178 } 1151 }
1179 if (mode != MODE_SEPARATE) { /* do not add a boundary after the last attachment for mode_MH */ 1152 if (mode != MODE_SEPARATE) { /* do not add a boundary after the last attachment for mode_MH */
1180 DEBUG_EMAIL(("Writing buffer between emails\n")); 1153 DEBUG_EMAIL(("Writing buffer between emails\n"));
1181 if (boundary) fprintf(f_output, "\n--%s--\n", boundary); 1154 fprintf(f_output, "\n--%s--\n", boundary);
1182 fprintf(f_output, "\n\n"); 1155 fprintf(f_output, "\n\n");
1183 } 1156 }
1184 if (boundary) free (boundary);
1185 DEBUG_RET(); 1157 DEBUG_RET();
1186 } 1158 }
1187 1159
1188 1160
1189 void write_vcard(FILE* f_output, pst_item_contact* contact, char comment[]) 1161 void write_vcard(FILE* f_output, pst_item_contact* contact, char comment[])