comparison src/readpst.c @ 116:ed2a260bbb98 stable-0-6-25

improve handling of content-type charset values in mime parts
author Carl Byington <carl@five-ten-sg.com>
date Fri, 16 Jan 2009 15:23:52 -0800
parents 7133b39975f7
children 0f1492b7fe8b
comparison
equal deleted inserted replaced
115:7689c006b166 116:ed2a260bbb98
4 * Written by David Smith 4 * Written by David Smith
5 * dave.s@earthcorp.com 5 * dave.s@earthcorp.com
6 */ 6 */
7 #include "define.h" 7 #include "define.h"
8 #include "libstrfunc.h" 8 #include "libstrfunc.h"
9 #include "vbuf.h"
9 #include "libpst.h" 10 #include "libpst.h"
10 #include "common.h" 11 #include "common.h"
11 #include "timeconv.h" 12 #include "timeconv.h"
12 #include "lzfu.h" 13 #include "lzfu.h"
13 14
760 } 761 }
761 if (boundary) { 762 if (boundary) {
762 char *attach_filename; 763 char *attach_filename;
763 fprintf(f_output, "\n--%s\n", boundary); 764 fprintf(f_output, "\n--%s\n", boundary);
764 if (!current_attach->mimetype) { 765 if (!current_attach->mimetype) {
765 fprintf(f_output, "Content-type: %s\n", MIME_TYPE_DEFAULT); 766 fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT);
766 } else { 767 } else {
767 fprintf(f_output, "Content-type: %s\n", current_attach->mimetype); 768 fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype);
768 } 769 }
769 fprintf(f_output, "Content-transfer-encoding: base64\n"); 770 fprintf(f_output, "Content-Transfer-Encoding: base64\n");
770 // If there is a long filename (filename2) use that, otherwise 771 // If there is a long filename (filename2) use that, otherwise
771 // use the 8.3 filename (filename1) 772 // use the 8.3 filename (filename1)
772 if (current_attach->filename2) { 773 if (current_attach->filename2) {
773 attach_filename = current_attach->filename2; 774 attach_filename = current_attach->filename2;
774 } else { 775 } else {
820 if (item->email->header ) { 821 if (item->email->header ) {
821 char *b1, *b2; 822 char *b1, *b2;
822 // see if there is a boundary variable there 823 // see if there is a boundary variable there
823 // this search MUST be made case insensitive (DONE). 824 // this search MUST be made case insensitive (DONE).
824 // Also, we should check to find out if we are looking 825 // Also, we should check to find out if we are looking
825 // at the boundary associated with content-type, and that 826 // at the boundary associated with Content-Type, and that
826 // the content type really is multipart 827 // the content type really is multipart
827 828
828 removeCR(item->email->header); 829 removeCR(item->email->header);
829 830
830 if ((b2 = my_stristr(item->email->header, "boundary="))) { 831 if ((b2 = my_stristr(item->email->header, "boundary="))) {
1022 if (boundary && boundary_created) { 1023 if (boundary && boundary_created) {
1023 // if we created the boundary, then it has NOT already been printed 1024 // if we created the boundary, then it has NOT already been printed
1024 // in the headers above. 1025 // in the headers above.
1025 if (item->attach) { 1026 if (item->attach) {
1026 // write the boundary stuff if we have attachments 1027 // write the boundary stuff if we have attachments
1027 fprintf(f_output, "Content-type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); 1028 fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary);
1028 } else if (boundary) { 1029 } else {
1029 // else if we have multipart/alternative then tell it so 1030 // else we have multipart/alternative then tell it so
1030 fprintf(f_output, "Content-type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary); 1031 fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary);
1031 } else if (item->email->htmlbody) {
1032 fprintf(f_output, "Content-type: text/html\n");
1033 } 1032 }
1034 } 1033 }
1035 fprintf(f_output, "\n"); // start the body 1034 fprintf(f_output, "\n"); // start the body
1036 DEBUG_EMAIL(("About to print Body\n")); 1035 DEBUG_EMAIL(("About to print Body\n"));
1037 1036
1038 if (item->email->body) { 1037 if (item->email->body) {
1039 if (boundary) { 1038 if (boundary) {
1039 // try to find the charset for this body part
1040 const char *def = "utf-8";
1041 // it seems that if (item->email->body_charset) is set, then
1042 // we actually have utf8 plain body text. If that is not set
1043 // we have plain body text in an 8 bit charset specified in
1044 // the headers.
1045 char *c = my_stristr(item->email->header, "\nContent-Type:");
1046 if (c) {
1047 c++;
1048 char *n = my_stristr(c, "\n"); // termination on the content type
1049 if (n) {
1050 char *s = my_stristr(c, "; charset=");
1051 if (s && (s < n)) {
1052 char *e;
1053 s += 10; // skip over charset=
1054 if (*s == '"') {
1055 s++;
1056 e = my_stristr(s, "\"");
1057 }
1058 else {
1059 e = my_stristr(s, ";");
1060 }
1061 if (!e || (e > n)) e = n; // use the trailing lf as terminator if nothing better
1062 *e = '\0'; // corrupt the header, but we have already printed it
1063 def = s;
1064 DEBUG_EMAIL(("body charset %s from headers\n", def));
1065 }
1066 }
1067 }
1040 fprintf(f_output, "\n--%s\n", boundary); 1068 fprintf(f_output, "\n--%s\n", boundary);
1041 fprintf(f_output, "Content-type: text/plain\n"); 1069 fprintf(f_output, "Content-Type: text/plain; charset=\"%s\"\n", def);
1042 if (base64_body) 1070 if (base64_body)
1043 fprintf(f_output, "Content-Transfer-Encoding: base64\n"); 1071 fprintf(f_output, "Content-Transfer-Encoding: base64\n");
1044 fprintf(f_output, "\n"); 1072 fprintf(f_output, "\n");
1073 }
1074 else if (item->email->body_charset && (strcasecmp("utf-8",item->email->body_charset))) {
1075 // try to convert to the specified charset since it is not utf-8
1076 size_t rc;
1077 DEBUG_EMAIL(("Convert plain text utf-8 to %s\n", item->email->body_charset));
1078 vbuf *newer = vballoc(2);
1079 rc = vb_utf8to8bit(newer, item->email->body, strlen(item->email->body) + 1, item->email->body_charset);
1080 if (rc == (size_t)-1) {
1081 free(newer->b);
1082 DEBUG_EMAIL(("Failed to convert plain text utf-8 to %s\n", item->email->body_charset));
1083 }
1084 else {
1085 free(item->email->body);
1086 item->email->body = newer->b;
1087 }
1088 free(newer);
1045 } 1089 }
1046 removeCR(item->email->body); 1090 removeCR(item->email->body);
1047 if (base64_body) { 1091 if (base64_body) {
1048 char *enc = base64_encode(item->email->body, strlen(item->email->body)); 1092 char *enc = base64_encode(item->email->body, strlen(item->email->body));
1049 if (enc) { 1093 if (enc) {
1056 } 1100 }
1057 } 1101 }
1058 1102
1059 if (item->email->htmlbody) { 1103 if (item->email->htmlbody) {
1060 if (boundary) { 1104 if (boundary) {
1105 const char *def = "utf-8";
1106 if (item->email->body_charset) def = item->email->body_charset;
1061 fprintf(f_output, "\n--%s\n", boundary); 1107 fprintf(f_output, "\n--%s\n", boundary);
1062 fprintf(f_output, "Content-type: text/html\n"); 1108 fprintf(f_output, "Content-Type: text/html; charset=\"%s\"\n", def);
1063 if (base64_body) fprintf(f_output, "Content-Transfer-Encoding: base64\n"); 1109 if (base64_body) fprintf(f_output, "Content-Transfer-Encoding: base64\n");
1064 fprintf(f_output, "\n"); 1110 fprintf(f_output, "\n");
1065 } 1111 }
1066 removeCR(item->email->htmlbody); 1112 removeCR(item->email->htmlbody);
1067 if (base64_body) { 1113 if (base64_body) {