Mercurial > libpst
comparison src/readpst.c @ 116:ed2a260bbb98 stable-0-6-25
Improve handling of Content-Type charset values in MIME parts
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Fri, 16 Jan 2009 15:23:52 -0800 |
parents | 7133b39975f7 |
children | 0f1492b7fe8b |
comparison legend: equal | deleted | inserted | replaced
115:7689c006b166 | 116:ed2a260bbb98 |
---|---|
4 * Written by David Smith | 4 * Written by David Smith |
5 * dave.s@earthcorp.com | 5 * dave.s@earthcorp.com |
6 */ | 6 */ |
7 #include "define.h" | 7 #include "define.h" |
8 #include "libstrfunc.h" | 8 #include "libstrfunc.h" |
9 #include "vbuf.h" | |
9 #include "libpst.h" | 10 #include "libpst.h" |
10 #include "common.h" | 11 #include "common.h" |
11 #include "timeconv.h" | 12 #include "timeconv.h" |
12 #include "lzfu.h" | 13 #include "lzfu.h" |
13 | 14 |
760 } | 761 } |
761 if (boundary) { | 762 if (boundary) { |
762 char *attach_filename; | 763 char *attach_filename; |
763 fprintf(f_output, "\n--%s\n", boundary); | 764 fprintf(f_output, "\n--%s\n", boundary); |
764 if (!current_attach->mimetype) { | 765 if (!current_attach->mimetype) { |
765 fprintf(f_output, "Content-type: %s\n", MIME_TYPE_DEFAULT); | 766 fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT); |
766 } else { | 767 } else { |
767 fprintf(f_output, "Content-type: %s\n", current_attach->mimetype); | 768 fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype); |
768 } | 769 } |
769 fprintf(f_output, "Content-transfer-encoding: base64\n"); | 770 fprintf(f_output, "Content-Transfer-Encoding: base64\n"); |
770 // If there is a long filename (filename2) use that, otherwise | 771 // If there is a long filename (filename2) use that, otherwise |
771 // use the 8.3 filename (filename1) | 772 // use the 8.3 filename (filename1) |
772 if (current_attach->filename2) { | 773 if (current_attach->filename2) { |
773 attach_filename = current_attach->filename2; | 774 attach_filename = current_attach->filename2; |
774 } else { | 775 } else { |
820 if (item->email->header ) { | 821 if (item->email->header ) { |
821 char *b1, *b2; | 822 char *b1, *b2; |
822 // see if there is a boundary variable there | 823 // see if there is a boundary variable there |
823 // this search MUST be made case insensitive (DONE). | 824 // this search MUST be made case insensitive (DONE). |
824 // Also, we should check to find out if we are looking | 825 // Also, we should check to find out if we are looking |
825 // at the boundary associated with content-type, and that | 826 // at the boundary associated with Content-Type, and that |
826 // the content type really is multipart | 827 // the content type really is multipart |
827 | 828 |
828 removeCR(item->email->header); | 829 removeCR(item->email->header); |
829 | 830 |
830 if ((b2 = my_stristr(item->email->header, "boundary="))) { | 831 if ((b2 = my_stristr(item->email->header, "boundary="))) { |
1022 if (boundary && boundary_created) { | 1023 if (boundary && boundary_created) { |
1023 // if we created the boundary, then it has NOT already been printed | 1024 // if we created the boundary, then it has NOT already been printed |
1024 // in the headers above. | 1025 // in the headers above. |
1025 if (item->attach) { | 1026 if (item->attach) { |
1026 // write the boundary stuff if we have attachments | 1027 // write the boundary stuff if we have attachments |
1027 fprintf(f_output, "Content-type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); | 1028 fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); |
1028 } else if (boundary) { | 1029 } else { |
1029 // else if we have multipart/alternative then tell it so | 1030 // else we have multipart/alternative then tell it so |
1030 fprintf(f_output, "Content-type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary); | 1031 fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary); |
1031 } else if (item->email->htmlbody) { | |
1032 fprintf(f_output, "Content-type: text/html\n"); | |
1033 } | 1032 } |
1034 } | 1033 } |
1035 fprintf(f_output, "\n"); // start the body | 1034 fprintf(f_output, "\n"); // start the body |
1036 DEBUG_EMAIL(("About to print Body\n")); | 1035 DEBUG_EMAIL(("About to print Body\n")); |
1037 | 1036 |
1038 if (item->email->body) { | 1037 if (item->email->body) { |
1039 if (boundary) { | 1038 if (boundary) { |
1039 // try to find the charset for this body part | |
1040 const char *def = "utf-8"; | |
1041 // it seems that if (item->email->body_charset) is set, then | |
1042 // we actually have utf8 plain body text. If that is not set | |
1043 // we have plain body text in an 8 bit charset specified in | |
1044 // the headers. | |
1045 char *c = my_stristr(item->email->header, "\nContent-Type:"); | |
1046 if (c) { | |
1047 c++; | |
1048 char *n = my_stristr(c, "\n"); // termination on the content type | |
1049 if (n) { | |
1050 char *s = my_stristr(c, "; charset="); | |
1051 if (s && (s < n)) { | |
1052 char *e; | |
1053 s += 10; // skip over the 10-character "; charset=" token | |
1054 if (*s == '"') { | |
1055 s++; | |
1056 e = my_stristr(s, "\""); | |
1057 } | |
1058 else { | |
1059 e = my_stristr(s, ";"); | |
1060 } | |
1061 if (!e || (e > n)) e = n; // use the trailing lf as terminator if nothing better | |
1062 *e = '\0'; // corrupt the header, but we have already printed it | |
1063 def = s; | |
1064 DEBUG_EMAIL(("body charset %s from headers\n", def)); | |
1065 } | |
1066 } | |
1067 } | |
1040 fprintf(f_output, "\n--%s\n", boundary); | 1068 fprintf(f_output, "\n--%s\n", boundary); |
1041 fprintf(f_output, "Content-type: text/plain\n"); | 1069 fprintf(f_output, "Content-Type: text/plain; charset=\"%s\"\n", def); |
1042 if (base64_body) | 1070 if (base64_body) |
1043 fprintf(f_output, "Content-Transfer-Encoding: base64\n"); | 1071 fprintf(f_output, "Content-Transfer-Encoding: base64\n"); |
1044 fprintf(f_output, "\n"); | 1072 fprintf(f_output, "\n"); |
1073 } | |
1074 else if (item->email->body_charset && (strcasecmp("utf-8",item->email->body_charset))) { | |
1075 // try to convert to the specified charset since it is not utf-8 | |
1076 size_t rc; | |
1077 DEBUG_EMAIL(("Convert plain text utf-8 to %s\n", item->email->body_charset)); | |
1078 vbuf *newer = vballoc(2); | |
1079 rc = vb_utf8to8bit(newer, item->email->body, strlen(item->email->body) + 1, item->email->body_charset); | |
1080 if (rc == (size_t)-1) { | |
1081 free(newer->b); | |
1082 DEBUG_EMAIL(("Failed to convert plain text utf-8 to %s\n", item->email->body_charset)); | |
1083 } | |
1084 else { | |
1085 free(item->email->body); | |
1086 item->email->body = newer->b; | |
1087 } | |
1088 free(newer); | |
1045 } | 1089 } |
1046 removeCR(item->email->body); | 1090 removeCR(item->email->body); |
1047 if (base64_body) { | 1091 if (base64_body) { |
1048 char *enc = base64_encode(item->email->body, strlen(item->email->body)); | 1092 char *enc = base64_encode(item->email->body, strlen(item->email->body)); |
1049 if (enc) { | 1093 if (enc) { |
1056 } | 1100 } |
1057 } | 1101 } |
1058 | 1102 |
1059 if (item->email->htmlbody) { | 1103 if (item->email->htmlbody) { |
1060 if (boundary) { | 1104 if (boundary) { |
1105 const char *def = "utf-8"; | |
1106 if (item->email->body_charset) def = item->email->body_charset; | |
1061 fprintf(f_output, "\n--%s\n", boundary); | 1107 fprintf(f_output, "\n--%s\n", boundary); |
1062 fprintf(f_output, "Content-type: text/html\n"); | 1108 fprintf(f_output, "Content-Type: text/html; charset=\"%s\"\n", def); |
1063 if (base64_body) fprintf(f_output, "Content-Transfer-Encoding: base64\n"); | 1109 if (base64_body) fprintf(f_output, "Content-Transfer-Encoding: base64\n"); |
1064 fprintf(f_output, "\n"); | 1110 fprintf(f_output, "\n"); |
1065 } | 1111 } |
1066 removeCR(item->email->htmlbody); | 1112 removeCR(item->email->htmlbody); |
1067 if (base64_body) { | 1113 if (base64_body) { |