diff src/readpst.c @ 116:ed2a260bbb98 stable-0-6-25

improve handling of content-type charset values in mime parts
author Carl Byington <carl@five-ten-sg.com>
date Fri, 16 Jan 2009 15:23:52 -0800
parents 7133b39975f7
children 0f1492b7fe8b
line wrap: on
line diff
--- a/src/readpst.c	Thu Dec 11 12:06:03 2008 -0800
+++ b/src/readpst.c	Fri Jan 16 15:23:52 2009 -0800
@@ -6,6 +6,7 @@
  */
 #include "define.h"
 #include "libstrfunc.h"
+#include "vbuf.h"
 #include "libpst.h"
 #include "common.h"
 #include "timeconv.h"
@@ -762,11 +763,11 @@
         char *attach_filename;
         fprintf(f_output, "\n--%s\n", boundary);
         if (!current_attach->mimetype) {
-            fprintf(f_output, "Content-type: %s\n", MIME_TYPE_DEFAULT);
+            fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT);
         } else {
-            fprintf(f_output, "Content-type: %s\n", current_attach->mimetype);
+            fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype);
         }
-        fprintf(f_output, "Content-transfer-encoding: base64\n");
+        fprintf(f_output, "Content-Transfer-Encoding: base64\n");
         // If there is a long filename (filename2) use that, otherwise
         // use the 8.3 filename (filename1)
         if (current_attach->filename2) {
@@ -822,7 +823,7 @@
         // see if there is a boundary variable there
         // this search MUST be made case insensitive (DONE).
         // Also, we should check to find out if we are looking
-        // at the boundary associated with content-type, and that
+        // at the boundary associated with Content-Type, and that
         // the content type really is multipart
 
         removeCR(item->email->header);
@@ -1024,12 +1025,10 @@
         // in the headers above.
         if (item->attach) {
             // write the boundary stuff if we have attachments
-            fprintf(f_output, "Content-type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary);
-        } else if (boundary) {
-            // else if we have multipart/alternative then tell it so
-            fprintf(f_output, "Content-type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary);
-        } else if (item->email->htmlbody) {
-            fprintf(f_output, "Content-type: text/html\n");
+            fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary);
+        } else {
+            // else we have multipart/alternative then tell it so
+            fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary);
         }
     }
     fprintf(f_output, "\n");    // start the body
@@ -1037,12 +1036,57 @@
 
     if (item->email->body) {
         if (boundary) {
+            // try to find the charset for this body part
+            const char *def = "utf-8";
+            // it seems that if (item->email->body_charset) is set, then
+            // we actually have utf8 plain body text. If that is not set
+            // we have plain body text in an 8 bit charset specified in
+            // the headers.
+            char *c = my_stristr(item->email->header, "\nContent-Type:");
+            if (c) {
+                c++;
+                char *n = my_stristr(c, "\n");  // termination on the content type
+                if (n) {
+                    char *s = my_stristr(c, "; charset=");
+                    if (s && (s < n)) {
+                        char *e;
+                        s += 10;    // skip over charset=
+                        if (*s == '"') {
+                            s++;
+                            e = my_stristr(s, "\"");
+                        }
+                        else {
+                            e = my_stristr(s, ";");
+                        }
+                        if (!e || (e > n)) e = n;   // use the trailing lf as terminator if nothing better
+                        *e = '\0';      // corrupt the header, but we have already printed it
+                        def = s;
+                        DEBUG_EMAIL(("body charset %s from headers\n", def));
+                    }
+                }
+            }
             fprintf(f_output, "\n--%s\n", boundary);
-            fprintf(f_output, "Content-type: text/plain\n");
+            fprintf(f_output, "Content-Type: text/plain; charset=\"%s\"\n", def);
             if (base64_body)
                 fprintf(f_output, "Content-Transfer-Encoding: base64\n");
             fprintf(f_output, "\n");
         }
+        else if (item->email->body_charset && (strcasecmp("utf-8",item->email->body_charset))) {
+            // try to convert to the specified charset since it is not utf-8
+            size_t rc;
+            DEBUG_EMAIL(("Convert plain text utf-8 to %s\n", item->email->body_charset));
+            vbuf *newer = vballoc(2);
+            rc = vb_utf8to8bit(newer, item->email->body, strlen(item->email->body) + 1, item->email->body_charset);
+            if (rc == (size_t)-1) {
+                free(newer->b);
+                DEBUG_EMAIL(("Failed to convert plain text utf-8 to %s\n", item->email->body_charset));
+            }
+            else {
+                free(item->email->body);
+                item->email->body = newer->b;
+            }
+            free(newer);
+        }
         removeCR(item->email->body);
         if (base64_body) {
             char *enc = base64_encode(item->email->body, strlen(item->email->body));
@@ -1058,8 +1102,10 @@
 
     if (item->email->htmlbody) {
         if (boundary) {
+            const char *def = "utf-8";
+            if (item->email->body_charset) def = item->email->body_charset;
             fprintf(f_output, "\n--%s\n", boundary);
-            fprintf(f_output, "Content-type: text/html\n");
+            fprintf(f_output, "Content-Type: text/html; charset=\"%s\"\n", def);
             if (base64_body) fprintf(f_output, "Content-Transfer-Encoding: base64\n");
             fprintf(f_output, "\n");
         }