# HG changeset patch # User Carl Byington # Date 1233544907 28800 # Node ID 8399ef94c11b9fba0c6d6bc709bd82f4a200af3a # Parent 6395ced2b8b2b57d0c697b287247d534d28cbc6c strip and regenerate all MIME headers to avoid duplicates. do a better job of making unique MIME boundaries. only use base64 coding when strictly necessary. diff -r 6395ced2b8b2 -r 8399ef94c11b ChangeLog --- a/ChangeLog Sun Feb 01 11:24:22 2009 -0800 +++ b/ChangeLog Sun Feb 01 19:21:47 2009 -0800 @@ -1,4 +1,4 @@ -LibPST 0.6.26 (2009-01-31) +LibPST 0.6.26 (2009-02-01) =============================== * patch from Fridrich Strba for building on mingw and general cleanup of autoconf files @@ -9,8 +9,11 @@ * more const correctness issues regarding getopt() * disable building pst2dii on cygwin, since the convert program on the path is part of windows, not image magic. - * consistent ordering of our include files. all systems includes + * consistent ordering of our include files. all system includes protected by ifdef HAVE_ from autoconf. + * strip and regenerate all MIME headers to avoid duplicates. + * do a better job of making unique MIME boundaries. + * only use base64 coding when strictly necessary. 
LibPST 0.6.25 (2009-01-16) =============================== diff -r 6395ced2b8b2 -r 8399ef94c11b NEWS --- a/NEWS Sun Feb 01 11:24:22 2009 -0800 +++ b/NEWS Sun Feb 01 19:21:47 2009 -0800 @@ -1,4 +1,4 @@ -0.6.26 2009-01-31 patch from Fridrich Strba for building on mingw, and autoconf cleanup +0.6.26 2009-02-01 patch from Fridrich Strba for building on mingw, and autoconf cleanup, better mime headers 0.6.25 2009-01-16 improve handling of content-type charset values in mime parts 0.6.24 2008-12-11 patch from Chris Eagle to build on cygwin 0.6.23 2008-12-04 bump version to avoid cvs tagging mistake in fedora diff -r 6395ced2b8b2 -r 8399ef94c11b configure.in --- a/configure.in Sun Feb 01 11:24:22 2009 -0800 +++ b/configure.in Sun Feb 01 19:21:47 2009 -0800 @@ -84,7 +84,7 @@ ) AC_HEADER_DIRENT AC_HEADER_STDC -AC_CHECK_HEADERS([ctype.h dirent.h errno.h fcntl.h inttypes.h limits.h signal.h stdarg.h stdint.h stdio.h stdlib.h string.h sys/param.h sys/stat.h sys/types.h time.h unistd.h wchar.h]) +AC_CHECK_HEADERS([ctype.h dirent.h errno.h fcntl.h inttypes.h limits.h regex.h signal.h stdarg.h stdint.h stdio.h stdlib.h string.h sys/param.h sys/stat.h sys/types.h time.h unistd.h wchar.h]) # Checks for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL diff -r 6395ced2b8b2 -r 8399ef94c11b libpst.spec.in --- a/libpst.spec.in Sun Feb 01 11:24:22 2009 -0800 +++ b/libpst.spec.in Sun Feb 01 19:21:47 2009 -0800 @@ -47,10 +47,13 @@ %changelog -* Sat Jan 31 2009 Carl Byington - 0.6.26-1 +* Sun Feb 01 2009 Carl Byington - 0.6.26-1 - patch from Fridrich Strba for building on mingw and general -- cleanup of autoconf files. 
+- cleanup of autoconf files - add processing for pst files of type 0x0f +- strip and regenerate all MIME headers to avoid duplicates +- do a better job of making unique MIME boundaries +- only use base64 coding when strictly necessary * Fri Jan 16 2009 Carl Byington - 0.6.25-1 - improve handling of content-type charset values in mime parts diff -r 6395ced2b8b2 -r 8399ef94c11b regression/regression-tests.bash --- a/regression/regression-tests.bash Sun Feb 01 11:24:22 2009 -0800 +++ b/regression/regression-tests.bash Sun Feb 01 19:21:47 2009 -0800 @@ -45,22 +45,23 @@ dodii 3 test.pst dodii 4 big_mail.pst else - #dopst 1 ams.pst - #dopst 2 sample_64.pst - #dopst 3 test.pst - #dopst 4 big_mail.pst - #dopst 5 mbmg.archive.pst - #dopst 6 Single2003-read.pst - #dopst 7 Single2003-unread.pst - #dopst 8 ol2k3high.pst - #dopst 9 ol97high.pst - #dopst 10 returned_message.pst - #dopst 11 flow.pst - #dopst 12 test-html.pst - #dopst 13 test-text.pst - #dopst 14 joe.romanowski.pst - #dopst 15 hourig1.pst + dopst 1 ams.pst + dopst 2 sample_64.pst + dopst 3 test.pst + dopst 4 big_mail.pst + dopst 5 mbmg.archive.pst + dopst 6 Single2003-read.pst + dopst 7 Single2003-unread.pst + dopst 8 ol2k3high.pst + dopst 9 ol97high.pst + dopst 10 returned_message.pst + dopst 11 flow.pst + dopst 12 test-html.pst + dopst 13 test-text.pst + dopst 14 joe.romanowski.pst + dopst 15 hourig1.pst dopst 16 hourig2.pst dopst 17 hourig3.pst + dopst 18 test-mac.pst fi diff -r 6395ced2b8b2 -r 8399ef94c11b src/readpst.c --- a/src/readpst.c Sun Feb 01 11:24:22 2009 -0800 +++ b/src/readpst.c Sun Feb 01 19:21:47 2009 -0800 @@ -12,6 +12,10 @@ #include "libstrfunc.h" #include "vbuf.h" +#ifdef HAVE_REGEX_H + #include <regex.h> +#endif + #define OUTPUT_TEMPLATE "%s" #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory" #define KMAIL_INDEX ".%s.index" @@ -32,7 +36,7 @@ void process(pst_item *outeritem, pst_desc_ll *d_ptr); void write_email_body(FILE *f, char *body); -char* removeCR (char *c); +void removeCR(char *c); void usage(); 
void version(); char* mk_kmail_dir(char*); @@ -44,9 +48,14 @@ int mk_separate_file(struct file_ll *f); char* my_stristr(char *haystack, char *needle); void check_filename(char *fname); -char* skip_header_prologue(char *headers); void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst); -void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char boundary[], pst_file* pst); +void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char *boundary, pst_file* pst); +void header_has_field(char *header, char *field, int *flag); +char* header_get_field(char *header, char *field); +void header_strip_field(char *header, char *field); +int test_base64(char *body); +void find_html_charset(char *html, char *charset, size_t charsetlen); +void write_body_part(FILE* f_output, char *body, char *mime, char *charset, char *boundary); void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf); void write_vcard(FILE* f_output, pst_item_contact* contact, char comment[]); void write_appointment(FILE* f_output, pst_item_appointment* appointment, @@ -111,7 +120,7 @@ int overwrite = 0; int save_rtf_body = 1; pst_file pstfile; - +regex_t meta_charset_pattern; void process(pst_item *outeritem, pst_desc_ll *d_ptr) @@ -228,6 +237,14 @@ char *temp = NULL; //temporary char pointer prog_name = argv[0]; + time_t now = time(NULL); + srand((unsigned)now); + + if (regcomp(&meta_charset_pattern, "<meta[^>]*content=\"[^>]*charset=([^>\";]*)[\";]", REG_ICASE | REG_EXTENDED)) { + printf("cannot compile regex pattern\n"); + exit(3); + } + // command-line option handling while ((c = getopt(argc, argv, "bCc:Dd:hko:qrSMVw"))!= -1) { switch (c) { @@ -377,6 +394,7 @@ pst_freeItem(item); pst_close(&pstfile); DEBUG_RET(); + regfree(&meta_charset_pattern); return 0; } @@ -399,20 +417,18 @@ } -char *removeCR (char *c) { - // converts /r/n to /n +void removeCR 
(char *c) { + // converts \r\n to \n char *a, *b; DEBUG_ENT("removeCR"); a = b = c; while (*a != '\0') { *b = *a; - if (*a != '\r') - b++; + if (*a != '\r') b++; a++; } *b = '\0'; DEBUG_RET(); - return c; } @@ -642,9 +658,7 @@ char *my_stristr(char *haystack, char *needle) { // my_stristr varies from strstr in that its searches are case-insensitive char *x=haystack, *y=needle, *z = NULL; - DEBUG_ENT("my_stristr"); if (!haystack || !needle) { - DEBUG_RET(); return NULL; } while (*y != '\0' && *x != '\0') { @@ -660,7 +674,6 @@ } x++; // advance the search in the haystack } - DEBUG_RET(); // If the haystack ended before our search finished, it's not a match. if (*y != '\0') return NULL; return z; @@ -682,20 +695,6 @@ } -// The sole purpose of this function is to bypass the pseudo-header prologue -// that Microsoft Outlook inserts at the beginning of the internet email -// headers for emails stored in their "Personal Folders" files. -char *skip_header_prologue(char *headers) { - const char *bad = "Microsoft Mail Internet Headers"; - if (strncmp(headers, bad, strlen(bad)) == 0) { - // Found the offensive header prologue - char *pc = strchr(headers, '\n'); - return pc + 1; - } - return headers; -} - - void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst) { FILE *fp = NULL; @@ -743,8 +742,9 @@ } -void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char boundary[], pst_file* pst) +void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char *boundary, pst_file* pst) { + char *attach_filename; char *enc = NULL; // base64 encoded attachment DEBUG_ENT("write_inline_attachment"); DEBUG_EMAIL(("Attachment Size is %i\n", current_attach->size)); @@ -757,28 +757,27 @@ return; } } - if (boundary) { - char *attach_filename; - fprintf(f_output, "\n--%s\n", boundary); - if (!current_attach->mimetype) { - fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT); - } else { - 
fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype); - } - fprintf(f_output, "Content-Transfer-Encoding: base64\n"); - // If there is a long filename (filename2) use that, otherwise - // use the 8.3 filename (filename1) - if (current_attach->filename2) { - attach_filename = current_attach->filename2; - } else { - attach_filename = current_attach->filename1; - } - if (!attach_filename) { - fprintf(f_output, "Content-Disposition: inline\n\n"); - } else { - fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", attach_filename); - } + + fprintf(f_output, "\n--%s\n", boundary); + if (!current_attach->mimetype) { + fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT); + } else { + fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype); } + fprintf(f_output, "Content-Transfer-Encoding: base64\n"); + // If there is a long filename (filename2) use that, otherwise + // use the 8.3 filename (filename1) + if (current_attach->filename2) { + attach_filename = current_attach->filename2; + } else { + attach_filename = current_attach->filename1; + } + if (!attach_filename) { + fprintf(f_output, "Content-Disposition: inline\n\n"); + } else { + fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", attach_filename); + } + if (current_attach->data) { pst_fwrite(enc, 1, strlen(enc), f_output); DEBUG_EMAIL(("Attachment Size after encoding is %i\n", strlen(enc))); @@ -791,12 +790,139 @@ } +void header_has_field(char *header, char *field, int *flag) +{ + if (my_stristr(header, field) || (strncasecmp(header, field+1, strlen(field)-1) == 0)) { + DEBUG_EMAIL(("header block has %s header\n", field+1)); + *flag = 1; + } +} + + +char* header_get_field(char *header, char *field) +{ + char *t = my_stristr(header, field); + if (!t && (strncasecmp(header, field+1, strlen(field)-1) == 0)) t = header; + return t; +} + + +void header_strip_field(char *header, char *field) +{ + char *e; + char *t = header_get_field(header, field); + 
if (t) { + e = strchr(t+1, '\n'); + while (e && ((e[1] == ' ') || (e[1] == '\t'))) { + e = strchr(e+1, '\n'); + } + if (e) { + if (t == header) e++; // if *t is not \n, we don't want to keep the \n at *e either. + while (*e != '\0') { + *t = *e; + t++; + e++; + } + *t = '\0'; + } + else { + // this was the last header field, truncate the headers + *t = '\0'; + } + } +} + + +int test_base64(char *body) +{ + int b64 = 0; + uint8_t *b = (uint8_t *)body; + while (*b != 0) { + if ((*b < 32) && (*b != 9) && (*b != 10)) { + DEBUG_EMAIL(("found base64 byte %d\n", (int)*b)); + DEBUG_HEXDUMPC(body, strlen(body), 0x10); + b64 = 1; + break; + } + b++; + } + return b64; +} + + +void find_html_charset(char *html, char *charset, size_t charsetlen) +{ + const int index = 1; + const int nmatch = index+1; + regmatch_t match[nmatch]; + int rc = regexec(&meta_charset_pattern, html, nmatch, match, 0); + if (rc == 0) { + int s = match[index].rm_so; + int e = match[index].rm_eo; + if (s != -1) { + char save = html[e]; + html[e] = '\0'; + snprintf(charset, charsetlen, "%s", html+s); // copy the html charset + html[e] = save; + DEBUG_EMAIL(("charset %s from html text\n", charset)); + } + else { + DEBUG_EMAIL(("matching %d %d %d %d", match[0].rm_so, match[0].rm_eo, match[1].rm_so, match[1].rm_eo)); + DEBUG_HEXDUMPC(html, strlen(html), 0x10); + } + } + else { + DEBUG_EMAIL(("regexec returns %d\n", rc)); + } +} + + +void write_body_part(FILE* f_output, char *body, char *mime, char *charset, char *boundary) +{ + char *needfree = NULL; + if (strcasecmp("utf-8", charset)) { + // try to convert to the specified charset since it is not utf-8 + size_t rc; + DEBUG_EMAIL(("Convert %s utf-8 to %s\n", mime, charset)); + vbuf *newer = vballoc(2); + rc = vb_utf8to8bit(newer, body, strlen(body) + 1, charset); + if (rc == (size_t)-1) { + // unable to convert, maybe it is already in that character set + free(newer->b); + DEBUG_EMAIL(("Failed to convert %s utf-8 to %s\n", mime, charset)); + } + else { + 
needfree = body = newer->b; + } + free(newer); + } + removeCR(body); + int base64 = test_base64(body); + fprintf(f_output, "\n--%s\n", boundary); + fprintf(f_output, "Content-Type: %s; charset=\"%s\"\n", mime, charset); + if (base64) fprintf(f_output, "Content-Transfer-Encoding: base64\n"); + fprintf(f_output, "\n"); + if (base64) { + char *enc = base64_encode(body, strlen(body)); + if (enc) { + write_email_body(f_output, enc); + fprintf(f_output, "\n"); + free(enc); + } + } + else { + write_email_body(f_output, body); + } + if (needfree) free(needfree); +} + + void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf) { - char *boundary = NULL; // the boundary marker between multipart sections - int boundary_created = 0; // we have not (yet) created a new boundary + char boundary[60]; + char body_charset[60]; char *temp = NULL; - int attach_num, base64_body = 0; + int attach_num; time_t em_time; char *c_time; pst_item_attach* current_attach; @@ -804,6 +930,9 @@ has_from = has_subject = has_to = has_cc = has_bcc = has_date = 0; DEBUG_ENT("write_normal_email"); + // setup default body character set + snprintf(body_charset, sizeof(body_charset), "%s", (item->email->body_charset) ? item->email->body_charset : "utf-8"); + // convert the sent date if it exists, or set it to a fixed date if (item->email->sent_date) { em_time = fileTimeToUnixTime(item->email->sent_date, 0); @@ -815,101 +944,65 @@ } else c_time= "Fri Dec 28 12:06:21 2001"; - // we will always look at the header to discover some stuff + // create our MIME boundary here. + snprintf(boundary, sizeof(boundary), "--boundary-LibPST-iamunique-%i_-_-", rand()); + + // we will always look at the headers to discover some stuff if (item->email->header ) { - char *b1, *b2; - // see if there is a boundary variable there - // this search MUST be made case insensitive (DONE). 
- // Also, we should check to find out if we are looking - // at the boundary associated with Content-Type, and that - // the content type really is multipart - + char *t; removeCR(item->email->header); - if ((b2 = my_stristr(item->email->header, "boundary="))) { - int len; - b2 += strlen("boundary="); // move boundary to first char of marker - - if (*b2 == '"') { - b2++; - b1 = strchr(b2, '"'); // find terminating quote - } else { - b1 = b2; - while (isgraph(*b1)) // find first char that isn't part of boundary - b1++; - } - len = b1 - b2; - boundary = malloc(len+1); //malloc that length - strncpy(boundary, b2, len); // copy boundary to another variable - boundary[len] = '\0'; - b1 = b2 = boundary; - while (*b2 != '\0') { // remove any CRs and Tabs - if (*b2 != '\n' && *b2 != '\r' && *b2 != '\t') { - *b1 = *b2; - b1++; - } - b2++; - } - *b1 = '\0'; - - DEBUG_EMAIL(("Found boundary of - %s\n", boundary)); - } else { - DEBUG_EMAIL(("boundary not found in header\n")); + // some of the headers we get from the file are not properly defined. + // they can contain some email stuff too. We will cut off the header + // when we see a \n\n + temp = strstr(item->email->header, "\n\n"); + if (temp) { + temp[1] = '\0'; // stop after first \n + DEBUG_EMAIL(("Found body text in header %s\n", temp+2)); } - // also possible to set 7bit encoding detection here. 
- if ((b2 = my_stristr(item->email->header, "Content-Transfer-Encoding:"))) { - if ((b2 = strchr(b2, ':'))) { - b2++; // skip to the : at the end of the string + // Check if the headers have all the necessary fields + header_has_field(item->email->header, "\nFrom: ", &has_from); + header_has_field(item->email->header, "\nTo: ", &has_to); + header_has_field(item->email->header, "\nSubject: ", &has_subject); + header_has_field(item->email->header, "\nDate: ", &has_date); + header_has_field(item->email->header, "\nCC: ", &has_cc); + header_has_field(item->email->header, "\nBCC: ", &has_bcc); - while (*b2 == ' ' || *b2 == '\t') - b2++; - if (pst_strincmp(b2, "base64", 6)==0) { - DEBUG_EMAIL(("body is base64 encoded\n")); - base64_body = 1; + // look for charset in Content-Type header + t = header_get_field(item->email->header, "\nContent-Type: "); + if (t) { + // assume charset= will be on the first line, rather than on a continuation line + t++; + char *n = strchr(t, '\n'); + char *s = my_stristr(t, "; charset="); + if (n && s && (s < n)) { + char *e; + char save; + s += 10; // skip over charset= + if (*s == '"') { + s++; + e = strchr(s, '"'); } - } else { - DEBUG_WARN(("found a ':' during the my_stristr, but not after that..\n")); + else { + e = strchr(s, ';'); + } + if (!e || (e > n)) e = n; // use the trailing lf as terminator if nothing better + save = *e; + *e = '\0'; + snprintf(body_charset, sizeof(body_charset), "%s", s); // copy the charset to our buffer + *e = save; + DEBUG_EMAIL(("body charset %s from headers\n", body_charset)); } } - // Check if the header block has all the necessary headers. 
- if (my_stristr(item->email->header, "\nFrom:") || (strncasecmp(item->email->header, "From: ", 6) == 0) || my_stristr(item->email->header, "\nX-From:")) { - DEBUG_EMAIL(("header block has From header\n")); - has_from = 1; - } - if (my_stristr(item->email->header, "\nTo:") || (strncasecmp(item->email->header, "To: ", 4) == 0)) { - DEBUG_EMAIL(("header block has To header\n")); - has_to = 1; - } - if (my_stristr(item->email->header, "\nSubject:") || (strncasecmp(item->email->header, "Subject: ", 9) == 0)) { - DEBUG_EMAIL(("header block has Subject header\n")); - has_subject = 1; - } - if (my_stristr(item->email->header, "\nDate:") || (strncasecmp(item->email->header, "Date: ", 6) == 0)) { - DEBUG_EMAIL(("header block has Date header\n")); - has_date = 1; - } - if (my_stristr(item->email->header, "\nCC:") || (strncasecmp(item->email->header, "CC: ", 4) == 0)) { - DEBUG_EMAIL(("header block has CC header\n")); - has_cc = 1; - } - if (my_stristr(item->email->header, "\nBCC:") || (strncasecmp(item->email->header, "BCC: ", 5) == 0)) { - DEBUG_EMAIL(("header block has BCC header\n")); - has_bcc = 1; - } - } - - if (!boundary && (item->attach || (item->email->body && item->email->htmlbody) - || item->email->rtf_compressed || item->email->encrypted_body - || item->email->encrypted_htmlbody)) { - // we need to create a boundary here. - DEBUG_EMAIL(("must create own boundary. 
oh dear.\n")); - boundary = malloc(50 * sizeof(char)); // allow 50 chars for boundary - boundary[0] = '\0'; - sprintf(boundary, "--boundary-LibPST-iamunique-%i_-_-", rand()); - DEBUG_EMAIL(("created boundary is %s\n", boundary)); - boundary_created = 1; + // Strip out the mime headers and some others that we don't want to emit + header_strip_field(item->email->header, "\nMicrosoft Mail Internet Headers"); + header_strip_field(item->email->header, "\nMIME-Version: "); + header_strip_field(item->email->header, "\nContent-Type: "); + header_strip_field(item->email->header, "\nContent-Transfer-Encoding: "); + header_strip_field(item->email->header, "\nContent-class: "); + header_strip_field(item->email->header, "\nX-MimeOLE: "); } DEBUG_EMAIL(("About to print Header\n")); @@ -920,54 +1013,8 @@ if (item->email->header) { int len; - char *soh = NULL; // real start of headers. - - // some of the headers we get from the file are not properly defined. - // they can contain some email stuff too. We will cut off the header - // when we see a \n\n or \r\n\r\n - removeCR(item->email->header); - temp = strstr(item->email->header, "\n\n"); - - if (temp) { - DEBUG_EMAIL(("Found body text in header\n")); - temp[1] = '\0'; // stop after first \n - } - - // Write out any fields that weren't included in the header. 
- if (!has_from) { - temp = item->email->outlook_sender; - if (!temp) temp = ""; - fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, temp); - } + char *soh = item->email->header; - if (!has_subject) { - if (item->email->subject && item->email->subject->subj) { - fprintf(f_output, "Subject: %s\n", item->email->subject->subj); - } else { - fprintf(f_output, "Subject: \n"); - } - } - - if (!has_to && item->email->sentto_address) { - fprintf(f_output, "To: %s\n", item->email->sentto_address); - } - - if (!has_cc && item->email->cc_address) { - fprintf(f_output, "Cc: %s\n", item->email->cc_address); - } - - if (!has_bcc && item->email->bcc_address) { - fprintf(f_output, "Bcc: %s\n", item->email->bcc_address); - } - - if (!has_date && item->email->sent_date) { - char c_time[C_TIME_SIZE]; - strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time)); - fprintf(f_output, "Date: %s\n", c_time); - } - - // Now, write out the header... - soh = skip_header_prologue(item->email->header); if (mode != MODE_SEPARATE) { // don't put rubbish in if we are doing separate if (strncmp(soh, "X-From_: ", 9) == 0 ) { @@ -976,6 +1023,8 @@ } else fprintf(f_output, "From \"%s\" %s\n", item->email->outlook_sender_name, c_time); } + + // make sure the headers end with a \n fprintf(f_output, "%s", soh); len = strlen(soh); if (!len || (soh[len-1] != '\n')) fprintf(f_output, "\n"); @@ -991,138 +1040,65 @@ } fprintf(f_output, "From \"%s\" %s\n", temp, c_time); } + } + // create required header fields that are not already written + if (!has_from) { temp = item->email->outlook_sender; if (!temp) temp = ""; fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, temp); + } + if (!has_subject) { if (item->email->subject && item->email->subject->subj) { fprintf(f_output, "Subject: %s\n", item->email->subject->subj); } else { fprintf(f_output, "Subject: \n"); } + } - if (item->email->sentto_address) { - fprintf(f_output, "To: %s\n", 
item->email->sentto_address); - } + if (!has_to && item->email->sentto_address) { + fprintf(f_output, "To: %s\n", item->email->sentto_address); + } - if (item->email->cc_address) { - fprintf(f_output, "Cc: %s\n", item->email->cc_address); - } + if (!has_cc && item->email->cc_address) { + fprintf(f_output, "Cc: %s\n", item->email->cc_address); + } - if (item->email->sent_date) { - char c_time[C_TIME_SIZE]; - strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time)); - fprintf(f_output, "Date: %s\n", c_time); - } + if (!has_bcc && item->email->bcc_address) { + fprintf(f_output, "Bcc: %s\n", item->email->bcc_address); } + if (!has_date && item->email->sent_date) { + char c_time[C_TIME_SIZE]; + strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time)); + fprintf(f_output, "Date: %s\n", c_time); + } + + // add our own mime headers fprintf(f_output, "MIME-Version: 1.0\n"); - if (boundary && boundary_created) { - // if we created the boundary, then it has NOT already been printed - // in the headers above. 
- if (item->attach) { - // write the boundary stuff if we have attachments - fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); - } else { - // else we have multipart/alternative then tell it so - fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary); - } + if (item->attach || (item->email->rtf_compressed && save_rtf) + || item->email->encrypted_body + || item->email->encrypted_htmlbody) { + // use multipart/mixed if we have attachments + fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); + } else { + // else use multipart/alternative + fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary); } - fprintf(f_output, "\n"); // start the body - DEBUG_EMAIL(("About to print Body\n")); + fprintf(f_output, "\n"); // end of headers, start of body + // now dump the body parts if (item->email->body) { - if (boundary) { - // try to find the charset for this body part - const char *def = "utf-8"; - // it seems that if (item->email->body_charset) is set, then - // we actually have utf8 plain body text. If that is not set - // we have plain body text in an 8 bit charset specified in - // the headers. 
- char *c = my_stristr(item->email->header, "\nContent-Type:"); - if (c) { - c++; - char *n = my_stristr(c, "\n"); // termination on the content type - if (n) { - char *s = my_stristr(c, "; charset="); - if (s && (s < n)) { - char *e; - s += 10; // skip over charset= - if (*s == '"') { - s++; - e = my_stristr(s, "\""); - } - else { - e = my_stristr(s, ";"); - } - if (!e || (e > n)) e = n; // use the trailing lf as terminator if nothing better - *e = '\0'; // corrupt the header, but we have already printed it - def = s; - DEBUG_EMAIL(("body charset %s from headers\n", def)); - } - } - } - fprintf(f_output, "\n--%s\n", boundary); - fprintf(f_output, "Content-Type: text/plain; charset=\"%s\"\n", def); - if (base64_body) - fprintf(f_output, "Content-Transfer-Encoding: base64\n"); - fprintf(f_output, "\n"); - } - else if (item->email->body_charset && (strcasecmp("utf-8",item->email->body_charset))) { - // try to convert to the specified charset since it is not utf-8 - size_t rc; - DEBUG_EMAIL(("Convert plain text utf-8 to %s\n", item->email->body_charset)); - vbuf *newer = vballoc(2); - rc = vb_utf8to8bit(newer, item->email->body, strlen(item->email->body) + 1, item->email->body_charset); - if (rc == (size_t)-1) { - free(newer->b); - DEBUG_EMAIL(("Failed to convert plain text utf-8 to %s\n", item->email->body_charset)); - } - else { - // unable to convert, maybe it is already in that character set - free(item->email->body); - item->email->body = newer->b; - } - free(newer); - } - removeCR(item->email->body); - if (base64_body) { - char *enc = base64_encode(item->email->body, strlen(item->email->body)); - if (enc) { - write_email_body(f_output, enc); - free(enc); - } - } - else { - write_email_body(f_output, item->email->body); - } + write_body_part(f_output, item->email->body, "text/plain", body_charset, boundary); } if (item->email->htmlbody) { - if (boundary) { - const char *def = "utf-8"; - if (item->email->body_charset) def = item->email->body_charset; - 
fprintf(f_output, "\n--%s\n", boundary); - fprintf(f_output, "Content-Type: text/html; charset=\"%s\"\n", def); - if (base64_body) fprintf(f_output, "Content-Transfer-Encoding: base64\n"); - fprintf(f_output, "\n"); - } - removeCR(item->email->htmlbody); - if (base64_body) { - char *enc = base64_encode(item->email->htmlbody, strlen(item->email->htmlbody)); - if (enc) { - write_email_body(f_output, enc); - free(enc); - } - } - else { - write_email_body(f_output, item->email->htmlbody); - } + find_html_charset(item->email->htmlbody, body_charset, sizeof(body_charset)); + write_body_part(f_output, item->email->htmlbody, "text/html", body_charset, boundary); } if (item->email->rtf_compressed && save_rtf) { - //int32_t tester; DEBUG_EMAIL(("Adding RTF body as attachment\n")); current_attach = (pst_item_attach*)xmalloc(sizeof(pst_item_attach)); memset(current_attach, 0, sizeof(pst_item_attach)); @@ -1133,9 +1109,6 @@ strcpy(current_attach->filename2, RTF_ATTACH_NAME); current_attach->mimetype = xmalloc(strlen(RTF_ATTACH_TYPE)+2); strcpy(current_attach->mimetype, RTF_ATTACH_TYPE); - //memcpy(&tester, item->email->rtf_compressed+sizeof(int32_t), sizeof(int32_t)); - //LE32_CPU(tester); - //printf("lz produced %d bytes, rtf claims %d bytes\n", current_attach->size, tester); } if (item->email->encrypted_body || item->email->encrypted_htmlbody) { @@ -1164,7 +1137,7 @@ write_email_body(f_output, "The body of this email is encrypted. 
This isn't supported yet, but the body is now an attachment\n"); } - // attachments + // other attachments attach_num = 0; for (current_attach = item->attach; current_attach; current_attach = current_attach->next) { DEBUG_EMAIL(("Attempting Attachment encoding\n")); @@ -1178,10 +1151,9 @@ } if (mode != MODE_SEPARATE) { /* do not add a boundary after the last attachment for mode_MH */ DEBUG_EMAIL(("Writing buffer between emails\n")); - if (boundary) fprintf(f_output, "\n--%s--\n", boundary); + fprintf(f_output, "\n--%s--\n", boundary); fprintf(f_output, "\n\n"); } - if (boundary) free (boundary); DEBUG_RET(); }