# HG changeset patch # User Carl Byington # Date 1293247565 28800 # Node ID c947b8812120a2f7c152e9d82f9adc3f0d04752d # Parent a863de65e5b80c80476ab9cfb472e521a21a763b rfc2047 and rfc2231 encoding for non-ascii headers and attachment filenames diff -r a863de65e5b8 -r c947b8812120 ChangeLog --- a/ChangeLog Mon Sep 13 15:10:52 2010 -0700 +++ b/ChangeLog Fri Dec 24 19:26:05 2010 -0800 @@ -1,3 +1,7 @@ +LibPST 0.6.50 (2010-12-24) +=============================== + * rfc2047 and rfc2231 encoding for non-ascii headers and attachment filenames + LibPST 0.6.49 (2010-09-13) =============================== * fix to ignore embedded objects that are not email messages diff -r a863de65e5b8 -r c947b8812120 NEWS --- a/NEWS Mon Sep 13 15:10:52 2010 -0700 +++ b/NEWS Fri Dec 24 19:26:05 2010 -0800 @@ -1,3 +1,4 @@ +0.6.50 2010-12-24 rfc2047 and rfc2231 encoding for non-ascii headers and attachment filenames 0.6.49 2010-09-13 fix to ignore embedded objects that are not email messages 0.6.48 2010-09-02 fix for broken internet headers from Outlook, change to mboxrd quoting 0.6.47 2010-05-07 patches from Kenneth Berland for solaris diff -r a863de65e5b8 -r c947b8812120 configure.in --- a/configure.in Mon Sep 13 15:10:52 2010 -0700 +++ b/configure.in Fri Dec 24 19:26:05 2010 -0800 @@ -1,5 +1,5 @@ AC_PREREQ(2.59) -AC_INIT(libpst,0.6.49,carl@five-ten-sg.com) +AC_INIT(libpst,0.6.50,carl@five-ten-sg.com) AC_CONFIG_SRCDIR([src/libpst.c]) AC_CONFIG_HEADER([config.h]) AM_INIT_AUTOMAKE @@ -19,7 +19,7 @@ # 6. libtool will build libpst.so.x.y.z where the SONAME is libpst.so.x # and x=current-age, y=age, z=revision -libpst_version_info='4:4:0' +libpst_version_info='5:0:1' AC_SUBST(LIBPST_VERSION_INFO, [$libpst_version_info]) libpst_so_major='4' AC_SUBST(LIBPST_SO_MAJOR, [$libpst_so_major]) @@ -34,6 +34,7 @@ # 0.6.47 libpst.so.4 libpst.so.4.0.2 # 0.6.48 libpst.so.4 libpst.so.4.0.3 # 0.6.49 libpst.so.4 libpst.so.4.0.4 +# 0.6.50 libpst.so.4 libpst.so.4.1.0 diff -r a863de65e5b8 -r c947b8812120 libpst.spec.in --- a/libpst.spec.in Mon Sep 13 15:10:52 2010 -0700 +++ b/libpst.spec.in Fri Dec 24 19:26:05 2010 -0800 @@ -147,6 +147,10 @@ %changelog +* Fri Dec 24 2010 Carl Byington - 0.6.50-1 +- rfc2047 and rfc2231 encoding for non-ascii headers and + attachment filenames. + * Mon Sep 13 2010 Carl Byington - 0.6.49-1 - fix to ignore embedded objects that are not email messages fedora bugzilla 633498 diff -r a863de65e5b8 -r c947b8812120 regression/regression-tests.bash --- a/regression/regression-tests.bash Mon Sep 13 15:10:52 2010 -0700 +++ b/regression/regression-tests.bash Fri Dec 24 19:26:05 2010 -0800 @@ -101,30 +101,31 @@ [ "$2" == "reg" ] && regression="yes" [ "$regression" == "yes" ] && val="" -$func 1 ams.pst -$func 2 sample_64.pst -$func 3 test.pst -$func 4 big_mail.pst -$func 5 mbmg.archive.pst -$func 6 Single2003-read.pst -$func 7 Single2003-unread.pst -$func 8 ol2k3high.pst -$func 9 ol97high.pst -$func 10 returned_message.pst -$func 11 flow.pst -$func 12 test-html.pst -$func 13 test-text.pst -$func 14 joe.romanowski.pst -$func 15 hourig1.pst -$func 16 test-mac.pst -$func 17 harris.pst -$func 18 spam.pst -$func 19 rendgen.pst # single email appointment -$func 20 rendgen2.pst # email appointment with no termination date -$func 21 rendgen3.pst # mime signed email -$func 22 rendgen4.pst # appointment test cases -$func 23 rendgen5.pst # appointment test cases -$func 24 paul.sheer.pst # embedded rfc822 attachment +#$func 1 ams.pst +#$func 2 sample_64.pst +#$func 3 test.pst +#$func 4 big_mail.pst +#$func 5 mbmg.archive.pst +#$func 6 Single2003-read.pst +#$func 7 Single2003-unread.pst +##$func 8 ol2k3high.pst +##$func 9 ol97high.pst +#$func 10 returned_message.pst +##$func 11 flow.pst +#$func 12 test-html.pst +#$func 13 test-text.pst +#$func 14 joe.romanowski.pst +#$func 15 hourig1.pst +#$func 16 test-mac.pst +##$func 17 harris.pst +##$func 18 spam.pst +#$func 19 rendgen.pst # single email appointment +#$func 20 rendgen2.pst # email appointment with no termination date +#$func 21 rendgen3.pst # mime signed email +#$func 22 rendgen4.pst # appointment test cases +#$func 23 rendgen5.pst # appointment test cases +#$func 24 paul.sheer.pst # embedded rfc822 attachment +$func 25 jerry.pst # non ascii subject lines [ -n "$val" ] && grep 'lost:' *err | grep -v 'lost: 0 ' diff -r a863de65e5b8 -r c947b8812120 src/libpst.c --- a/src/libpst.c Mon Sep 13 15:10:52 2010 -0700 +++ b/src/libpst.c Fri Dec 24 19:26:05 2010 -0800 @@ -4385,6 +4385,76 @@ } +/** Convert str to rfc2231 encoding of str + * + * @param item pointer to the containing mapi item + * @param str pointer to the mapi string of interest + */ +void pst_rfc2231(pst_string *str) { + int needs = 0; + const int8_t *x = (int8_t *)str->str; + while (*x) { + if (*x <= 32) needs++; + x++; + } + int n = strlen(str->str) + 2*needs + 15; + char *buffer = pst_malloc(n); + strcpy(buffer, "utf-8''"); + x = (int8_t *)str->str; + const uint8_t *y = (uint8_t *)str->str; + uint8_t *z = (uint8_t *)buffer; + z += strlen(buffer); // skip the utf8 prefix + while (*y) { + if (*x <= 32) { + *(z++) = (uint8_t)'%'; + snprintf(z, 3, "%2x", *y); + z += 2; + } + else { + *(z++) = *y; + } + x++; + y++; + } + *z = '\0'; + free(str->str); + str->str = buffer; +} + + +/** Convert str to rfc2047 encoding of str, possibly enclosed in quotes if it contains spaces + * + * @param item pointer to the containing mapi item + * @param str pointer to the mapi string of interest + */ +void pst_rfc2047(pst_item *item, pst_string *str, int needs_quote) { + int has_space = 0; + int needs_coding = 0; + pst_convert_utf8(item, str); + const int8_t *x = (int8_t *)str->str; + while (*x) { + if (*x == 32) has_space = 1; + if (*x < 32) needs_coding = 1; + x++; + } + if (needs_coding) { + char *enc = pst_base64_encode_single(str->str, strlen(str->str)); + free(str->str); + int n = strlen(enc) + 20; + str->str = pst_malloc(n); + snprintf(str->str, n, "=?utf-8?B?%s?=", enc); + free(enc); + } + else if (has_space && needs_quote) { + int n = strlen(str->str) + 10; + char *buffer = pst_malloc(n); + snprintf(buffer, n, "\"%s\"", str->str); + free(str->str); + str->str = buffer; + } +} + + /** Convert str to utf8 if possible; null strings are preserved. * * @param item pointer to the containing mapi item diff -r a863de65e5b8 -r c947b8812120 src/libpst.h --- a/src/libpst.h Mon Sep 13 15:10:52 2010 -0700 +++ b/src/libpst.h Fri Dec 24 19:26:05 2010 -0800 @@ -1070,6 +1070,20 @@ const char* pst_default_charset(pst_item *item, int buflen, char* result); +/** Convert str to rfc2231 encoding of str + * @param item pointer to the containing mapi item + * @param str pointer to the mapi string of interest + */ +void pst_rfc2231(pst_string *str); + + +/** Convert str to rfc2047 encoding of str, possibly enclosed in quotes if it contains spaces + * @param item pointer to the containing mapi item + * @param str pointer to the mapi string of interest + */ +void pst_rfc2047(pst_item *item, pst_string *str, int needs_quote); + + /** Convert str to utf8 if possible; null strings are preserved. * @param item pointer to the containing mapi item * @param str pointer to the mapi string of interest diff -r a863de65e5b8 -r c947b8812120 src/libstrfunc.c --- a/src/libstrfunc.c Mon Sep 13 15:10:52 2010 -0700 +++ b/src/libstrfunc.c Fri Dec 24 19:26:05 2010 -0800 @@ -14,7 +14,7 @@ *line_count = 0; } *(*ou)++ = data; - (*line_count)++; + if (*line_count >= 0) (*line_count)++; } @@ -25,6 +25,13 @@ } +char *pst_base64_encode_single(void *data, size_t size) +{ + int line_count = -1; + return pst_base64_encode_multiple(data, size, &line_count); +} + + char *pst_base64_encode_multiple(void *data, size_t size, int *line_count) { char *output; diff -r a863de65e5b8 -r c947b8812120 src/libstrfunc.h --- a/src/libstrfunc.h Mon Sep 13 15:10:52 2010 -0700 +++ b/src/libstrfunc.h Fri Dec 24 19:26:05 2010 -0800 @@ -5,6 +5,7 @@ #include "common.h" char *pst_base64_encode(void *data, size_t size); +char *pst_base64_encode_single(void *data, size_t size); char *pst_base64_encode_multiple(void *data, size_t size, int *line_count); #endif diff -r a863de65e5b8 -r c947b8812120 src/readpst.c --- a/src/readpst.c Mon Sep 13 15:10:52 2010 -0700 +++ b/src/readpst.c Fri Dec 24 19:26:05 2010 -0800 @@ -813,7 +813,7 @@ if (y == 0) snprintf(dir_name, dirsize, "%s", dir); else - snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y, ""); // enough for 9 digits allocated above + snprintf(dir_name, dirsize, "%s" SEP_MAIL_FILE_TEMPLATE, dir, y, ""); // enough for 9 digits allocated above check_filename(dir_name); DEBUG_INFO(("about to try creating %s\n", dir_name)); @@ -1050,13 +1050,19 @@ } fprintf(f_output, "Content-Transfer-Encoding: base64\n"); - // If there is a long filename (filename2) use that, otherwise - // use the 8.3 filename (filename1) - attach_filename = (attach->filename2.str) ? attach->filename2.str : attach->filename1.str; - if (!attach_filename) { + if (attach->filename2.str) { + // use the long filename, converted to proper encoding if needed. + // it is already utf8 + pst_rfc2231(&attach->filename2); + fprintf(f_output, "Content-Disposition: attachment; \n filename*=%s\n\n", attach->filename2.str); + } + else if (attach->filename1.str) { + // short filename never needs encoding + fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", attach->filename1.str); + } + else { + // no filename is inline fprintf(f_output, "Content-Disposition: inline\n\n"); - } else { - fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", attach_filename); } (void)pst_attach_to_file_base64(pst, attach, f_output); @@ -1154,7 +1160,7 @@ int b64 = 0; uint8_t *b = (uint8_t *)body; DEBUG_ENT("test_base64"); - while (*b != 0) { + while (*b) { if ((*b < 32) && (*b != 9) && (*b != 10)) { DEBUG_INFO(("found base64 byte %d\n", (int)*b)); DEBUG_HEXDUMPC(body, strlen(body), 0x10); @@ -1453,6 +1459,18 @@ fprintf(f_output, "%s", headers); // make sure the headers end with a \n if (headers[len-1] != '\n') fprintf(f_output, "\n"); + //char *h = headers; + //while (*h) { + // char *e = strchr(h, '\n'); + // int d = 1; // normally e points to trailing \n + // if (!e) { + // e = h + strlen(h); // e points to trailing null + // d = 0; + // } + // // we could do rfc2047 encoding here if needed + // fprintf(f_output, "%.*s\n", (int)(e-h), h); + // h = e + d; + //} } } @@ -1460,7 +1478,8 @@ if (!has_from) { if (item->email->outlook_sender_name.str){ - fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name.str, sender); + pst_rfc2047(item, &item->email->outlook_sender_name, 1); + fprintf(f_output, "From: %s <%s>\n", item->email->outlook_sender_name.str, sender); } else { fprintf(f_output, "From: <%s>\n", sender); } @@ -1468,6 +1487,7 @@ if (!has_subject) { if (item->subject.str) { + pst_rfc2047(item, &item->subject, 0); fprintf(f_output, "Subject: %s\n", item->subject.str); } else { fprintf(f_output, "Subject: \n"); @@ -1475,12 +1495,12 @@ } if (!has_to && item->email->sentto_address.str) { - pst_convert_utf8(item, &item->email->sentto_address); + pst_rfc2047(item, &item->email->sentto_address, 0); fprintf(f_output, "To: %s\n", item->email->sentto_address.str); } if (!has_cc && item->email->cc_address.str) { - pst_convert_utf8(item, &item->email->cc_address); + pst_rfc2047(item, &item->email->cc_address, 0); fprintf(f_output, "Cc: %s\n", item->email->cc_address.str); }