libpst: src/readpst.c comparison

comparison src/readpst.c @ 345:a8577226f7a9

fixes from AJ Shankar for attachment processing and body encodings that contain embedded null chars

author	Carl Byington <carl@five-ten-sg.com>
date	Mon, 09 Mar 2015 10:47:58 -0700
parents	aedcf979f439
children	a57c15b3108a

comparison

Legend: equal | deleted | inserted | replaced

-:aedcf979f439
+:a8577226f7a9
 void      header_has_field(char *header, char *field, int *flag);
 void      header_get_subfield(char *field, const char *subfield, char *body_subfield, size_t size_subfield);
 char*     header_get_field(char *header, char *field);
 char*     header_end_field(char *field);
 void      header_strip_field(char *header, char *field);
-int       test_base64(char *body);
+int       test_base64(char *body, size_t len);
 void      find_html_charset(char *html, char *charset, size_t charsetlen);
 void      find_rfc822_headers(char** extra_mime_headers);
 void      write_body_part(FILE* f_output, pst_string *body, char *mime, char *charset, char *boundary, pst_file* pst);
 void      write_schedule_part_data(FILE* f_output, pst_item* item, const char* sender, const char* method);
 void      write_schedule_part(FILE* f_output, pst_item* item, const char* sender, const char* boundary);
 int         contact_mode = CMODE_VCARD;
 int         deleted_mode = DMODE_EXCLUDE;
 int         output_type_mode = 0xff;    // Default to all.
 int         contact_mode_specified = 0;
 int         overwrite = 0;
+int         prefer_utf8 = 0;
 int         save_rtf_body = 1;
 int         file_name_len = 10;     // enough room for MODE_SEPARATE file name
 pst_file    pstfile;
 regex_t     meta_charset_pattern;
 char*       default_charset = NULL;
 printf("cannot compile regex pattern to find content charset in html bodies\n");
 exit(3);
 }
 // command-line option handling
-while ((c = getopt(argc, argv, "a:bC:c:Dd:emhj:kMo:qrSt:uVw"))!= -1) {
+while ((c = getopt(argc, argv, "a:bC:c:Dd:emhj:kMo:qrSt:uVw8"))!= -1) {
 switch (c) {
 case 'a':
 if (optarg) {
 int n = strlen(optarg);
 acceptable_extensions = (char*)pst_malloc(n+2);
 version();
 exit(0);
 break;
 case 'w':
 overwrite = 1;
+break;
+case '8':
+prefer_utf8 = 1;
 break;
 default:
 usage();
 exit(1);
 break;
 printf("\t-q\t- Quiet. Only print error messages\n");
 printf("\t-r\t- Recursive. Output in a recursive format\n");
 printf("\t-t[eajc]\t- Set the output type list. e = email, a = attachment, j = journal, c = contact\n");
 printf("\t-u\t- Thunderbird mode. Write two extra .size and .type files\n");
 printf("\t-w\t- Overwrite any output mbox files\n");
+printf("\t-8\t- Output bodies in UTF-8, rather than original encoding, if UTF-8 version is available\n");
 printf("\n");
 printf("Only one of -M -S -e -k -m -r should be specified\n");
 DEBUG_RET();
 }
 if (e && f && (f < e)) e = f;
 }
 if (!e || (e > n)) e = n;   // use the trailing lf as terminator if nothing better
 save = *e;
 *e = '\0';
 snprintf(body_subfield, size_subfield, "%s", s);  // copy the subfield to our buffer
 *e = save;
 DEBUG_INFO(("body %s %s from headers\n", subfield, body_subfield));
 }
 DEBUG_RET();
 }
 }
 }
 }
-int  test_base64(char *body)
+int  test_base64(char *body, size_t len)
 {
 int b64 = 0;
 uint8_t *b = (uint8_t *)body;
 DEBUG_ENT("test_base64");
-while (*b) {
+while (len--) {
 if ((*b < 32) && (*b != 9) && (*b != 10)) {
 DEBUG_INFO(("found base64 byte %d\n", (int)*b));
 DEBUG_HEXDUMPC(body, strlen(body), 0x10);
 b64 = 1;
 break;
 void write_body_part(FILE* f_output, pst_string *body, char *mime, char *charset, char *boundary, pst_file* pst)
 {
 DEBUG_ENT("write_body_part");
+removeCR(body->str);
+size_t body_len = strlen(body->str);
 if (body->is_utf8 && (strcasecmp("utf-8", charset))) {
-// try to convert to the specified charset since the target
+if (prefer_utf8) {
-// is not utf-8, and the data came from a unicode (utf16) field
-// and is now in utf-8.
-size_t rc;
-DEBUG_INFO(("Convert %s utf-8 to %s\n", mime, charset));
-pst_vbuf *newer = pst_vballoc(2);
-rc = pst_vb_utf8to8bit(newer, body->str, strlen(body->str), charset);
-if (rc == (size_t)-1) {
-// unable to convert, change the charset to utf8
-free(newer->b);
-DEBUG_INFO(("Failed to convert %s utf-8 to %s\n", mime, charset));
 charset = "utf-8";
-}
+} else {
-else {
+// try to convert to the specified charset since the target
-// null terminate the output string
+// is not utf-8, and the data came from a unicode (utf16) field
-pst_vbgrow(newer, 1);
+// and is now in utf-8.
-newer->b[newer->dlen] = '\0';
+size_t rc;
-free(body->str);
+DEBUG_INFO(("Convert %s utf-8 to %s\n", mime, charset));
-body->str = newer->b;
+pst_vbuf *newer = pst_vballoc(2);
-}
+rc = pst_vb_utf8to8bit(newer, body->str, body_len, charset);
-free(newer);
+if (rc == (size_t)-1) {
-}
+// unable to convert, change the charset to utf8
-removeCR(body->str);
+free(newer->b);
-int base64 = test_base64(body->str);
+DEBUG_INFO(("Failed to convert %s utf-8 to %s\n", mime, charset));
+charset = "utf-8";
+} else {
+// null terminate the output string
+pst_vbgrow(newer, 1);
+newer->b[newer->dlen] = '\0';
+free(body->str);
+body->str = newer->b;
+body_len = newer->dlen;
+}
+free(newer);
+}
+}
+int base64 = test_base64(body->str, body_len);
 fprintf(f_output, "\n--%s\n", boundary);
 fprintf(f_output, "Content-Type: %s; charset=\"%s\"\n", mime, charset);
 if (base64) fprintf(f_output, "Content-Transfer-Encoding: base64\n");
 fprintf(f_output, "\n");
+// Any body that uses an encoding with NULLs, e.g. UTF16, will be base64-encoded here.
 if (base64) {
-char *enc = pst_base64_encode(body->str, strlen(body->str));
+char *enc = pst_base64_encode(body->str, body_len);
 if (enc) {
 write_email_body(f_output, enc);
 fprintf(f_output, "\n");
 free(enc);
 }
 em_time = pst_fileTimeToUnixTime(item->email->sent_date);
 c_time = ctime(&em_time);
 if (c_time)
 c_time[strlen(c_time)-1] = '\0'; //remove end \n
 else
-c_time = "Fri Dec 28 12:06:21 2001";
+c_time = "Thu Jan 1 00:00:00 1970";
 } else
-c_time = "Fri Dec 28 12:06:21 2001";
+c_time = "Thu Jan 1 00:00:00 1970";
 // create our MIME boundaries here.
 snprintf(boundary, sizeof(boundary), "--boundary-LibPST-iamunique-%i_-_-", rand());
 snprintf(altboundary, sizeof(altboundary), "alt-%s", boundary);

Mercurial > libpst

comparison src/readpst.c @ 345:a8577226f7a9