libpst: src/readpst.c comparison

comparison src/readpst.c @ 141:fd4297884319

improve decoding of multipart/report and message/rfc822 mime types

author	Carl Byington <carl@five-ten-sg.com>
date	Sat, 14 Feb 2009 11:02:37 -0800
parents	1b3922080ca8
children	2189a6b8134e

comparison

equal deleted inserted replaced

-:fc6c54c453e5
+:fd4297884319
 char*     mk_separate_dir(char *dir);
 int       close_separate_dir();
 int       mk_separate_file(struct file_ll *f);
 char*     my_stristr(char *haystack, char *needle);
 void      check_filename(char *fname);
-void      write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst);
+void      write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst);
-void      write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char *boundary, pst_file* pst);
+void      write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, char** extra_mime_headers);
+void      write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst);
 void      header_has_field(char *header, char *field, int *flag);
+void      header_get_subfield(char *field, const char *subfield, char *body_subfield, size_t size_subfield);
 char*     header_get_field(char *header, char *field);
+char*     header_end_field(char *field);
 void      header_strip_field(char *header, char *field);
 int       test_base64(char *body);
 void      find_html_charset(char *html, char *charset, size_t charsetlen);
+void      find_rfc822_headers(char** extra_mime_headers);
 void      write_body_part(FILE* f_output, char *body, char *mime, char *charset, char *boundary);
-void      write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf);
+void      write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf, char** extra_mime_headers);
 void      write_vcard(FILE* f_output, pst_item_contact* contact, char comment[]);
 void      write_appointment(FILE* f_output, pst_item_appointment* appointment,
 pst_item_email* email, FILETIME* create_date, FILETIME* modify_date);
 void      create_enter_dir(struct file_ll* f, pst_item *item);
 void      close_enter_dir(struct file_ll *f);
 // Output Quiet is provided so that only errors are printed
 #define OUTPUT_QUIET 1
 // default mime-type for attachments that have a null mime-type
 #define MIME_TYPE_DEFAULT "application/octet-stream"
+#define RFC822            "message/rfc822"
 // output mode for contacts
 #define CMODE_VCARD 0
 #define CMODE_LIST  1
 write_vcard(ff.output, item->contact, item->comment);
 else
 fprintf(ff.output, "%s <%s>\n", item->contact->fullname, item->contact->address1);
 } else if (item->email && (item->type == PST_TYPE_NOTE || item->type == PST_TYPE_REPORT || item->type == PST_TYPE_OTHER)) {
+char *extra_mime_headers = NULL;
 if (mode == MODE_SEPARATE) mk_separate_file(&ff);
 ff.email_count++;
 DEBUG_MAIN(("main: Processing Email\n"));
 if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_REPORT) && (ff.type != PST_TYPE_OTHER)) {
 DEBUG_MAIN(("main: I have an email, but the folder isn't an email folder. Processing anyway\n"));
 }
-write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body);
+write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers);
 } else if (item->journal && (item->type == PST_TYPE_JOURNAL)) {
 // deal with journal items
 if (mode == MODE_SEPARATE) mk_separate_file(&ff);
 ff.email_count++;
 }
 DEBUG_RET();
 }
-void write_separate_attachment(char f_name[], pst_item_attach* current_attach, int attach_num, pst_file* pst)
+void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst)
 {
 FILE *fp = NULL;
 int x = 0;
 char *temp = NULL;
 // If there is a long filename (filename2) use that, otherwise
 // use the 8.3 filename (filename1)
-char *attach_filename = (current_attach->filename2) ? current_attach->filename2
+char *attach_filename = (attach->filename2) ? attach->filename2
-: current_attach->filename1;
+: attach->filename1;
 DEBUG_ENT("write_separate_attachment");
 check_filename(f_name);
 if (!attach_filename) {
 // generate our own (dummy) filename for the attachment
 }
 DEBUG_EMAIL(("Saving attachment to %s\n", temp));
 if (!(fp = fopen(temp, "w"))) {
 WARN(("write_separate_attachment: Cannot open attachment save file \"%s\"\n", temp));
 } else {
-if (current_attach->data)
+if (attach->data)
-pst_fwrite(current_attach->data, 1, current_attach->size, fp);
+pst_fwrite(attach->data, 1, attach->size, fp);
 else {
-(void)pst_attach_to_file(pst, current_attach, fp);
+(void)pst_attach_to_file(pst, attach, fp);
 }
 fclose(fp);
 }
 if (temp) free(temp);
 DEBUG_RET();
 }
-void write_inline_attachment(FILE* f_output, pst_item_attach* current_attach, char *boundary, pst_file* pst)
+void write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, char** extra_mime_headers)   // emit one attachment as a nested message part inside the current multipart body
+{   // f_output: destination stream; attach: attachment to expand; boundary: enclosing MIME boundary; pf: pst file handle passed to pst_getID and pst_parse_block; extra_mime_headers: forwarded unchanged to write_normal_email
+pst_index_ll *ptr;
+DEBUG_ENT("write_embedded_message");
+fprintf(f_output, "\n--%s\n", boundary);   // open a new MIME part
+fprintf(f_output, "Content-Type: %s\n\n", attach->mimetype);   // blank line ends the part headers; mimetype is printed as-is, so it must not be NULL here
+ptr = pst_getID(pf, attach->id_val);   // NOTE(review): ptr is dereferenced on the next line without a NULL check - confirm pst_getID cannot fail here
+pst_num_array *list = pst_parse_block(pf, ptr->id, NULL, NULL);
+if (list) {
+pst_item *item = (pst_item*) xmalloc(sizeof(pst_item));
+memset(item, 0, sizeof(pst_item));   // start from an all-zero item before pst_process fills it in
+if (!pst_process(list, item, NULL)) {   // the message is written only when pst_process returns 0
+write_normal_email(f_output, "", item, MODE_NORMAL, 0, pf, 0, extra_mime_headers);   // empty f_name, MODE_NORMAL, mode_MH = 0, save_rtf = 0
+}
+pst_freeItem(item);
+pst_free_list(list);
+}
+DEBUG_RET();
+}
+void write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst)   // write one attachment as a base64 MIME part on f_output
 {
 char *attach_filename;
 char *enc = NULL; // base64 encoded attachment
 DEBUG_ENT("write_inline_attachment");
-DEBUG_EMAIL(("Attachment Size is %i\n", current_attach->size));
+DEBUG_EMAIL(("Attachment Size is %i\n", attach->size));
-DEBUG_EMAIL(("Attachment Pointer is %p\n", current_attach->data));
+DEBUG_EMAIL(("Attachment Pointer is %p\n", attach->data));
-if (current_attach->data) {
+if (attach->data) {   // data already in memory: encode it up front so a failure is caught before anything is written
-enc = base64_encode (current_attach->data, current_attach->size);
+enc = base64_encode (attach->data, attach->size);
 if (!enc) {
 DEBUG_EMAIL(("ERROR base64_encode returned NULL. Must have failed\n"));
 DEBUG_RET();
 return;   // nothing has been written to f_output at this point
 }
 }
 fprintf(f_output, "\n--%s\n", boundary);   // open a new MIME part
-if (!current_attach->mimetype) {
+if (!attach->mimetype) {
 fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT);   // fall back to the default type when the attachment has none
 } else {
-fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype);
+fprintf(f_output, "Content-Type: %s\n", attach->mimetype);
 }
 fprintf(f_output, "Content-Transfer-Encoding: base64\n");
 // If there is a long filename (filename2) use that, otherwise
 // use the 8.3 filename (filename1)
-if (current_attach->filename2) {
+attach_filename = (attach->filename2) ? attach->filename2 : attach->filename1;
-attach_filename = current_attach->filename2;
-} else {
-attach_filename = current_attach->filename1;
-}
 if (!attach_filename) {
 fprintf(f_output, "Content-Disposition: inline\n\n");   // no filename available; the blank line ends the part headers
 } else {
 fprintf(f_output, "Content-Disposition: attachment; filename=\"%s\"\n\n", attach_filename);
 }
-if (current_attach->data) {
+if (attach->data) {
 pst_fwrite(enc, 1, strlen(enc), f_output);
 DEBUG_EMAIL(("Attachment Size after encoding is %i\n", strlen(enc)));   // NOTE(review): strlen returns size_t but the format is %i - confirm how DEBUG_EMAIL formats its arguments
 free(enc);  // caught by valgrind
 } else {
-(void)pst_attach_to_file_base64(pst, current_attach, f_output);
+(void)pst_attach_to_file_base64(pst, attach, f_output);   // no in-memory data: presumably this writes the base64 form straight to f_output; its return value is discarded
 }
 fprintf(f_output, "\n\n");
 DEBUG_RET();
 }
 void header_has_field(char *header, char *field, int *flag)   // set *flag to 1 when the header block contains the named field
 {   // the callers visible in this file pass field with a leading \n, e.g. \nFrom:
+DEBUG_ENT("header_has_field");
 if (my_stristr(header, field) || (strncasecmp(header, field+1, strlen(field)-1) == 0)) {   // match anywhere, or match at the very start of header with the first character of field skipped
 DEBUG_EMAIL(("header block has %s header\n", field+1));
 *flag = 1;   // *flag is only ever set here, never cleared, so the caller must initialise it
 }
-}
+DEBUG_RET();
+}
+void header_get_subfield(char *field, const char *subfield, char *body_subfield, size_t size_subfield)   // copy the value of subfield=... from one header field into body_subfield
+{   // field: start of a header field, or NULL (then nothing happens); body_subfield and size_subfield: output buffer, left untouched when the subfield is not found
+if (!field) return;
+DEBUG_ENT("header_get_subfield");
+char search[60];
+snprintf(search, sizeof(search), " %s=", subfield);   // the pattern requires a space directly before the subfield name
+field++;   // step past the first character of the field before searching
+char *n = header_end_field(field);   // end of this field; NULL when it runs to the end of the string
+char *s = my_stristr(field, search);
+if (n && s && (s < n)) {   // accept the match only if it lies inside this field
+char *e, *f, save;
+s += strlen(search);    // skip over subfield=
+if (*s == '"') {
+s++;   // quoted value: it runs up to the closing quote
+e = strchr(s, '"');
+}
+else {
+e = strchr(s, ';');   // unquoted value: it ends at the next ; or newline, whichever comes first
+f = strchr(s, '\n');
+if (e && f && (f < e)) e = f;   // NOTE(review): when no ; follows at all, e stays NULL and the newline found in f is ignored - confirm this is intended
+}
+if (!e || (e > n)) e = n;   // use the trailing lf as terminator if nothing better
+save = *e;   // terminate the value in place for the copy, then restore the byte afterwards
+*e = '\0';
+snprintf(body_subfield, size_subfield, "%s", s);  // copy the subfield to our buffer
+*e = save;
+DEBUG_EMAIL(("body %s %s from headers\n", subfield, body_subfield));
+}
+DEBUG_RET();
+}
 char* header_get_field(char *header, char *field)   // locate a field inside a header block; returns NULL when it is absent
 {
 char *t = my_stristr(header, field);   // presumably a case-insensitive strstr - my_stristr is defined elsewhere, verify
 if (!t && (strncasecmp(header, field+1, strlen(field)-1) == 0)) t = header;   // fallback: header itself starts with field minus its first character, so return header
 return t;
 }
+// return pointer to \n at the end of this header field,
+// or NULL if this field goes to the end of the string.
+char *header_end_field(char *field)
+{
+char *e = strchr(field+1, '\n');   // the search starts one character in, so a \n at field[0] is not taken as the end
+while (e && ((e[1] == ' ') || (e[1] == '\t'))) {   // a following line that starts with space or tab is treated as part of this field
+e = strchr(e+1, '\n');
+}
+return e;
+}
 void header_strip_field(char *header, char *field)
 {
 char *e;
 char *t = header_get_field(header, field);
 if (t) {
-e = strchr(t+1, '\n');
+char *e = header_end_field(t);
-while (e && ((e[1] == ' ') || (e[1] == '\t'))) {
-e = strchr(e+1, '\n');
-}
 if (e) {
 if (t == header) e++;   // if *t is not \n, we don't want to keep the \n at *e either.
 while (*e != '\0') {
 *t = *e;
 t++;
 int  test_base64(char *body)   // returns 1 if body holds a byte below 32 other than tab (9) or lf (10), else 0
 {
 int b64 = 0;
 uint8_t *b = (uint8_t *)body;   // scan as unsigned bytes so values above 127 never compare below 32
+DEBUG_ENT("test_base64");
 while (*b != 0) {
 if ((*b < 32) && (*b != 9) && (*b != 10)) {   // note that cr (13) also counts as a hit
 DEBUG_EMAIL(("found base64 byte %d\n", (int)*b));
 DEBUG_HEXDUMPC(body, strlen(body), 0x10);
 b64 = 1;
 break;   // the first offending byte decides the result
 }
 b++;
 }
+DEBUG_RET();
 return b64;
 }
 void find_html_charset(char *html, char *charset, size_t charsetlen)
 {
 const int  index = 1;
 const int nmatch = index+1;
 regmatch_t match[nmatch];
+DEBUG_ENT("find_html_charset");
 int rc = regexec(&meta_charset_pattern, html, nmatch, match, 0);
 if (rc == 0) {
 int s = match[index].rm_so;
 int e = match[index].rm_eo;
 if (s != -1) {
 }
 }
 else {
 DEBUG_EMAIL(("regexec returns %d\n", rc));
 }
+DEBUG_RET();
+}
+void find_rfc822_headers(char** extra_mime_headers)   // advance *extra_mime_headers past the header chunk whose Content-Type matches RFC822
+{   // the buffer is modified in place; nothing happens when *extra_mime_headers is NULL
+DEBUG_ENT("find_rfc822_headers");
+char *headers = *extra_mime_headers;
+if (headers) {
+char *temp, *t;
+while (temp = strstr(headers, "\n\n")) {   // each chunk ends at a blank line; the assignment doubles as the loop condition
+temp[1] = '\0';   // cut the buffer after the chunk; this byte is not restored afterwards
+t = header_get_field(headers, "\nContent-Type: ");
+if (t) {
+t++;
+DEBUG_EMAIL(("found content type header\n"));
+char *n = strchr(t, '\n');
+char *s = strstr(t, ": ");
+char *e = strchr(t, ';');
+if (!e || (e > n)) e = n;   // the type value ends at the first ; on this line, else at the end of the line
+if (s && (s < e)) {
+s += 2;   // step over the colon and space to the start of the value
+if (!strncasecmp(s, RFC822, e-s)) {   // NOTE(review): only e-s characters are compared, so a value that is a proper prefix of RFC822 would match too - confirm
+headers = temp+2;   // found rfc822 header
+DEBUG_EMAIL(("found 822 headers\n%s\n", headers));
+break;
+}
+}
+}
+DEBUG_EMAIL(("skipping to next block after\n%s\n", headers));
+headers = temp+2;   // skip to next chunk of headers
+}
+*extra_mime_headers = headers;   // also reached when no matching chunk was found; headers then points past the last blank line seen, if any
+}
+DEBUG_RET();
 }
 void write_body_part(FILE* f_output, char *body, char *mime, char *charset, char *boundary)
 {
 char *needfree = NULL;
+DEBUG_ENT("write_body_part");
 if (strcasecmp("utf-8", charset)) {
 // try to convert to the specified charset since it is not utf-8
 size_t rc;
 DEBUG_EMAIL(("Convert %s utf-8 to %s\n", mime, charset));
 vbuf *newer = vballoc(2);
 }
 else {
 write_email_body(f_output, body);
 }
 if (needfree) free(needfree);
-}
+DEBUG_RET();
+}
-void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf)
+void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, int mode_MH, pst_file* pst, int save_rtf, char** extra_mime_headers)
 {
 char boundary[60];
 char body_charset[60];
+char body_report[60];
 char sender[60];
 int  sender_known = 0;
 char *temp = NULL;
 int attach_num;
 time_t em_time;
 char *c_time;
-int has_from, has_subject, has_to, has_cc, has_date;
+char *headers = (item->email->header) ? item->email->header : *extra_mime_headers;
-has_from = has_subject = has_to = has_cc = has_date = 0;
+int has_from, has_subject, has_to, has_cc, has_date, has_msgid;
+has_from = has_subject = has_to = has_cc = has_date = has_msgid = 0;
 DEBUG_ENT("write_normal_email");
-// setup default body character set
+// setup default body character set and report type
 snprintf(body_charset, sizeof(body_charset), "%s", (item->email->body_charset) ? item->email->body_charset : "utf-8");
+body_report[0] = '\0';
 // setup default sender
 if (item->email->sender_address && strchr(item->email->sender_address, '@')) {
 temp = item->email->sender_address;
 sender_known = 1;
 // create our MIME boundary here.
 snprintf(boundary, sizeof(boundary), "--boundary-LibPST-iamunique-%i_-_-", rand());
 // we will always look at the headers to discover some stuff
-if (item->email->header ) {
+if (headers ) {
 char *t;
-removeCR(item->email->header);
+removeCR(headers);
-// some of the headers we get from the file are not properly defined.
+temp = strstr(headers, "\n\n");
-// they can contain some email stuff too. We will cut off the header
-// when we see a \n\n
-temp = strstr(item->email->header, "\n\n");
 if (temp) {
-temp[1] = '\0'; // stop after first \n
+// cut off our real rfc822 headers here
-DEBUG_EMAIL(("Found body text in header %s\n", temp+2));
+temp[1] = '\0';
+// pointer to all the embedded MIME headers.
+// we use these to find the actual rfc822 headers for embedded message/rfc822 mime parts
+*extra_mime_headers = temp+2;
+DEBUG_EMAIL(("Found extra mime headers\n%s\n", temp+2));
 }
 // Check if the headers have all the necessary fields
-header_has_field(item->email->header, "\nFrom: ",    &has_from);
+header_has_field(headers, "\nFrom: ",        &has_from);
-header_has_field(item->email->header, "\nTo: ",      &has_to);
+header_has_field(headers, "\nTo: ",          &has_to);
-header_has_field(item->email->header, "\nSubject: ", &has_subject);
+header_has_field(headers, "\nSubject: ",     &has_subject);
-header_has_field(item->email->header, "\nDate: ",    &has_date);
+header_has_field(headers, "\nDate: ",        &has_date);
-header_has_field(item->email->header, "\nCC: ",      &has_cc);
+header_has_field(headers, "\nCC: ",          &has_cc);
+header_has_field(headers, "\nMessage-Id: ",  &has_msgid);
-// look for charset in Content-Type header
-t = header_get_field(item->email->header, "\nContent-Type: ");
+// look for charset and report-type in Content-Type header
-if (t) {
+t = header_get_field(headers, "\nContent-Type: ");
-// assume charset= will be on the first line, rather than on a continuation line
+header_get_subfield(t, "charset", body_charset, sizeof(body_charset));
-t++;
+header_get_subfield(t, "report-type", body_report, sizeof(body_report));
-char *n = strchr(t, '\n');
-char *s = my_stristr(t, "; charset=");
-if (n && s && (s < n)) {
-char *e;
-char save;
-s += 10;    // skip over charset=
-if (*s == '"') {
-s++;
-e = strchr(s, '"');
-}
-else {
-e = strchr(s, ';');
-}
-if (!e || (e > n)) e = n;   // use the trailing lf as terminator if nothing better
-save = *e;
-*e = '\0';
-snprintf(body_charset, sizeof(body_charset), "%s", s);  // copy the charset to our buffer
-*e = save;
-DEBUG_EMAIL(("body charset %s from headers\n", body_charset));
-}
-}
 // derive a proper sender email address
 if (!sender_known) {
-t = header_get_field(item->email->header, "\nFrom: ");
+t = header_get_field(headers, "\nFrom: ");
 if (t) {
 // assume address is on the first line, rather than on a continuation line
 t++;
 char *n = strchr(t, '\n');
 char *s = strchr(t, '<');
 }
 }
 }
 // Strip out the mime headers and some others that we don't want to emit
-header_strip_field(item->email->header, "\nMicrosoft Mail Internet Headers");
+header_strip_field(headers, "\nMicrosoft Mail Internet Headers");
-header_strip_field(item->email->header, "\nMIME-Version: ");
+header_strip_field(headers, "\nMIME-Version: ");
-header_strip_field(item->email->header, "\nContent-Type: ");
+header_strip_field(headers, "\nContent-Type: ");
-header_strip_field(item->email->header, "\nContent-Transfer-Encoding: ");
+header_strip_field(headers, "\nContent-Transfer-Encoding: ");
-header_strip_field(item->email->header, "\nContent-class: ");
+header_strip_field(headers, "\nContent-class: ");
-header_strip_field(item->email->header, "\nX-MimeOLE: ");
+header_strip_field(headers, "\nX-MimeOLE: ");
-header_strip_field(item->email->header, "\nBcc:");
+header_strip_field(headers, "\nBcc:");
-header_strip_field(item->email->header, "\nX-From_: ");
+header_strip_field(headers, "\nX-From_: ");
 }
 DEBUG_EMAIL(("About to print Header\n"));
 if (item && item->email && item->email->subject && item->email->subject->subj) {
 DEBUG_EMAIL(("item->email->subject->subj = %s\n", item->email->subject->subj));
 }
 if (mode != MODE_SEPARATE) {
 // most modes need this separator line
-fprintf(f_output, "From \"%s\" %s\n", sender, c_time);
+fprintf(f_output, "From %s %s\n", sender, c_time);
 }
-if (item->email->header) {
+// print the supplied email headers
+if (headers) {
 int len;
-fprintf(f_output, "%s", item->email->header);
+fprintf(f_output, "%s", headers);
 // make sure the headers end with a \n
-len = strlen(item->email->header);
+len = strlen(headers);
-if (!len || (item->email->header[len-1] != '\n')) fprintf(f_output, "\n");
+if (!len || (headers[len-1] != '\n')) fprintf(f_output, "\n");
 }
 // create required header fields that are not already written
 if (!has_from) {
 fprintf(f_output, "From: \"%s\" <%s>\n", item->email->outlook_sender_name, sender);
 }
 if (!has_subject) {
 char c_time[C_TIME_SIZE];
 strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time));
 fprintf(f_output, "Date: %s\n", c_time);
 }
+if (!has_msgid && item->email->messageid) {
+fprintf(f_output, "Message-Id: %s\n", item->email->messageid);
+}
 // add forensic headers to capture some .pst stuff that is not really
 // needed or used by mail clients
-if (item->email->sender_address && !strchr(item->email->sender_address, '@')) {
+if (item->email->sender_address && !strchr(item->email->sender_address, '@')
+&& strcmp(item->email->sender_address, ".")) {
 fprintf(f_output, "X-libpst-forensic-sender: %s\n", item->email->sender_address);
 }
 if (item->email->bcc_address) {
 fprintf(f_output, "X-libpst-forensic-bcc: %s\n", item->email->bcc_address);
 }
 // add our own mime headers
 fprintf(f_output, "MIME-Version: 1.0\n");
-if (item->attach || (item->email->rtf_compressed && save_rtf)
+if (body_report[0] != '\0') {
-|| item->email->encrypted_body
+// multipart/report for DSN/MDN reports
-|| item->email->encrypted_htmlbody) {
+fprintf(f_output, "Content-Type: multipart/report; report-type=%s;\n\tboundary=\"%s\"\n", body_report, boundary);
+}
+else if (item->attach || (item->email->rtf_compressed && save_rtf)
+|| item->email->encrypted_body
+|| item->email->encrypted_htmlbody) {
 // use multipart/mixed if we have attachments
 fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary);
 } else {
 // else use multipart/alternative
 fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary);
 {
 pst_item_attach* attach;
 attach_num = 0;
 for (attach = item->attach; attach; attach = attach->next) {
 DEBUG_EMAIL(("Attempting Attachment encoding\n"));
-if (!attach->data) {
+if (!attach->data && attach->mimetype && !strcmp(attach->mimetype, RFC822)) {
-DEBUG_EMAIL(("Data of attachment is NULL!. Size is supposed to be %i\n", attach->size));
+DEBUG_EMAIL(("seem to have special embedded message attachment\n"));
+find_rfc822_headers(extra_mime_headers);
+write_embedded_message(f_output, attach, boundary, pst, extra_mime_headers);
 }
-if (mode == MODE_SEPARATE && !mode_MH)
+else if (mode == MODE_SEPARATE && !mode_MH)
 write_separate_attachment(f_name, attach, ++attach_num, pst);
 else
 write_inline_attachment(f_output, attach, boundary, pst);
 }
 }

Mercurial > libpst

comparison src/readpst.c @ 141:fd4297884319