Mercurial > libpst
view src/msg.cpp @ 355:d1f930be4711
From Jeffrey Morlan:
pst_build_id_ptr and pst_build_desc_ptr require that the first child
of a BTree page have the same starting ID as itself. This is not
required by the spec, and is not true in many real-world PSTs
(presumably, the original first child of the page got
deleted). Because of this, many emails are not being extracted from
these PSTs. It also triggers an infinite loop in lspst (a separate
bug, also fixed)
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Wed, 06 Jul 2016 10:12:22 -0700 |
parents | c5e7f13d2836 |
children | 1c458f13d1d3 |
line wrap: on
line source
extern "C" { #include "define.h" #include "msg.h" #include <gsf/gsf-utils.h> #include <gsf/gsf-input-stdio.h> #include <gsf/gsf-infile.h> #include <gsf/gsf-infile-stdio.h> #include <gsf/gsf-output-stdio.h> #include <gsf/gsf-outfile.h> #include <gsf/gsf-outfile-msole.h> } #include <list> #include <vector> #include <string> using namespace std; struct property { uint32_t tag; uint32_t flags; uint32_t length; // or value uint32_t reserved; }; typedef list<property> property_list; /** Convert str to an 8 bit charset if it is utf8, null strings are preserved. * * @param str reference to the mapi string of interest * @param charset pointer to the 8 bit charset to use */ static void convert_8bit(pst_string &str, const char *charset); static void convert_8bit(pst_string &str, const char *charset) { if (!str.str) return; // null if (!str.is_utf8) return; // not utf8 DEBUG_ENT("convert_8bit"); pst_vbuf *newer = pst_vballoc(2); size_t strsize = strlen(str.str); size_t rc = pst_vb_utf8to8bit(newer, str.str, strsize, charset); if (rc == (size_t)-1) { // unable to convert, change the charset to utf8 free(newer->b); DEBUG_INFO(("Failed to convert utf-8 to %s\n", charset)); DEBUG_HEXDUMPC(str.str, strsize, 0x10); } else { // null terminate the output string pst_vbgrow(newer, 1); newer->b[newer->dlen] = '\0'; free(str.str); str.str = newer->b; } free(newer); DEBUG_RET(); } static void empty_property(GsfOutfile *out, uint32_t tag); static void empty_property(GsfOutfile *out, uint32_t tag) { vector<char> n(50); snprintf(&n[0], n.size(), "__substg1.0_%08X", tag); GsfOutput* dst = gsf_outfile_new_child(out, &n[0], false); gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); } static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char *contents, size_t size); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char *contents, size_t size) { if (!contents) return; size_t term = ((tag & 0x0000ffff) == 0x001e) ? 1 : ((tag & 0x0000ffff) == 0x001f) ? 2 : 0; // null terminator vector<char> n(50); snprintf(&n[0], n.size(), "__substg1.0_%08X", tag); GsfOutput* dst = gsf_outfile_new_child(out, &n[0], false); gsf_output_write(dst, size, (const guint8*)contents); if (term) { memset(&n[0], 0, term); gsf_output_write(dst, term, (const guint8*)&n[0]); size += term; } gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); property p; p.tag = tag; p.flags = 0x6; // make all the properties writable p.length = size; p.reserved = 0; prop.push_back(p); } static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, FILE *fp); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, FILE *fp) { vector<char> n(50); snprintf(&n[0], n.size(), "__substg1.0_%08X", tag); GsfOutput* dst = gsf_outfile_new_child(out, &n[0], false); size_t size = 0; const size_t bsize = 10000; char buf[bsize]; while (1) { size_t s = fread(buf, 1, bsize, fp); if (!s) break; gsf_output_write(dst, s, (const guint8*)buf); } gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); property p; p.tag = tag; p.flags = 0x6; // make all the properties writable p.length = size; p.reserved = 0; prop.push_back(p); } static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents) { if (contents.str) { convert_8bit(contents, charset); string_property(out, prop, tag, contents.str, strlen(contents.str)); } } static void strin0_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents); static void strin0_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents) { if (contents.str) { convert_8bit(contents, charset); string_property(out, prop, tag, contents.str, strlen(contents.str)+1); } } static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const string &contents); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const string &contents) { string_property(out, prop, tag, contents.c_str(), contents.size()); } static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, pst_binary &contents); static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, pst_binary &contents) { if (contents.size) string_property(out, prop, tag, contents.data, contents.size); } static void write_properties(GsfOutfile *out, property_list &prop, const guint8* header, size_t hlen); static void write_properties(GsfOutfile *out, property_list &prop, const guint8* header, size_t hlen) { GsfOutput* dst = gsf_outfile_new_child(out, "__properties_version1.0", false); gsf_output_write(dst, hlen, header); for (property_list::iterator i=prop.begin(); i!=prop.end(); i++) { property &p = *i; gsf_output_write(dst, sizeof(property), (const guint8*)&p); } gsf_output_close(dst); g_object_unref(G_OBJECT(dst)); } static void int_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value); static void int_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value) { property p; p.tag = tag; p.flags = flags; p.length = value; p.reserved = 0; prop_list.push_back(p); } static void i64_property(property_list &prop_list, uint32_t tag, uint32_t flags, FILETIME *value); static void i64_property(property_list &prop_list, uint32_t tag, uint32_t flags, FILETIME *value) { if (value) { property p; p.tag = tag; p.flags = flags; p.length = value->dwLowDateTime; p.reserved = value->dwHighDateTime; prop_list.push_back(p); } } static void nzi_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value); static void nzi_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value) { if (value) int_property(prop_list, tag, flags, value); } void write_msg_email(char *fname, pst_item* item, pst_file* pst) { // this is not an email item if (!item->email) return; DEBUG_ENT("write_msg_email"); pst_item_email &email = *(item->email); char charset[30]; const char* body_charset = pst_default_charset(item, sizeof(charset), charset); DEBUG_INFO(("%s body charset seems to be %s\n", fname, body_charset)); body_charset = "iso-8859-1//TRANSLIT//IGNORE"; gsf_init(); GsfOutfile *outfile; GsfOutput *output; GError *err = NULL; output = gsf_output_stdio_new(fname, &err); if (output == NULL) { gsf_shutdown(); DEBUG_INFO(("unable to open output .msg file %s\n", fname)); DEBUG_RET(); return; } struct top_property_header { uint32_t reserved1; uint32_t reserved2; uint32_t next_recipient; // same as recipient count uint32_t next_attachment; // same as attachment count uint32_t recipient_count; uint32_t attachment_count; uint32_t reserved3; uint32_t reserved4; }; top_property_header top_head; memset(&top_head, 0, sizeof(top_head)); outfile = gsf_outfile_msole_new(output); g_object_unref(G_OBJECT(output)); output = GSF_OUTPUT(outfile); property_list prop_list; int_property(prop_list, 0x00170003, 0x6, email.importance); nzi_property(prop_list, 0x0023000B, 0x6, email.delivery_report); nzi_property(prop_list, 0x00260003, 0x6, email.priority); nzi_property(prop_list, 0x0029000B, 0x6, email.read_receipt); nzi_property(prop_list, 0x002E0003, 0x6, email.original_sensitivity); nzi_property(prop_list, 0x00360003, 0x6, email.sensitivity); nzi_property(prop_list, 0x0C17000B, 0x6, email.reply_requested); nzi_property(prop_list, 0x0E01000B, 0x6, email.delete_after_submit); int_property(prop_list, 0x0E070003, 0x6, item->flags); i64_property(prop_list, 0x00390040, 0x6, email.sent_date); GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x001A001E, item->ascii_type); string_property(out, prop_list, 0x0037001E, body_charset, item->subject); strin0_property(out, prop_list, 0x003B0102, body_charset, email.outlook_sender); string_property(out, prop_list, 0x003D001E, string("")); string_property(out, prop_list, 0x0040001E, body_charset, email.outlook_received_name1); string_property(out, prop_list, 0x0042001E, body_charset, email.outlook_sender_name); string_property(out, prop_list, 0x0044001E, body_charset, email.outlook_recipient_name); string_property(out, prop_list, 0x0050001E, body_charset, email.reply_to); strin0_property(out, prop_list, 0x00510102, body_charset, email.outlook_recipient); strin0_property(out, prop_list, 0x00520102, body_charset, email.outlook_recipient2); string_property(out, prop_list, 0x0064001E, body_charset, email.sender_access); string_property(out, prop_list, 0x0065001E, body_charset, email.sender_address); string_property(out, prop_list, 0x0070001E, body_charset, email.processed_subject); string_property(out, prop_list, 0x00710102, email.conversation_index); string_property(out, prop_list, 0x0072001E, body_charset, email.original_bcc); string_property(out, prop_list, 0x0073001E, body_charset, email.original_cc); string_property(out, prop_list, 0x0074001E, body_charset, email.original_to); string_property(out, prop_list, 0x0075001E, body_charset, email.recip_access); string_property(out, prop_list, 0x0076001E, body_charset, email.recip_address); string_property(out, prop_list, 0x0077001E, body_charset, email.recip2_access); string_property(out, prop_list, 0x0078001E, body_charset, email.recip2_address); string_property(out, prop_list, 0x007D001E, body_charset, email.header); string_property(out, prop_list, 0x0C1A001E, body_charset, email.outlook_sender_name2); strin0_property(out, prop_list, 0x0C1D0102, body_charset, email.outlook_sender2); string_property(out, prop_list, 0x0C1E001E, body_charset, email.sender2_access); string_property(out, prop_list, 0x0C1F001E, body_charset, email.sender2_address); string_property(out, prop_list, 0x0E02001E, body_charset, email.bcc_address); string_property(out, prop_list, 0x0E03001E, body_charset, email.cc_address); string_property(out, prop_list, 0x0E04001E, body_charset, email.sentto_address); string_property(out, prop_list, 0x0E1D001E, body_charset, email.outlook_normalized_subject); string_property(out, prop_list, 0x1000001E, body_charset, item->body); string_property(out, prop_list, 0x1013001E, body_charset, email.htmlbody); string_property(out, prop_list, 0x1035001E, body_charset, email.messageid); string_property(out, prop_list, 0x1042001E, body_charset, email.in_reply_to); string_property(out, prop_list, 0x1046001E, body_charset, email.return_path_address); // any property over 0x8000 needs entries in the __nameid to make them // either string named or numerical named properties. { vector<char> n(50); { snprintf(&n[0], n.size(), "__recip_version1.0_#%08X", top_head.recipient_count); GsfOutput *output = gsf_outfile_new_child(out, &n[0], true); { int v = 1; // to property_list prop_list; int_property(prop_list, 0x0C150003, 0x6, v); // PidTagRecipientType int_property(prop_list, 0x30000003, 0x6, top_head.recipient_count); // PR_ROWID GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x3001001E, body_charset, item->file_as); if (item->contact) { string_property(out, prop_list, 0x3002001E, body_charset, item->contact->address1_transport); string_property(out, prop_list, 0x3003001E, body_charset, item->contact->address1); string_property(out, prop_list, 0x5ff6001E, body_charset, item->contact->address1); } strin0_property(out, prop_list, 0x300B0102, body_charset, email.outlook_search_key); write_properties(out, prop_list, (const guint8*)&top_head, 8); // convenient 8 bytes of reserved zeros gsf_output_close(output); g_object_unref(G_OBJECT(output)); top_head.next_recipient++; top_head.recipient_count++; } } if (email.cc_address.str) { snprintf(&n[0], n.size(), "__recip_version1.0_#%08X", top_head.recipient_count); GsfOutput *output = gsf_outfile_new_child(out, &n[0], true); { int v = 2; // cc property_list prop_list; int_property(prop_list, 0x0C150003, 0x6, v); // PidTagRecipientType int_property(prop_list, 0x30000003, 0x6, top_head.recipient_count); // PR_ROWID GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x3001001E, body_charset, email.cc_address); string_property(out, prop_list, 0x3003001E, body_charset, email.cc_address); string_property(out, prop_list, 0x5ff6001E, body_charset, email.cc_address); write_properties(out, prop_list, (const guint8*)&top_head, 8); // convenient 8 bytes of reserved zeros gsf_output_close(output); g_object_unref(G_OBJECT(output)); top_head.next_recipient++; top_head.recipient_count++; } } if (email.bcc_address.str) { snprintf(&n[0], n.size(), "__recip_version1.0_#%08X", top_head.recipient_count); GsfOutput *output = gsf_outfile_new_child(out, &n[0], true); { int v = 3; // bcc property_list prop_list; int_property(prop_list, 0x0C150003, 0x6, v); // PidTagRecipientType int_property(prop_list, 0x30000003, 0x6, top_head.recipient_count); // PR_ROWID GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x3001001E, body_charset, email.bcc_address); string_property(out, prop_list, 0x3003001E, body_charset, email.bcc_address); string_property(out, prop_list, 0x5ff6001E, body_charset, email.bcc_address); write_properties(out, prop_list, (const guint8*)&top_head, 8); // convenient 8 bytes of reserved zeros gsf_output_close(output); g_object_unref(G_OBJECT(output)); top_head.next_recipient++; top_head.recipient_count++; } } } pst_item_attach *a = item->attach; while (a) { if (a->method == PST_ATTACH_EMBEDDED) { // not implemented yet } else if (a->data.data || a->i_id) { vector<char> n(50); snprintf(&n[0], n.size(), "__attach_version1.0_#%08X", top_head.attachment_count); GsfOutput *output = gsf_outfile_new_child(out, &n[0], true); { FILE *fp = fopen("temp_file_attachment", "w+b"); if (fp) { pst_attach_to_file(pst, a, fp); // data is now in the file fseek(fp, 0, SEEK_SET); property_list prop_list; int_property(prop_list, 0x0E210003, 0x2, top_head.attachment_count); // MAPI_ATTACH_NUM int_property(prop_list, 0x0FF40003, 0x2, 2); // PR_ACCESS read int_property(prop_list, 0x0FF70003, 0x2, 0); // PR_ACCESS_LEVEL read only int_property(prop_list, 0x0FFE0003, 0x2, 7); // PR_OBJECT_TYPE attachment int_property(prop_list, 0x37050003, 0x7, 1); // PR_ATTACH_METHOD by value int_property(prop_list, 0x370B0003, 0x7, a->position); // PR_RENDERING_POSITION int_property(prop_list, 0x37100003, 0x6, a->sequence); // PR_ATTACH_MIME_SEQUENCE GsfOutfile *out = GSF_OUTFILE (output); string_property(out, prop_list, 0x0FF90102, item->record_key); string_property(out, prop_list, 0x37010102, fp); if (a->filename2.str) { // have long file name string_property(out, prop_list, 0x3707001E, body_charset, a->filename2); } else if (a->filename1.str) { // have short file name string_property(out, prop_list, 0x3704001E, body_charset, a->filename1); } else { // make up a name const char *n = "inline"; string_property(out, prop_list, 0x3704001E, n, strlen(n)); } string_property(out, prop_list, 0x370E001E, body_charset, a->mimetype); write_properties(out, prop_list, (const guint8*)&top_head, 8); // convenient 8 bytes of reserved zeros gsf_output_close(output); g_object_unref(G_OBJECT(output)); top_head.next_attachment++; top_head.attachment_count++; fclose(fp); } } } a = a->next; } write_properties(out, prop_list, (const guint8*)&top_head, sizeof(top_head)); { GsfOutput *output = gsf_outfile_new_child(out, "__nameid_version1.0", true); { GsfOutfile *out = GSF_OUTFILE (output); empty_property(out, 0x00020102); empty_property(out, 0x00030102); empty_property(out, 0x00040102); gsf_output_close(output); g_object_unref(G_OBJECT(output)); } } gsf_output_close(output); g_object_unref(G_OBJECT(output)); gsf_shutdown(); DEBUG_RET(); }