changeset 316:c4537664ff50

merge .msg generation code
author Carl Byington <carl@five-ten-sg.com>
date Mon, 24 Dec 2012 17:51:12 -0800
parents db6db9a26a19 (current diff) 3e6cdd543857 (diff)
children af6ae20be5ba
files ChangeLog NEWS configure.in libpst.spec.in regression/regression-tests.bash src/Makefile.am src/libpst.c src/libpst.h src/readpst.c xml/libpst.in
diffstat 12 files changed, 501 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Mon Dec 24 16:26:39 2012 -0800
+++ b/ChangeLog	Mon Dec 24 17:51:12 2012 -0800
@@ -54,6 +54,14 @@
     * prefer libpthread over librt for finding sem_init function.
     * rebuild for fedora 13 change in implicit dso linking semantics.
 
+LibPST 0.6.48 (2010-05-24)
+===============================
+    * fix to/cc/bcc recipients in .msg file output format
+
+LibPST 0.6.46 (2009-12-11)
+===============================
+    * add readpst -m switch to produce Outlook .msg files
+
 LibPST 0.6.45 (2009-11-18)
 ===============================
     * patch from Hugo DesRosiers to export categories and notes into vcards.
--- a/NEWS	Mon Dec 24 16:26:39 2012 -0800
+++ b/NEWS	Mon Dec 24 17:51:12 2012 -0800
@@ -8,6 +8,8 @@
 0.6.48  2010-09-02 fix for broken internet headers from Outlook, change to mboxrd quoting
 0.6.47  2010-05-07 patches from Kenneth Berland for solaris
 0.6.46  2010-02-13 fixes for fedora 13 change in implicit dso linking semantics
+0.6.48  2010-05-24 fix to/cc/bcc recipients in .msg file output format
+0.6.46  2009-12-11 add readpst -m switch to produce Outlook .msg files
 0.6.45  2009-11-18 patch from Hugo DesRosiers to export categories and notes into vcards
 0.6.44  2009-09-20 patch from Lee Ayres to add file name extensions in separate mode
 0.6.43  2009-09-12 patches from Justin Greer, Chris White, Roberto Polli; better rfc822 embedded message decoding
--- a/configure.in	Mon Dec 24 16:26:39 2012 -0800
+++ b/configure.in	Mon Dec 24 17:51:12 2012 -0800
@@ -355,6 +355,10 @@
 AC_MSG_RESULT([$enable_profiling])
 AM_CONDITIONAL(GPROF_PROFILING, [test "$enable_profiling" = "yes"])
 
+gsf_flags="`pkg-config libgsf-1 --cflags`"
+gsf_libs="`pkg-config libgsf-1 --libs`"
+AC_SUBST(GSF_FLAGS, [$gsf_flags])
+AC_SUBST(GSF_LIBS, [$gsf_libs])
 
 AC_OUTPUT(                  \
     Makefile                \
--- a/libpst.spec.in	Mon Dec 24 16:26:39 2012 -0800
+++ b/libpst.spec.in	Mon Dec 24 17:51:12 2012 -0800
@@ -7,9 +7,9 @@
 Source:             http://www.five-ten-sg.com/%{name}/packages/%{name}-%{version}.tar.gz
 BuildRoot:          %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 URL:                http://www.five-ten-sg.com/%{name}/
-Requires:           ImageMagick
+Requires:           ImageMagick libgsf
 Requires:           %{name}-libs = %{version}-%{release}
-BuildRequires:      ImageMagick freetype-devel gd-devel libjpeg-devel zlib-devel python-devel boost-devel
+BuildRequires:      ImageMagick freetype-devel gd-devel libjpeg-devel zlib-devel python-devel boost-devel libgsf-devel
 
 %{!?python_sitelib:  %global python_sitelib  %(%{__python} -c "from distutils.sysconfig import get_python_lib; print get_python_lib()")}
 %{!?python_sitearch: %global python_sitearch %(%{__python} -c "from distutils.sysconfig import get_python_lib; print get_python_lib(1)")}
--- a/src/Makefile.am	Mon Dec 24 16:26:39 2012 -0800
+++ b/src/Makefile.am	Mon Dec 24 17:51:12 2012 -0800
@@ -45,7 +45,7 @@
     bin_PROGRAMS   += pst2dii
 endif
 lspst_SOURCES       = lspst.c          $(common_header)
-readpst_SOURCES     = readpst.c        $(common_header)
+readpst_SOURCES     = readpst.c        $(common_header) msg.cpp msg.h
 pst2ldif_SOURCES    = pst2ldif.cpp     $(common_header)
 pst2dii_SOURCES     = pst2dii.cpp      $(common_header)
 deltasearch_SOURCES = deltasearch.cpp  $(common_header)
@@ -53,6 +53,7 @@
 getidblock_SOURCES  = getidblock.c     $(common_header)
 nick2ldif_SOURCES   = nick2ldif.cpp    $(common_header)
 
+readpst_CPPFLAGS    = $(AM_CPPFLAGS) $(GSF_FLAGS)
 
 lspst_DEPENDENCIES        = libpst.la
 readpst_DEPENDENCIES      = libpst.la
@@ -91,7 +92,7 @@
 
 # the library search path.
 lspst_LDADD       = $(all_libraries) $(PSTLIB) $(LTLIBICONV)
-readpst_LDADD     = $(all_libraries) $(PSTLIB) $(LTLIBICONV) $(REGEXLIB)
+readpst_LDADD     = $(all_libraries) $(PSTLIB) $(LTLIBICONV) $(REGEXLIB) $(GSF_LIBS)
 pst2ldif_LDADD    = $(all_libraries) $(PSTLIB) $(LTLIBICONV)
 pst2dii_LDADD     = $(all_libraries) $(PSTLIB) $(LTLIBICONV) -lgd
 deltasearch_LDADD = $(all_libraries) $(PSTLIB) $(LTLIBICONV)
--- a/src/libpst.c	Mon Dec 24 16:26:39 2012 -0800
+++ b/src/libpst.c	Mon Dec 24 17:51:12 2012 -0800
@@ -2300,7 +2300,7 @@
                     DEBUG_INFO(("Recipient Structure 1 -- NOT PROCESSED\n"));
                     break;
                 case 0x0040: // PR_RECEIVED_BY_NAME Name of Recipient Structure
-                    DEBUG_INFO(("Received By Name 1 -- NOT PROCESSED\n"));
+                    LIST_COPY_EMAIL_STR("Received By Name 1", item->email->outlook_received_name1);
                     break;
                 case 0x0041: // PR_SENT_REPRESENTING_ENTRYID Structure containing Sender
                     DEBUG_INFO(("Sent on behalf of Structure 1 -- NOT PROCESSED\n"));
@@ -2393,7 +2393,7 @@
                     DEBUG_INFO(("Sender Structure 2 -- NOT PROCESSED\n"));
                     break;
                 case 0x0C1A: // PR_SENDER_NAME Name of Sender Structure 2
-                    DEBUG_INFO(("Name of Sender Structure 2 -- NOT PROCESSED\n"));
+                    LIST_COPY_EMAIL_STR("Name of Sender Structure 2", item->email->outlook_sender_name2);
                     break;
                 case 0x0C1B: // PR_SUPPLEMENTARY_INFO
                     LIST_COPY_EMAIL_STR("Supplementary info", item->email->supplementary_info);
@@ -2435,6 +2435,9 @@
                     // folder that this message is sent to after submission
                     LIST_COPY_EMAIL_ENTRYID("Sentmail EntryID", item->email->sentmail_folder);
                     break;
+                case 0x0E1D: // PR_NORMALIZED_SUBJECT
+                    LIST_COPY_EMAIL_STR("Normalized subject", item->email->outlook_normalized_subject);
+                    break;
                 case 0x0E1F: // PR_RTF_IN_SYNC
                     // True means that the rtf version is same as text body
                     // False means rtf version is more up-to-date than text body
@@ -2514,7 +2517,7 @@
                     LIST_COPY_TIME("Date 5 (Modify Date)", item->modify_date);
                     break;
                 case 0x300B: // PR_SEARCH_KEY Record Header 2
-                    DEBUG_INFO(("Record Search 2 -- NOT PROCESSED\n"));
+                    LIST_COPY_EMAIL_STR("Record Search 2", item->email->outlook_search_key);
                     break;
                 case 0x35DF: // PR_VALID_FOLDER_MASK
                     LIST_COPY_STORE_INT32("Valid Folder Mask", item->message_store->valid_mask);
--- a/src/libpst.h	Mon Dec 24 16:26:39 2012 -0800
+++ b/src/libpst.h	Mon Dec 24 17:51:12 2012 -0800
@@ -319,6 +319,16 @@
     pst_string  supplementary_info;
     /** mapi element 0x0c20 PR_NDR_STATUS_CODE */
     int32_t     ndr_status_code;
+
+    // elements added for .msg processing
+    /** mapi element 0x0040 PR_RECEIVED_BY_NAME */
+    pst_string  outlook_received_name1;
+    /** mapi element 0x0c1a PR_SENDER_NAME */
+    pst_string  outlook_sender_name2;
+    /** mapi element 0x0e1d PR_NORMALIZED_SUBJECT */
+    pst_string  outlook_normalized_subject;
+    /** mapi element 0x300b PR_SEARCH_KEY */
+    pst_string  outlook_search_key;
 } pst_item_email;
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/msg.cpp	Mon Dec 24 17:51:12 2012 -0800
@@ -0,0 +1,419 @@
+extern "C" {
+    #include "define.h"
+    #include "msg.h"
+    #include <gsf/gsf-utils.h>
+
+    #include <gsf/gsf-input-stdio.h>
+    #include <gsf/gsf-infile.h>
+    #include <gsf/gsf-infile-stdio.h>
+
+    #include <gsf/gsf-output-stdio.h>
+    #include <gsf/gsf-outfile.h>
+    #include <gsf/gsf-outfile-msole.h>
+}
+
+#include <list>
+#include <vector>
+#include <string>
+
+using namespace std;
+
+/** One fixed size entry of a __properties_version1.0 stream.
+ *
+ *  write_properties() copies each entry to the stream as the raw bytes of
+ *  this structure (four 32 bit words), so the field order matters.
+ */
+struct property {
+    uint32_t  tag;      // mapi element id in the high 16 bits, type code (0x001e, 0x0102, ...) in the low 16 bits
+    uint32_t  flags;    // callers in this file pass 0x2, 0x6 or 0x7
+    uint32_t  length; // or value
+    uint32_t  reserved; // zero, except i64_property() stores the high 32 bits of the value here
+};
+// the entries collected for one storage, in the order they will be written
+typedef list<property> property_list;
+
+
+/** Convert str to an 8 bit charset if it is utf8, null strings are preserved.
+ *
+ *  On success str.str is replaced by a newly allocated, null terminated
+ *  buffer and str.is_utf8 is cleared, so calling this again on the same
+ *  string (several fields are written to more than one property) does not
+ *  try to convert the already converted 8 bit data a second time.
+ *  If the conversion fails, str is left completely unchanged.
+ *
+ *  @param str     reference to the mapi string of interest
+ *  @param charset pointer to the 8 bit charset to use
+ */
+static void convert_8bit(pst_string &str, const char *charset);
+static void convert_8bit(pst_string &str, const char *charset) {
+    if (!str.str)     return;  // null
+    if (!str.is_utf8) return;  // not utf8
+
+    DEBUG_ENT("convert_8bit");
+    pst_vbuf *newer = pst_vballoc(2);
+    size_t strsize = strlen(str.str);
+    size_t rc = pst_vb_utf8to8bit(newer, str.str, strsize, charset);
+    if (rc == (size_t)-1) {
+        // unable to convert, keep the original utf8 string and drop the scratch buffer
+        free(newer->b);
+        DEBUG_INFO(("Failed to convert utf-8 to %s\n", charset));
+        DEBUG_HEXDUMPC(str.str, strsize, 0x10);
+    }
+    else {
+        // null terminate the output string
+        pst_vbgrow(newer, 1);
+        newer->b[newer->dlen] = '\0';
+        free(str.str);
+        // the string now owns the converted buffer, only the vbuf header is freed below
+        str.str     = newer->b;
+        str.is_utf8 = 0;    // the contents are no longer utf8
+    }
+    free(newer);
+    DEBUG_RET();
+}
+
+
+/** Create a zero length __substg1.0_<tag> stream inside a storage.
+ *
+ *  @param out  storage that receives the new stream
+ *  @param tag  property tag, written as 8 upper case hex digits in the stream name
+ */
+static void empty_property(GsfOutfile *out, uint32_t tag);
+static void empty_property(GsfOutfile *out, uint32_t tag) {
+    char stream_name[50];
+    snprintf(stream_name, sizeof(stream_name), "__substg1.0_%08X", tag);
+
+    GsfOutput *stream = gsf_outfile_new_child(out, stream_name, false);
+    gsf_output_close(stream);
+    g_object_unref(G_OBJECT(stream));
+}
+
+
+/** Write a buffer as a __substg1.0_<tag> stream and record its property entry.
+ *
+ *  Tags whose low 16 bits are 0x001e get one trailing zero byte, tags whose
+ *  low 16 bits are 0x001f get two; the recorded length includes those bytes.
+ *  Nothing is written when contents is null.
+ *
+ *  @param out      storage that receives the new stream
+ *  @param prop     property list that receives the matching entry
+ *  @param tag      property tag
+ *  @param contents bytes to write, may be null
+ *  @param size     number of bytes in contents
+ */
+static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char *contents, size_t size);
+static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char *contents, size_t size) {
+    if (!contents) return;
+
+    // width of the null terminator, selected by the type half of the tag
+    size_t term = 0;
+    switch (tag & 0x0000ffff) {
+        case 0x001e: term = 1; break;
+        case 0x001f: term = 2; break;
+        default:                break;
+    }
+
+    char stream_name[50];
+    snprintf(stream_name, sizeof(stream_name), "__substg1.0_%08X", tag);
+
+    GsfOutput *stream = gsf_outfile_new_child(out, stream_name, false);
+    gsf_output_write(stream, size, (const guint8*)contents);
+    if (term) {
+        const guint8 zeros[2] = {0, 0};
+        gsf_output_write(stream, term, zeros);
+        size += term;
+    }
+    gsf_output_close(stream);
+    g_object_unref(G_OBJECT(stream));
+
+    property entry;
+    entry.tag      = tag;
+    entry.flags    = 0x6;   // make all the properties writable
+    entry.length   = size;
+    entry.reserved = 0;
+    prop.push_back(entry);
+}
+
+
+/** Copy the rest of a file into a __substg1.0_<tag> stream and record its property entry.
+ *
+ *  The file is read from its current position until fread() returns no
+ *  more data. The recorded length is the number of bytes actually copied.
+ *
+ *  @param out  storage that receives the new stream
+ *  @param prop property list that receives the matching entry
+ *  @param tag  property tag
+ *  @param fp   open file positioned at the first byte to copy
+ */
+static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, FILE *fp);
+static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, FILE *fp) {
+    vector<char> n(50);
+    snprintf(&n[0], n.size(), "__substg1.0_%08X", tag);
+    GsfOutput* dst = gsf_outfile_new_child(out, &n[0], false);
+
+    size_t size = 0;
+    const size_t bsize = 10000;
+    char buf[bsize];
+
+    while (1) {
+        size_t s = fread(buf, 1, bsize, fp);
+        if (!s) break;
+        gsf_output_write(dst, s, (const guint8*)buf);
+        size += s;  // without this the property entry always claimed a zero length stream
+    }
+
+    gsf_output_close(dst);
+    g_object_unref(G_OBJECT(dst));
+
+    property p;
+    p.tag      = tag;
+    p.flags    = 0x6;   // make all the properties writable
+    p.length   = size;
+    p.reserved = 0;
+    prop.push_back(p);
+}
+
+
+/** Write a pst_string as a string property, converting it to charset first.
+ *
+ *  Null strings are skipped. The conversion is done in place by convert_8bit().
+ *
+ *  @param out      storage that receives the new stream
+ *  @param prop     property list that receives the matching entry
+ *  @param tag      property tag
+ *  @param charset  8 bit charset handed to convert_8bit()
+ *  @param contents string to write
+ */
+static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents);
+static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents) {
+    if (!contents.str) return;
+
+    convert_8bit(contents, charset);
+    const char *text = contents.str;
+    string_property(out, prop, tag, text, strlen(text));
+}
+
+
+/** Same as the pst_string flavour of string_property(), but the length
+ *  passed on is strlen()+1, so the string's own null terminator is part
+ *  of the data that is written.
+ *
+ *  @param out      storage that receives the new stream
+ *  @param prop     property list that receives the matching entry
+ *  @param tag      property tag
+ *  @param charset  8 bit charset handed to convert_8bit()
+ *  @param contents string to write, skipped when null
+ */
+static void strin0_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents);
+static void strin0_property(GsfOutfile *out, property_list &prop, uint32_t tag, const char* charset, pst_string &contents) {
+    if (!contents.str) return;
+
+    convert_8bit(contents, charset);
+    const char *text = contents.str;
+    string_property(out, prop, tag, text, strlen(text)+1);
+}
+
+
+/** Write a std::string as a property; an empty string is still written.
+ *
+ *  @param out      storage that receives the new stream
+ *  @param prop     property list that receives the matching entry
+ *  @param tag      property tag
+ *  @param contents text to write
+ */
+static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const string &contents);
+static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, const string &contents) {
+    const char   *text   = contents.c_str();
+    const size_t  length = contents.size();
+    string_property(out, prop, tag, text, length);
+}
+
+
+/** Write a pst_binary blob as a property; zero sized blobs are skipped.
+ *
+ *  @param out      storage that receives the new stream
+ *  @param prop     property list that receives the matching entry
+ *  @param tag      property tag
+ *  @param contents binary data to write
+ */
+static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, pst_binary &contents);
+static void string_property(GsfOutfile *out, property_list &prop, uint32_t tag, pst_binary &contents) {
+    if (!contents.size) return;
+    string_property(out, prop, tag, contents.data, contents.size);
+}
+
+
+/** Write the __properties_version1.0 stream of a storage.
+ *
+ *  The stream is the caller supplied header followed by every entry of
+ *  prop, each copied as the raw bytes of the property structure.
+ *
+ *  @param out    storage that receives the stream
+ *  @param prop   entries to write, in list order
+ *  @param header bytes written before the first entry
+ *  @param hlen   number of header bytes
+ */
+static void write_properties(GsfOutfile *out, property_list &prop, const guint8* header, size_t hlen);
+static void write_properties(GsfOutfile *out, property_list &prop, const guint8* header, size_t hlen) {
+    GsfOutput *stream = gsf_outfile_new_child(out, "__properties_version1.0", false);
+    gsf_output_write(stream, hlen, header);
+
+    property_list::iterator entry = prop.begin();
+    while (entry != prop.end()) {
+        gsf_output_write(stream, sizeof(property), (const guint8*)&(*entry));
+        entry++;
+    }
+
+    gsf_output_close(stream);
+    g_object_unref(G_OBJECT(stream));
+}
+
+
+/** Append a property entry whose 32 bit value is stored in the entry itself.
+ *
+ *  @param prop_list list that receives the entry
+ *  @param tag       property tag
+ *  @param flags     property flags
+ *  @param value     value stored in the length field
+ */
+static void int_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value);
+static void int_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value) {
+    // field order: tag, flags, length (the value), reserved
+    property entry = { tag, flags, value, 0 };
+    prop_list.push_back(entry);
+}
+
+
+/** Append a property entry that carries a FILETIME; a null value adds nothing.
+ *
+ *  The low half of the time goes into the length field and the high half
+ *  into the reserved field.
+ *
+ *  @param prop_list list that receives the entry
+ *  @param tag       property tag
+ *  @param flags     property flags
+ *  @param value     time to store, may be null
+ */
+static void i64_property(property_list &prop_list, uint32_t tag, uint32_t flags, FILETIME *value);
+static void i64_property(property_list &prop_list, uint32_t tag, uint32_t flags, FILETIME *value) {
+    if (!value) return;
+
+    property entry;
+    entry.tag      = tag;
+    entry.flags    = flags;
+    entry.length   = value->dwLowDateTime;
+    entry.reserved = value->dwHighDateTime;
+    prop_list.push_back(entry);
+}
+
+
+/** Append an integer property entry only when the value is non zero.
+ *
+ *  @param prop_list list that receives the entry
+ *  @param tag       property tag
+ *  @param flags     property flags
+ *  @param value     value to store, zero means the property is omitted
+ */
+static void nzi_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value);
+static void nzi_property(property_list &prop_list, uint32_t tag, uint32_t flags, uint32_t value) {
+    if (!value) return;
+    int_property(prop_list, tag, flags, value);
+}
+
+
+/** Write one email item as an Outlook .msg compound file.
+ *
+ *  Items without an email part are ignored. The file gets the top level
+ *  message properties, one __recip_version1.0_# storage for the "to"
+ *  recipient plus one each for cc and bcc when those addresses are
+ *  present, one __attach_version1.0_# storage per non embedded attachment,
+ *  and an empty __nameid_version1.0 storage.
+ *
+ *  String fields of the item are converted in place to the 8 bit output
+ *  charset while they are written.
+ *
+ *  Attachment data is staged through an anonymous tmpfile(). readpst may
+ *  run this function in several child processes at once, so a fixed file
+ *  name in the current directory would be shared between them (and left
+ *  behind afterwards).
+ *
+ *  @param fname name of the .msg file to create
+ *  @param item  pst item to write
+ *  @param pst   pst file the item came from, used to fetch attachment data
+ */
+void write_msg_email(char *fname, pst_item* item, pst_file* pst) {
+    // this is not an email item
+    if (!item->email) return;
+    DEBUG_ENT("write_msg_email");
+
+    pst_item_email &email = *(item->email);
+
+    char charset[30];
+    const char* body_charset = pst_default_charset(item, sizeof(charset), charset);
+    DEBUG_INFO(("%s body charset seems to be %s\n", fname, body_charset));
+    // the detected charset is only logged, all strings are written with this fixed charset
+    body_charset = "iso-8859-1//TRANSLIT//IGNORE";
+
+    gsf_init();
+
+    GError    *err    = NULL;
+    GsfOutput *output = gsf_output_stdio_new(fname, &err);
+    if (output == NULL) {
+        DEBUG_INFO(("unable to open output .msg file %s\n", fname));
+        if (err) g_error_free(err);     // the error object belongs to us
+        gsf_shutdown();
+        DEBUG_RET();
+        return;
+    }
+
+    struct top_property_header {
+        uint32_t  reserved1;
+        uint32_t  reserved2;
+        uint32_t  next_recipient;   // same as recipient count
+        uint32_t  next_attachment;  // same as attachment count
+        uint32_t  recipient_count;
+        uint32_t  attachment_count;
+        uint32_t  reserved3;
+        uint32_t  reserved4;
+    };
+
+    top_property_header top_head;
+    memset(&top_head, 0, sizeof(top_head));
+
+    // the compound file keeps its own reference to the stdio output
+    GsfOutfile *outfile = gsf_outfile_msole_new(output);
+    g_object_unref(G_OBJECT(output));
+
+    output = GSF_OUTPUT(outfile);
+    property_list prop_list;
+
+    int_property(prop_list, 0x00170003, 0x6, email.importance);
+    nzi_property(prop_list, 0x0023000B, 0x6, email.delivery_report);
+    nzi_property(prop_list, 0x00260003, 0x6, email.priority);
+    nzi_property(prop_list, 0x0029000B, 0x6, email.read_receipt);
+    nzi_property(prop_list, 0x002E0003, 0x6, email.original_sensitivity);
+    nzi_property(prop_list, 0x00360003, 0x6, email.sensitivity);
+    nzi_property(prop_list, 0x0C17000B, 0x6, email.reply_requested);
+    nzi_property(prop_list, 0x0E01000B, 0x6, email.delete_after_submit);
+    int_property(prop_list, 0x0E070003, 0x6, item->flags);
+    i64_property(prop_list, 0x00390040, 0x6, email.sent_date);
+    GsfOutfile *out = GSF_OUTFILE (output);
+    string_property(out, prop_list, 0x001A001E, item->ascii_type);
+    string_property(out, prop_list, 0x0037001E, body_charset, item->subject);
+    strin0_property(out, prop_list, 0x003B0102, body_charset, email.outlook_sender);
+    string_property(out, prop_list, 0x003D001E, string(""));
+    string_property(out, prop_list, 0x0040001E, body_charset, email.outlook_received_name1);
+    string_property(out, prop_list, 0x0042001E, body_charset, email.outlook_sender_name);
+    string_property(out, prop_list, 0x0044001E, body_charset, email.outlook_recipient_name);
+    string_property(out, prop_list, 0x0050001E, body_charset, email.reply_to);
+    strin0_property(out, prop_list, 0x00510102, body_charset, email.outlook_recipient);
+    strin0_property(out, prop_list, 0x00520102, body_charset, email.outlook_recipient2);
+    string_property(out, prop_list, 0x0064001E, body_charset, email.sender_access);
+    string_property(out, prop_list, 0x0065001E, body_charset, email.sender_address);
+    string_property(out, prop_list, 0x0070001E, body_charset, email.processed_subject);
+    string_property(out, prop_list, 0x00710102,               email.conversation_index);
+    string_property(out, prop_list, 0x0072001E, body_charset, email.original_bcc);
+    string_property(out, prop_list, 0x0073001E, body_charset, email.original_cc);
+    string_property(out, prop_list, 0x0074001E, body_charset, email.original_to);
+    string_property(out, prop_list, 0x0075001E, body_charset, email.recip_access);
+    string_property(out, prop_list, 0x0076001E, body_charset, email.recip_address);
+    string_property(out, prop_list, 0x0077001E, body_charset, email.recip2_access);
+    string_property(out, prop_list, 0x0078001E, body_charset, email.recip2_address);
+    string_property(out, prop_list, 0x007D001E, body_charset, email.header);
+    string_property(out, prop_list, 0x0C1A001E, body_charset, email.outlook_sender_name2);
+    strin0_property(out, prop_list, 0x0C1D0102, body_charset, email.outlook_sender2);
+    string_property(out, prop_list, 0x0C1E001E, body_charset, email.sender2_access);
+    string_property(out, prop_list, 0x0C1F001E, body_charset, email.sender2_address);
+    string_property(out, prop_list, 0x0E02001E, body_charset, email.bcc_address);
+    string_property(out, prop_list, 0x0E03001E, body_charset, email.cc_address);
+    string_property(out, prop_list, 0x0E04001E, body_charset, email.sentto_address);
+    string_property(out, prop_list, 0x0E1D001E, body_charset, email.outlook_normalized_subject);
+    string_property(out, prop_list, 0x1000001E, body_charset, item->body);
+    string_property(out, prop_list, 0x1013001E, body_charset, email.htmlbody);
+    string_property(out, prop_list, 0x1035001E, body_charset, email.messageid);
+    string_property(out, prop_list, 0x1042001E, body_charset, email.in_reply_to);
+    string_property(out, prop_list, 0x1046001E, body_charset, email.return_path_address);
+    // any property over 0x8000 needs entries in the __nameid to make them
+    // either string named or numerical named properties.
+
+    // recipients: each one is a child storage named after the running recipient count
+    {
+        vector<char> n(50);
+        {
+            snprintf(&n[0], n.size(), "__recip_version1.0_#%08X", top_head.recipient_count);
+            GsfOutput  *recip = gsf_outfile_new_child(out, &n[0], true);
+            {
+                int v = 1;  // to
+                property_list recip_props;
+                int_property(recip_props, 0x0C150003, 0x6, v);                        // PidTagRecipientType
+                int_property(recip_props, 0x30000003, 0x6, top_head.recipient_count); // PR_ROWID
+                GsfOutfile *recip_out = GSF_OUTFILE (recip);
+                string_property(recip_out, recip_props, 0x3001001E, body_charset, item->file_as);
+                if (item->contact) {
+                    string_property(recip_out, recip_props, 0x3002001E, body_charset, item->contact->address1_transport);
+                    string_property(recip_out, recip_props, 0x3003001E, body_charset, item->contact->address1);
+                    string_property(recip_out, recip_props, 0x5ff6001E, body_charset, item->contact->address1);
+                }
+                strin0_property(recip_out, recip_props, 0x300B0102, body_charset, email.outlook_search_key);
+                write_properties(recip_out, recip_props, (const guint8*)&top_head, 8);  // convenient 8 bytes of reserved zeros
+                gsf_output_close(recip);
+                g_object_unref(G_OBJECT(recip));
+                top_head.next_recipient++;
+                top_head.recipient_count++;
+            }
+        }
+        if (email.cc_address.str) {
+            snprintf(&n[0], n.size(), "__recip_version1.0_#%08X", top_head.recipient_count);
+            GsfOutput  *recip = gsf_outfile_new_child(out, &n[0], true);
+            {
+                int v = 2;  // cc
+                property_list recip_props;
+                int_property(recip_props, 0x0C150003, 0x6, v);                        // PidTagRecipientType
+                int_property(recip_props, 0x30000003, 0x6, top_head.recipient_count); // PR_ROWID
+                GsfOutfile *recip_out = GSF_OUTFILE (recip);
+                string_property(recip_out, recip_props, 0x3001001E, body_charset, email.cc_address);
+                string_property(recip_out, recip_props, 0x3003001E, body_charset, email.cc_address);
+                string_property(recip_out, recip_props, 0x5ff6001E, body_charset, email.cc_address);
+                write_properties(recip_out, recip_props, (const guint8*)&top_head, 8);  // convenient 8 bytes of reserved zeros
+                gsf_output_close(recip);
+                g_object_unref(G_OBJECT(recip));
+                top_head.next_recipient++;
+                top_head.recipient_count++;
+            }
+        }
+        if (email.bcc_address.str) {
+            snprintf(&n[0], n.size(), "__recip_version1.0_#%08X", top_head.recipient_count);
+            GsfOutput  *recip = gsf_outfile_new_child(out, &n[0], true);
+            {
+                int v = 3;  // bcc
+                property_list recip_props;
+                int_property(recip_props, 0x0C150003, 0x6, v);                        // PidTagRecipientType
+                int_property(recip_props, 0x30000003, 0x6, top_head.recipient_count); // PR_ROWID
+                GsfOutfile *recip_out = GSF_OUTFILE (recip);
+                string_property(recip_out, recip_props, 0x3001001E, body_charset, email.bcc_address);
+                string_property(recip_out, recip_props, 0x3003001E, body_charset, email.bcc_address);
+                string_property(recip_out, recip_props, 0x5ff6001E, body_charset, email.bcc_address);
+                write_properties(recip_out, recip_props, (const guint8*)&top_head, 8);  // convenient 8 bytes of reserved zeros
+                gsf_output_close(recip);
+                g_object_unref(G_OBJECT(recip));
+                top_head.next_recipient++;
+                top_head.recipient_count++;
+            }
+        }
+    }
+
+    // attachments: each one is a child storage named after the running attachment count
+    for (pst_item_attach *a = item->attach; a; a = a->next) {
+        if (a->method == PST_ATTACH_EMBEDDED) continue;    // not implemented yet
+        if (!a->data.data && !a->i_id)        continue;    // no data to write
+
+        // Get the staging file before creating the storage, so a failure here
+        // cannot leave a half built storage that is never closed.
+        // tmpfile() is private to this process and is removed by fclose().
+        FILE *fp = tmpfile();
+        if (!fp) {
+            DEBUG_INFO(("unable to create temporary file for attachment of %s\n", fname));
+            continue;
+        }
+        pst_attach_to_file(pst, a, fp); // data is now in the file
+        fseek(fp, 0, SEEK_SET);
+
+        vector<char> n(50);
+        snprintf(&n[0], n.size(), "__attach_version1.0_#%08X", top_head.attachment_count);
+        GsfOutput  *attach = gsf_outfile_new_child(out, &n[0], true);
+
+        property_list attach_props;
+        int_property(attach_props, 0x0E210003, 0x2, top_head.attachment_count);    // MAPI_ATTACH_NUM
+        int_property(attach_props, 0x0FF40003, 0x2, 2);            // PR_ACCESS read
+        int_property(attach_props, 0x0FF70003, 0x2, 0);            // PR_ACCESS_LEVEL read only
+        int_property(attach_props, 0x0FFE0003, 0x2, 7);            // PR_OBJECT_TYPE attachment
+        int_property(attach_props, 0x37050003, 0x7, 1);            // PR_ATTACH_METHOD by value
+        int_property(attach_props, 0x370B0003, 0x7, a->position);  // PR_RENDERING_POSITION
+        int_property(attach_props, 0x37100003, 0x6, a->sequence);  // PR_ATTACH_MIME_SEQUENCE
+        GsfOutfile *attach_out = GSF_OUTFILE (attach);
+        string_property(attach_out, attach_props, 0x0FF90102, item->record_key);
+        string_property(attach_out, attach_props, 0x37010102, fp);
+        string_property(attach_out, attach_props, 0x3704001E, body_charset, a->filename1);
+        string_property(attach_out, attach_props, 0x3707001E, body_charset, a->filename2);
+        string_property(attach_out, attach_props, 0x370E001E, body_charset, a->mimetype);
+        write_properties(attach_out, attach_props, (const guint8*)&top_head, 8);  // convenient 8 bytes of reserved zeros
+        gsf_output_close(attach);
+        g_object_unref(G_OBJECT(attach));
+        top_head.next_attachment++;
+        top_head.attachment_count++;
+        fclose(fp);
+    }
+
+    // the top level header needs the final recipient and attachment counts, so it is written last
+    write_properties(out, prop_list, (const guint8*)&top_head, sizeof(top_head));
+
+    {
+        GsfOutput  *nameid = gsf_outfile_new_child(out, "__nameid_version1.0", true);
+        {
+            GsfOutfile *nameid_out = GSF_OUTFILE (nameid);
+            empty_property(nameid_out, 0x00020102);
+            empty_property(nameid_out, 0x00030102);
+            empty_property(nameid_out, 0x00040102);
+            gsf_output_close(nameid);
+            g_object_unref(G_OBJECT(nameid));
+        }
+    }
+
+    gsf_output_close(output);
+    g_object_unref(G_OBJECT(output));
+
+    gsf_shutdown();
+    DEBUG_RET();
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/msg.h	Mon Dec 24 17:51:12 2012 -0800
@@ -0,0 +1,2 @@
+
+/* Write the email part of item to the .msg file named fname; pst is the
+ * file the item was read from. Implemented in msg.cpp, which includes this
+ * header inside extern "C" so that the C code in readpst.c can call it.
+ * This header relies on pst_item and pst_file being declared by the
+ * includer (readpst.c and msg.cpp both include define.h first).
+ */
+void write_msg_email(char *fname, pst_item* item, pst_file* pst);
--- a/src/readpst.c	Mon Dec 24 16:26:39 2012 -0800
+++ b/src/readpst.c	Mon Dec 24 17:51:12 2012 -0800
@@ -7,6 +7,7 @@
 
 #include "define.h"
 #include "lzfu.h"
+#include "msg.h"
 
 #define OUTPUT_TEMPLATE "%s"
 #define OUTPUT_KMAIL_DIR_TEMPLATE ".%s.directory"
@@ -39,7 +40,7 @@
 int       close_recurse_dir();
 char*     mk_separate_dir(char *dir);
 int       close_separate_dir();
-void      mk_separate_file(struct file_ll *f, char *extension);
+void      mk_separate_file(struct file_ll *f, char *extension, int openit);
 void      close_separate_file(struct file_ll *f);
 char*     my_stristr(char *haystack, char *needle);
 void      check_filename(char *fname);
@@ -122,6 +123,7 @@
 int         mode         = MODE_NORMAL;
 int         mode_MH      = 0;   // a submode of MODE_SEPARATE
 int         mode_EX      = 0;   // a submode of MODE_SEPARATE
+int         mode_MSG     = 0;   // a submode of MODE_SEPARATE
 int         mode_thunder = 0;   // a submode of MODE_RECURSE
 int         output_mode  = OUTPUT_NORMAL;
 int         contact_mode = CMODE_VCARD;
@@ -306,7 +308,7 @@
                 }
                 else {
                     ff.item_count++;
-                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".vcf" : "");
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".vcf" : "", 1);
                     if (contact_mode == CMODE_VCARD) {
                         pst_convert_utf8_null(item, &item->comment);
                         write_vcard(ff.output, item, item->contact, item->comment.str);
@@ -342,9 +344,13 @@
                         if (child == 0) {
                             // we are the child process, or the original parent if no children were available
                             pid_t me = getpid();
-                            mk_separate_file(&ff, (mode_EX) ? ".eml" : "");
+                            mk_separate_file(&ff, (mode_EX) ? ".eml" : "", 1);
                             write_normal_email(ff.output, ff.name, item, mode, mode_MH, &pstfile, save_rtf_body, &extra_mime_headers);
                             close_separate_file(&ff);
+                            if (mode_MSG) {
+                                mk_separate_file(&ff, ".msg", 0);
+                                write_msg_email(ff.name, item, &pstfile);
+                            }
 #ifdef HAVE_FORK
 #ifdef HAVE_SEMAPHORE_H
                             if (me != parent) {
@@ -380,7 +386,7 @@
                 }
                 else {
                     ff.item_count++;
-                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : "");
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : "", 1);
                     write_journal(ff.output, item);
                     fprintf(ff.output, "\n");
                     if (mode == MODE_SEPARATE) close_separate_file(&ff);
@@ -401,7 +407,7 @@
                 }
                 else {
                     ff.item_count++;
-                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : "");
+                    if (mode == MODE_SEPARATE) mk_separate_file(&ff, (mode_EX) ? ".ics" : "", 1);
                     write_schedule_part_data(ff.output, item, NULL, NULL);
                     fprintf(ff.output, "\n");
                     if (mode == MODE_SEPARATE) close_separate_file(&ff);
@@ -444,7 +450,7 @@
     }
 
     // command-line option handling
-    while ((c = getopt(argc, argv, "bC:c:Dd:ehj:kMo:qrSt:uVw"))!= -1) {
+    while ((c = getopt(argc, argv, "bC:c:Dd:emhj:kMo:qrSt:uVw"))!= -1) {
         switch (c) {
         case 'b':
             save_rtf_body = 0;
@@ -491,13 +497,22 @@
             break;
         case 'M':
             mode = MODE_SEPARATE;
-            mode_MH = 1;
-            mode_EX = 0;
+            mode_MH  = 1;
+            mode_EX  = 0;
+            mode_MSG = 0;
             break;
         case 'e':
             mode = MODE_SEPARATE;
-            mode_MH = 1;
-            mode_EX = 1;
+            mode_MH  = 1;
+            mode_EX  = 1;
+            mode_MSG = 0;
+            file_name_len = 14;
+            break;
+        case 'm':
+            mode = MODE_SEPARATE;
+            mode_MH  = 1;
+            mode_EX  = 1;
+            mode_MSG = 1;
             file_name_len = 14;
             break;
         case 'o':
@@ -512,8 +527,9 @@
             break;
         case 'S':
             mode = MODE_SEPARATE;
-            mode_MH = 0;
-            mode_EX = 0;
+            mode_MH  = 0;
+            mode_EX  = 0;
+            mode_MSG = 0;
             break;
         case 't':
             // email, appointment, contact, other
@@ -719,6 +735,7 @@
     printf("\t-h\t- Help. This screen\n");
     printf("\t-j <integer>\t- Number of parallel jobs to run\n");
     printf("\t-k\t- KMail. Output in kmail format\n");
+    printf("\t-m\t- As with -e, but write .msg files also\n");
     printf("\t-o <dirname>\t- Output directory to write files to. CWD is changed *after* opening pst file\n");
     printf("\t-q\t- Quiet. Only print error messages\n");
     printf("\t-r\t- Recursive. Output in a recursive format\n");
@@ -726,7 +743,7 @@
     printf("\t-u\t- Thunderbird mode. Write two extra .size and .type files\n");
     printf("\t-w\t- Overwrite any output mbox files\n");
     printf("\n");
-    printf("Only one of -k -M -r -S should be specified\n");
+    printf("Only one of -M -S -e -k -m -r should be specified\n");
     DEBUG_RET();
 }
 
@@ -924,7 +941,7 @@
 }
 
 
-void mk_separate_file(struct file_ll *f, char *extension) {
+void mk_separate_file(struct file_ll *f, char *extension, int openit) {
     DEBUG_ENT("mk_separate_file");
     DEBUG_INFO(("opening next file to save email\n"));
     if (f->item_count > 999999999) { // bigger than nine 9's
@@ -932,8 +949,10 @@
     }
     sprintf(f->name, SEP_MAIL_FILE_TEMPLATE, f->item_count, extension);
     check_filename(f->name);
-    if (!(f->output = fopen(f->name, "w"))) {
-        DIE(("mk_separate_file: Cannot open file to save email \"%s\"\n", f->name));
+    if (openit) {
+        if (!(f->output = fopen(f->name, "w"))) {
+            DIE(("mk_separate_file: Cannot open file to save email \"%s\"\n", f->name));
+        }
     }
     DEBUG_RET();
 }
--- a/src/vbuf.c	Mon Dec 24 16:26:39 2012 -0800
+++ b/src/vbuf.c	Mon Dec 24 17:51:12 2012 -0800
@@ -71,6 +71,7 @@
     char *outbuf        = NULL;
     int   myerrno;
 
+    DEBUG_ENT("sbcs_conversion");
     pst_vbresize(dest, 2*iblen);
 
     do {
@@ -85,9 +86,11 @@
     if (icresult == (size_t)-1) {
         DEBUG_WARN(("iconv failure: %s\n", strerror(myerrno)));
         pst_unicode_init();
+        DEBUG_RET();
         return (size_t)-1;
     }
-    return (icresult) ? (size_t)-1 : 0;
+    DEBUG_RET();
+    return 0;
 }
 
 
--- a/xml/libpst.in	Mon Dec 24 16:26:39 2012 -0800
+++ b/xml/libpst.in	Mon Dec 24 17:51:12 2012 -0800
@@ -174,6 +174,12 @@
                     </para></listitem>
                 </varlistentry>
                 <varlistentry>
+                    <term>-m</term>
+                    <listitem><para>
+                        Same as the -e option, but also write an Outlook .msg file for each email
+                    </para></listitem>
+                </varlistentry>
+                <varlistentry>
                     <term>-o <replaceable class="parameter">output-directory</replaceable></term>
                     <listitem><para>
                         Specifies the output directory. The directory must already exist, and