# HG changeset patch
# User Carl Byington
# Date 1203806177 28800
# Node ID cfd6175f93340768ee3aaaa3fe3cfaf46c29722a
# Parent ee4e0d00bc94c25e287bdb2a9bb663991e9ea509
Start work on pst2dii to convert to Summation dii load file format.

diff -r ee4e0d00bc94 -r cfd6175f9334 ChangeLog
--- a/ChangeLog	Sat Feb 16 12:43:42 2008 -0800
+++ b/ChangeLog	Sat Feb 23 14:36:17 2008 -0800
@@ -1,3 +1,7 @@
+LibPST 0.6.8 (2008-xx-xx)
+===============================
+    * Start work on pst2dii to convert to Summation dii load file format.
+
 LibPST 0.6.7 (2008-02-16)
 ===============================
     * Work around bogus 7c.b5 blocks in some messages that have been
diff -r ee4e0d00bc94 -r cfd6175f9334 NEWS
--- a/NEWS	Sat Feb 16 12:43:42 2008 -0800
+++ b/NEWS	Sat Feb 23 14:36:17 2008 -0800
@@ -1,3 +1,4 @@
+0.6.8   2008-xx-xx  Start work on pst2dii to convert to Summation dii load file format.
 0.6.7   2008-02-16  Ignore unknown attachments on some read messages; autoconf cleanup.
 0.6.6   2008-01-31  Code cleanup, switch from cvs to mercurial source control.
 0.6.5   2008-01-22  Code cleanup, rpm group Applications/Productivity.
diff -r ee4e0d00bc94 -r cfd6175f9334 configure.in
--- a/configure.in	Sat Feb 16 12:43:42 2008 -0800
+++ b/configure.in	Sat Feb 23 14:36:17 2008 -0800
@@ -1,11 +1,20 @@
 AC_PREREQ(2.59)
-AC_INIT(libpst,0.6.7,carl@five-ten-sg.com)
+AC_INIT(libpst,0.6.8,carl@five-ten-sg.com)
 AC_CONFIG_SRCDIR([config.h.in])
 AC_CONFIG_HEADER([config.h])
 AM_INIT_AUTOMAKE($PACKAGE_NAME,$PACKAGE_VERSION)
 # Checks for programs.
+my_build_dii=yes
+AC_PATH_PROG(CONVERT, convert)
+if test "x$CONVERT" = x ; then
+    AC_MSG_WARN([convert not found. pst2dii disabled])
+    my_build_dii=no
+fi
+AM_CONDITIONAL(BUILD_DII, test $my_build_dii = yes)
+
+# Checks for programs.
 AC_PROG_CXX
 AC_PROG_CC
 AC_PROG_CPP
@@ -23,7 +32,7 @@
 )
 AC_HEADER_DIRENT
 AC_HEADER_STDC
-AC_CHECK_HEADERS([limits.h malloc.h netinet/in.h stdint.h stdlib.h string.h sys/param.h wchar.h])
+AC_CHECK_HEADERS([fcntl.h limits.h malloc.h netinet/in.h stdint.h stdlib.h string.h sys/param.h wchar.h])
 # Checks for typedefs, structures, and compiler characteristics.
 AC_HEADER_STDBOOL
@@ -70,6 +79,7 @@
     man/Makefile \
     src/Makefile \
     src/version.h \
+    src/pst2dii.cpp \
     xml/Makefile \
     xml/libpst \
     )
diff -r ee4e0d00bc94 -r cfd6175f9334 man/Makefile.am
--- a/man/Makefile.am	Sat Feb 16 12:43:42 2008 -0800
+++ b/man/Makefile.am	Sat Feb 23 14:36:17 2008 -0800
@@ -1,2 +1,2 @@
-man_MANS = readpst.1 readpstlog.1 pst2ldif.1 lspst.1 outlook.pst.5
+man_MANS = readpst.1 readpstlog.1 pst2ldif.1 lspst.1 pst2dii.1 outlook.pst.5
 EXTRA_DIST = $(man_MANS)
diff -r ee4e0d00bc94 -r cfd6175f9334 regression/regression-tests.bash
--- a/regression/regression-tests.bash	Sat Feb 16 12:43:42 2008 -0800
+++ b/regression/regression-tests.bash	Sat Feb 23 14:36:17 2008 -0800
@@ -1,12 +1,24 @@
 #!/bin/bash
 val="valgrind --leak-check=full"
+val=''
 for i in {1..10}; do
     rm -rf output$i
     mkdir output$i
 done
+hash=$(md5sum ams.pst)
+pre="$hash
+bates-"
+$val ../src/pst2dii -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "$pre" -o output1 -O mydii -d dumper ams.pst
+     ../src/readpstlog -f I dumper >ams.log
+$val ../src/pst2dii -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "bates-" -o output2 -O mydii2 -d dumper sample_64.pst
+     ../src/readpstlog -f I dumper >sample_64.log
+$val ../src/pst2dii -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "bates-" -o output3 -O mydii3 -d dumper test.pst
+     ../src/readpstlog -f I dumper >test.log
+     ../src/pst2dii -f /usr/share/fonts/bitstream-vera/VeraMono.ttf -B "bates-" -o output4 -O mydii4 -d dumper big_mail.pst
+     ../src/readpstlog -f I dumper >big_mail.log
 $val ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' ams.pst >ams.err 2>&1
 $val ../src/readpst -cv -o output1 ams.pst >out1.err 2>&1
@@ -18,7 +30,7 @@
      ../src/readpstlog -f I dumper >mbmg.archive.log
 $val ../src/readpst -o output6 -d dumper test.pst >out6.err 2>&1
-     ../src/readpstlog -f I dumper >dumpertest.log
+     ../src/readpstlog -f I dumper >test.log
 $val ../src/readpst -cv -o output7 -d dumper sample_64.pst >out7.err 2>&1
      ../src/readpstlog -f I dumper >sample_64.log
diff -r ee4e0d00bc94 -r cfd6175f9334 src/Makefile.am
--- a/src/Makefile.am	Sat Feb 16 12:43:42 2008 -0800
+++ b/src/Makefile.am	Sat Feb 23 14:36:17 2008 -0800
@@ -1,11 +1,15 @@
 hfiles = $(wildcard *.h)
 bin_PROGRAMS = lspst readpst readpstlog pst2ldif
+if BUILD_DII
+    bin_PROGRAMS += pst2dii
+endif
 noinst_PROGRAMS = deltasearch dumpblocks getidblock
-lspst_SOURCES = $(hfiles) debug.c libpst.c libstrfunc.c lspst.c timeconv.c vbuf.c generic.c
-readpst_SOURCES = $(hfiles) readpst.c libpst.c timeconv.c libstrfunc.c debug.c lzfu.c vbuf.c generic.c
-readpstlog_SOURCES = $(hfiles) readpstlog.c debug.c
+lspst_SOURCES = $(hfiles) lspst.c libpst.c timeconv.c libstrfunc.c debug.c vbuf.c generic.c
+readpst_SOURCES = $(hfiles) readpst.c libpst.c timeconv.c libstrfunc.c debug.c lzfu.c vbuf.c generic.c
+readpstlog_SOURCES = $(hfiles) readpstlog.c debug.c
 pst2ldif_SOURCES = $(hfiles) pst2ldif.cpp libpst.c timeconv.c libstrfunc.c debug.c lzfu.c vbuf.c generic.c
+pst2dii_SOURCES = $(hfiles) pst2dii.cpp libpst.c timeconv.c libstrfunc.c debug.c lzfu.c vbuf.c generic.c
 deltasearch_SOURCES = deltasearch.cpp debug.c
 dumpblocks_SOURCES = dumpblocks.c debug.c libpst.c libstrfunc.c timeconv.c vbuf.c generic.c
@@ -16,6 +20,7 @@
     readpst_SOURCES += XGetopt.c
     readpstlog_SOURCES += XGetopt.c
     pst2ldif_SOURCES += XGetopt.c
+    pst2dii_SOURCES += XGetopt.c
 endif
 EXTRA_DIST = testdebug.c
@@ -28,4 +33,5 @@
 readpst_LDFLAGS = $(all_libraries)
 readpstlog_LDFLAGS = $(all_libraries)
 pst2ldif_LDFLAGS = $(all_libraries)
+pst2dii_LDFLAGS = $(all_libraries) -lgd -lpng -lz -lfreetype -lm
diff -r ee4e0d00bc94 -r cfd6175f9334 src/define.h
--- a/src/define.h	Sat Feb 16 12:43:42 2008 -0800
+++ b/src/define.h	Sat Feb 23 14:36:17 2008 -0800
@@ -2,11 +2,11 @@
  * define.h
  * Part of the LibPST project
  * Written by David Smith
- * dave.s@earthcorp.com
+ * dave.s@earthcorp.com
  */
 #ifdef HAVE_CONFIG_H
- #include "config.h"
+ #include "config.h"
 #endif
 #include "version.h"
@@ -28,13 +28,13 @@
 //number of items to save in memory between writes
 #define DEBUG_MAX_ITEMS 0
-#define DEBUG_FILE_NO 1
-#define DEBUG_INDEX_NO 2
-#define DEBUG_EMAIL_NO 3
-#define DEBUG_WARN_NO 4
-#define DEBUG_READ_NO 5
-#define DEBUG_INFO_NO 6
-#define DEBUG_MAIN_NO 7
+#define DEBUG_FILE_NO 1
+#define DEBUG_INDEX_NO 2
+#define DEBUG_EMAIL_NO 3
+#define DEBUG_WARN_NO 4
+#define DEBUG_READ_NO 5
+#define DEBUG_INFO_NO 6
+#define DEBUG_MAIN_NO 7
 #define DEBUG_DECRYPT_NO 8
 #define DEBUG_FUNCENT_NO 9
 #define DEBUG_FUNCRET_NO 10
@@ -96,7 +96,7 @@
 void * xmalloc(size_t size);
 #define MESSAGEPRINT(x,y) {pst_debug_msg_info(__LINE__,__FILE__,y);\
- pst_debug_msg_text x;}
+ pst_debug_msg_text x;}
 #define LOGSTOP() {MESSAGESTOP();DEBUGSTOP();}
@@ -131,7 +131,7 @@
 #ifdef DEBUG_MODE_EMAIL
 #define DEBUG_EMAIL(x) MESSAGEPRINT(x, DEBUG_EMAIL_NO);
 #define DEBUG_EMAIL_HEXPRINT(x,y) {pst_debug_msg_info(__LINE__, __FILE__, 11);\
- pst_debug_hexdump(x, y, 0x10, 0);}
+ pst_debug_hexdump(x, y, 0x10, 0);}
 #else
 #define DEBUG_EMAIL(x) {}
 #define DEBUG_EMAIL_HEXPRINT(x,y) {}
@@ -186,19 +186,19 @@
 #endif
 #define DEBUG_FILE(x) {pst_debug_msg_info(__LINE__, __FILE__, DEBUG_FILE_NO);\
- pst_debug_msg_text x;}
+ pst_debug_msg_text x;}
 #ifdef DEBUG_MODE_FUNC
-# define DEBUG_ENT(x) \
- { \
- pst_debug_func(x); \
- MESSAGEPRINT(("Entering function\n"),DEBUG_FUNCENT_NO); \
- }
-# define DEBUG_RET() \
- { \
- MESSAGEPRINT(("Leaving function\n"),DEBUG_FUNCRET_NO); \
- pst_debug_func_ret(); \
- }
+# define DEBUG_ENT(x) \
+ { \
+ pst_debug_func(x); \
+ MESSAGEPRINT(("Entering function %s\n",x),DEBUG_FUNCENT_NO); \
+ }
+# define DEBUG_RET() \
+ { \
+ MESSAGEPRINT(("Leaving function\n"),DEBUG_FUNCRET_NO); \
+ pst_debug_func_ret(); \
+ }
 #else
 # define DEBUG_ENT(x) {}
 # define DEBUG_RET() {}
@@ -209,28 +209,28 @@
 #define DEBUG_REGISTER_CLOSE() {if(atexit(pst_debug_close)!=0) fprintf(stderr, "Error registering atexit function\n");}
 #define RET_DERROR(res, ret_val, x)\
- if (res) { DIE(x);}
+ if (res) { DIE(x);}
 #define RET_ERROR(res, ret_val)\
- if (res) {return ret_val;}
+ if (res) {return ret_val;}
 #define DEBUG_VERSION 1
 struct pst_debug_file_rec_m {
- unsigned short int funcname;
- unsigned short int filename;
- unsigned short int text;
- unsigned short int end;
- unsigned int line;
- unsigned int type;
+ unsigned short int funcname;
+ unsigned short int filename;
+ unsigned short int text;
+ unsigned short int end;
+ unsigned int line;
+ unsigned int type;
 };
 struct pst_debug_file_rec_l {
- unsigned int funcname;
- unsigned int filename;
- unsigned int text;
- unsigned int end;
- unsigned int line;
- unsigned int type;
+ unsigned int funcname;
+ unsigned int filename;
+ unsigned int text;
+ unsigned int end;
+ unsigned int line;
+ unsigned int type;
 };
 #endif //DEFINEH_H
diff -r ee4e0d00bc94 -r cfd6175f9334 src/libpst.c
--- a/src/libpst.c	Sat Feb 16 12:43:42 2008 -0800
+++ b/src/libpst.c	Sat Feb 23 14:36:17 2008 -0800
@@ -2002,6 +2002,9 @@
                 break;
             case 0x0044: // PR_RCVD_REPRESENTING_NAME Name of Recipient Structure 2
-                DEBUG_EMAIL(("Received on behalf of Structure Name -- NOT HANDLED\n"));
+                DEBUG_EMAIL(("Received on behalf of Structure Name - "));
+                MALLOC_EMAIL(item);
+                LIST_COPY(item->email->outlook_recipient_name, (char*));
+                DEBUG_EMAIL(("%s\n", item->email->outlook_recipient_name));
                 break;
             case 0x004F: // PR_REPLY_RECIPIENT_ENTRIES Reply-To Structure
                 DEBUG_EMAIL(("Reply-To Structure -- NOT HANDLED\n"));
@@ -2088,12 +2091,30 @@
                 LIST_COPY(item->email->proc_subject, (char*));
                 DEBUG_EMAIL(("%s\n", item->email->proc_subject));
                 break;
-            case 0x0071: // PR_CONVERSATION_INDEX Date 2
+            case 0x0071: // PR_CONVERSATION_INDEX
                 DEBUG_EMAIL(("Conversation Index - "));
                 MALLOC_EMAIL(item);
                 memcpy(&(item->email->conv_index), list->items[x]->data, sizeof(item->email->conv_index));
                 DEBUG_EMAIL(("%i\n", item->email->conv_index));
                 break;
+            case 0x0072: // PR_ORIGINAL_DISPLAY_BCC
+                DEBUG_EMAIL(("Original display bcc - "));
+                MALLOC_EMAIL(item);
+                LIST_COPY(item->email->original_bcc, (char*));
+                DEBUG_EMAIL(("%s\n", item->email->original_bcc));
+                break;
+            case 0x0073: // PR_ORIGINAL_DISPLAY_CC
+                DEBUG_EMAIL(("Original display cc - "));
+                MALLOC_EMAIL(item);
+                LIST_COPY(item->email->original_cc, (char*));
+                DEBUG_EMAIL(("%s\n", item->email->original_cc));
+                break;
+            case 0x0074: // PR_ORIGINAL_DISPLAY_TO
+                DEBUG_EMAIL(("Original display to - "));
+                MALLOC_EMAIL(item);
+                LIST_COPY(item->email->original_to, (char*));
+                DEBUG_EMAIL(("%s\n", item->email->original_to));
+                break;
             case 0x0075: // PR_RECEIVED_BY_ADDRTYPE Recipient Access Method
                 DEBUG_EMAIL(("Received by Address type - "));
                 MALLOC_EMAIL(item);
@@ -3755,7 +3776,11 @@
         SAFE_FREE(item->email->htmlbody);
         SAFE_FREE(item->email->in_reply_to);
         SAFE_FREE(item->email->messageid);
+        SAFE_FREE(item->email->original_bcc);
+        SAFE_FREE(item->email->original_cc);
+        SAFE_FREE(item->email->original_to);
         SAFE_FREE(item->email->outlook_recipient);
+        SAFE_FREE(item->email->outlook_recipient_name);
         SAFE_FREE(item->email->outlook_recipient2);
         SAFE_FREE(item->email->outlook_sender);
         SAFE_FREE(item->email->outlook_sender_name);
diff -r ee4e0d00bc94 -r cfd6175f9334 src/libpst.h
--- a/src/libpst.h	Sat Feb 16 12:43:42 2008 -0800
+++ b/src/libpst.h	Sat Feb 23 14:36:17 2008 -0800
@@ -240,7 +240,11 @@
     int message_to_me; // 1 = true, 0 = false
     char *messageid;
     int32_t orig_sensitivity;
+    char *original_bcc;
+    char *original_cc;
+    char *original_to;
     char *outlook_recipient;
+    char *outlook_recipient_name;
     char *outlook_recipient2;
     char *outlook_sender;
     char *outlook_sender_name;
@@ -259,7 +263,7 @@
     int32_t rtf_body_crc;
     char *rtf_body_tag;
     char *rtf_compressed;
-    uint32_t rtf_compressed_size;
+    uint32_t rtf_compressed_size;
     int rtf_in_sync; // 1 = true, 0 = doesn't exist, -1 = false
     int32_t rtf_ws_prefix_count;
     int32_t rtf_ws_trailing_count;
diff -r ee4e0d00bc94 -r cfd6175f9334 src/pst2dii.cpp.in
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/pst2dii.cpp.in	Sat Feb 23 14:36:17 2008 -0800
@@ -0,0 +1,734 @@
+/*
+
+Copyright (c) 2008 Carl Byington - 510 Software Group, released under
+the GPL version 2 or any later version at your choice available at
+http://www.fsf.org/licenses/gpl.txt
+
+Based on readpst.c by David Smith
+
+*/
+#include "gd.h"
+#include <limits.h>     // PATH_MAX -- TODO confirm: the five header names here were lost in transit
+#include <unistd.h>     // getopt, optarg, optind
+#include <cstdio>       // printf, fopen, snprintf
+#include <string>
+#include <vector>
+
+using namespace std;
+
+extern "C" {
+    #include "define.h"
+    #include "libstrfunc.h"
+    #include "libpst.h"
+    #include "common.h"
+    #include "timeconv.h"
+    #include "lzfu.h"
+}
+
+struct file_ll {
+    string  name;           // folder path, built by create_enter_dir()
+    int32_t stored_count;
+    int32_t email_count;
+    int32_t skip_count;
+    int32_t type;
+    file_ll() {
+        stored_count = 0;
+        email_count  = 0;
+        skip_count   = 0;
+        type         = 0;   // was "type - 0;", a no-op that left type uninitialised
+    };
+};
+
+
+// global settings
+const char* convert = "@CONVERT@";      // fully qualified path of the convert program from image magick
+char*       prog_name = NULL;           // our arg0 name
+const char* bates_prefix = "";          // string to prefix bates numbers
+int         bates_index = 0;            // current bates sequence
+const char* output_directory = ".";
+const char* output_file = "load.dii";
+char*       font_file = NULL;
+int         bates_color = 0xff0000;     // color of bates header stamp
+int         email_sequence = 0;         // current pdf sequence number
+char        pdf_name[PATH_MAX];         // current pdf file name
+FILE*       dii_file = NULL;            // the output dii load file
+pst_file    pstfile;                    // the input pst file
+
+// pdf writer globals
+bool            pdf_open = false;       // is pdf writer started
+char*           pst_folder;             // current folder name
+int             page_sequence;          // current page number
+string          conversion;             // conversion command
+vector<string>  png_names;              // page images to delete once the pdf is built
+
+// png writer globals
+bool        png_open = false;           // is current page open
+int         line_height;                // in pixels
+int         char_width;                 // in pixels
+int         col_number, col_max;        // in characters
+int         line_number, line_max;      // lines per page
+int         x_position, y_position;     // in pixels
+int         black, red;                 // text colors
+gdImagePtr  image;                      // current gd image
+
+const int DPI = 300;
+const double sz = 10.0;
+const int margin = DPI/2;
+const int LINE_SIZE = 2000;
+const int PAGE_WIDTH = DPI*17/2;
+const int PAGE_HEIGHT = DPI*11;
+
+// max size of the c_time char*. It will store the date of the email
+#define C_TIME_SIZE 500
+
+static void open_png();
+static void close_png();
+
+
+// Print the program version, byte order and (under GCC) compiler details. Always returns 0.
+static int version()
+{
+    printf("pst2dii v%s\n", VERSION);
+#if BYTE_ORDER == BIG_ENDIAN
+    printf("Big Endian implementation being used.\n");
+#elif BYTE_ORDER == LITTLE_ENDIAN
+    printf("Little Endian implementation being used.\n");
+#else
+#  error "Byte order not supported by this library"
+#endif
+#ifdef __GNUC__
+    printf("GCC %d.%d : %s %s\n", __GNUC__, __GNUC_MINOR__, __DATE__, __TIME__);
+#endif
+    return 0;
+}
+
+
+// Print the version banner followed by the command line summary. Always returns 0.
+static int usage()
+{
+    version();
+    printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
+    printf("OPTIONS:\n");
+    printf("\t-B bates-prefix \t- Set the bates prefix string\n");
+    printf("\t-O dii-output-file\t- Set the dii load file output filename\n");
+    printf("\t-V \t- Version. Display program version\n");
+    printf("\t-b bates-number \t- Set the starting bates sequence number\n");
+    printf("\t-c bates-color \t- Specify the color of the bates stamps as 6 digit hex\n");
+    printf("\t-d filename \t- Debug to file. This is a binary log. Use readpstlog to print it.\n");
+    printf("\t-f ttf-font-file \t- Set the font file\n");
+    printf("\t-h \t- Help. This screen\n");
+    printf("\t-o dirname \t- Output directory to write files to.\n");
+    return 0;
+}
+
+
+// Delete every '\r' from c in place (so "\r\n" becomes "\n") and return c.
+static char *removeCR (char *c) {
+    // a reads ahead, b trails behind and only advances past characters we keep
+    char *a, *b;
+    DEBUG_ENT("removeCR");
+    a = b = c;
+    while (*a != '\0') {
+        *b = *a;
+        if (*a != '\r')
+            b++;
+        a++;
+    }
+    *b = '\0';
+    DEBUG_RET();
+    return c;
+}
+
+
+// The sole purpose of this function is to bypass the pseudo-header prologue
+// that Microsoft Outlook inserts at the beginning of the internet email
+// headers for emails stored in their "Personal Folders" files.
+// Returns a pointer into headers (never NULL); headers must not be NULL.
+static char *skip_header_prologue(char *headers) {
+    const char *bad = "Microsoft Mail Internet Headers";
+    if (strncmp(headers, bad, strlen(bad)) == 0) {
+        // Found the offensive header prologue
+        char *pc = strchr(headers, '\n');
+        return (pc) ? pc + 1 : headers + strlen(headers);   // prologue with no newline: nothing follows it
+    }
+    return headers;
+}
+
+
+// Replace each space, '/', '\\' and ':' in fname with '_' so it is usable as one path component.
+static void check_filename(string &fname) {
+    char *t = strdup(fname.c_str());
+    DEBUG_ENT("check_filename");
+    if (!t) {
+        DEBUG_RET();
+        return;
+    }
+    char *tt = t;           // keep the start of the copy for the final assignment and free()
+    bool fixed = false;
+    while ((t = strpbrk(t, " /\\:"))) {
+        // while there are characters in the second string that we don't want
+        *t = '_';    //replace them with an underscore
+        fixed = true;
+    }
+    if (fixed) fname = string(tt);
+    free(tt);
+    DEBUG_RET();
+}
+
+
+// Save one attachment under output_directory and return the path that was used.
+static string write_separate_attachment(string fname, pst_item_attach* current_attach, int attach_num, pst_file* pst)
+{
+    FILE *fp = NULL;
+    int x = 0;
+    char *temp = NULL;
+
+    // If there is a long filename (filename2) use that, otherwise
+    // use the 8.3 filename (filename1)
+    char *attach_filename = (current_attach->filename2) ? current_attach->filename2
+                                                        : current_attach->filename1;
+    DEBUG_ENT("write_separate_attachment");
+    check_filename(fname);
+    const char* f_name = fname.c_str();
+    DEBUG_EMAIL(("dirname=%s, pathname=%s, filename=%s\n", output_directory, f_name, attach_filename));
+    int len = strlen(output_directory) + 1 + strlen(f_name) + 15;
+    if (!attach_filename) {
+        // generate our own (dummy) filename for the attachement
+        temp = (char*)xmalloc(len);
+        sprintf(temp, "%s/%s_attach%i", output_directory, f_name, attach_num);
+    } else {
+        // have an attachment name, make sure it's unique
+        temp = (char*)xmalloc(len+strlen(attach_filename));
+        do {
+            if (fp) fclose(fp);
+            if (x == 0)
+                sprintf(temp, "%s/%s_%s", output_directory, f_name, attach_filename);
+            else
+                sprintf(temp, "%s/%s_%s-%i", output_directory, f_name, attach_filename, x);
+        } while ((fp = fopen(temp, "r")) && ++x < 99999999);
+        if (x >= 99999999) {    // the loop stops AT the limit, so ">" could never fire
+            DIE(("error finding attachment name. exhausted possibilities to %s\n", temp));
+        }
+    }
+    DEBUG_EMAIL(("Saving attachment to %s\n", temp));
+    if (!(fp = fopen(temp, "wb"))) {
+        WARN(("write_separate_attachment: Cannot open attachment save file \"%s\"\n", temp));
+    } else {
+        if (current_attach->data)
+            fwrite(current_attach->data, 1, current_attach->size, fp);
+        else {
+            (void)pst_attach_to_file(pst, current_attach, fp);
+        }
+        fclose(fp);
+    }
+    string rc(temp);
+    if (temp) free(temp);
+    DEBUG_RET();
+    return rc;
+}
+
+
+// Draw the first len characters of line at the cursor; the caller ensures they fit on the row.
+static void print_pdf_short(char *line, int len, int color)
+{
+    if (line_number >= line_max) {
+        // page is full, start the next one
+        close_png();
+        open_png();
+    }
+    int brect[8];
+    gdFTStringExtra strex;
+    strex.flags       = gdFTEX_RESOLUTION;
+    strex.linespacing = 1.20;
+    strex.charmap     = 0;
+    strex.hdpi        = DPI;
+    strex.vdpi        = DPI;
+    char x = line[len];     // temporarily terminate the text after len characters
+    line[len] = '\0';
+    char *p, *l = line;
+    while ((p = strchr(l, '&'))) {  // each '&' is drawn on its own (presumably to dodge gd entity parsing -- TODO confirm)
+        *p = '\0';
+        char *err = gdImageStringFTEx(image, &brect[0], color, font_file, sz, 0.0, x_position, y_position, l, &strex);
+        if (err) fprintf(stderr, "%s\n", err);      // never use err itself as the format string
+        x_position += (brect[2]-brect[6]);
+        *p = '&';                                   // put the caller's character back
+        l = p+1;
+        err = gdImageStringFTEx(image, &brect[0], color, font_file, sz, 0.0, x_position, y_position, "&", &strex);
+        if (err) fprintf(stderr, "%s\n", err);
+        x_position += (brect[2]-brect[6]);
+    }
+    char *err = gdImageStringFTEx(image, &brect[0], color, font_file, sz, 0.0, x_position, y_position, l, &strex);
+    if (err) fprintf(stderr, "%s\n", err);
+    line[len] = x;
+    x_position += (brect[2]-brect[6]);
+    col_number += len;
+}
+
+
+// Draw one newline-free string, wrapping onto further rows whenever col_max is reached.
+static void print_pdf_single(char *line, int color)
+{
+    int n = strlen(line);
+    while (n) {
+        int m = col_max - col_number;   // number of chars that will fit on this line
+        m = (n > m) ? m : n;
+        print_pdf_short(line, m, color);
+        line += m;
+        n    -= m;
+        if (n) {
+            // more text remains, so move the cursor to the start of the next row
+            y_position  += line_height;
+            line_number += 1;
+            x_position   = margin;
+            col_number   = 0;
+        }
+    }
+}
+
+
+// Draw text that may contain newlines onto the page images only (nothing goes to the dii file).
+static void print_pdf_only(char *line, int color)
+{
+    char *p;
+    while ((p = strchr(line, '\n'))) {
+        *p = '\0';          // line is edited in place and restored, so it must be writable
+        print_pdf_single(line, color);
+        *p = '\n';
+        line = p+1;
+        y_position  += line_height;
+        line_number += 1;
+        x_position   = margin;
+        col_number   = 0;
+    }
+    print_pdf_single(line, color);
+}
+
+
+// Write line verbatim to the dii load file and also draw it in black on the page images.
+static void print_pdf(char *line)
+{
+    fwrite(line, 1, strlen(line), dii_file);
+    print_pdf_only(line, black);
+}
+
+
+// Start a new page image if none is open, and stamp the bates number and folder on it.
+static void open_png()
+{
+    if (!png_open) {
+        png_open = true;
+        int brect[8];
+        image = gdImageCreate(PAGE_WIDTH, PAGE_HEIGHT);
+        gdImageColorAllocate(image, 255, 255, 255); // background color first one allocated
+        black = gdImageColorAllocate(image, 0, 0, 0);
+        int r = (bates_color & 0xff0000) >> 16;
+        int g = (bates_color & 0x00ff00) >> 8;
+        int b = (bates_color & 0x0000ff);
+        red   = gdImageColorAllocate(image, r, g, b);
+
+        gdFTStringExtra strex;
+        strex.flags       = gdFTEX_RESOLUTION;
+        strex.linespacing = 1.20;
+        strex.charmap     = 0;
+        strex.hdpi        = DPI;
+        strex.vdpi        = DPI;
+
+        char line[LINE_SIZE];
+        char *err = gdImageStringFTEx(NULL, &brect[0], black, font_file, sz, 0.0, margin, margin, "LMgqQ", &strex);
+        if (err) { DIE(("open_png: cannot measure text with font %s: %s\n", (font_file) ? font_file : "(none)", err)); }
+        line_height = (brect[3]-brect[7]) * 12/10;
+        char_width  = (brect[2]-brect[6]) / 5;      // average over the five sample characters
+        col_number  = 0;
+        col_max     = (PAGE_WIDTH - margin*2) / char_width;
+        line_number = 0;
+        line_max    = (PAGE_HEIGHT - margin*2) / line_height;
+        x_position  = margin;
+        y_position  = margin + line_height;
+        snprintf(line, sizeof(line), "%s%06d\n", bates_prefix, bates_index++);
+        print_pdf_only(line, red);
+        print_pdf_only(pst_folder, red);
+    }
+}
+
+
+// Write the open page image to pageN.png and append that name to the conversion command.
+static void close_png()
+{
+    if (png_open) {
+        png_open = false;
+        char fn[PATH_MAX];
+        snprintf(fn, sizeof(fn), "page%d.png", ++page_sequence);
+        FILE *pngout = fopen(fn, "wb");
+        if (pngout) {
+            gdImagePng(image, pngout);
+            fclose(pngout);
+        } else { WARN(("close_png: cannot write page image %s\n", fn)); }
+        gdImageDestroy(image); // free memory
+        png_names.push_back(fn);
+        conversion += string(" ") + fn;
+    }
+}
+
+
+// Begin a new per-email pdf; line is the folder banner and must stay valid until close_pdf().
+static void open_pdf(char *line)
+{
+    pst_folder    = line;
+    page_sequence = 0;
+    conversion    = string(convert);
+    png_names.clear();
+    open_png();
+    snprintf(pdf_name, sizeof(pdf_name), "dii%06d", ++email_sequence);
+    fprintf(dii_file, "\n@T %s\n", pdf_name);
+    snprintf(pdf_name, sizeof(pdf_name), "%s/dii%06d.pdf", output_directory, email_sequence);
+}
+
+
+// Finish the last page, run convert to build the pdf, then delete the page images.
+static void close_pdf()
+{
+    close_png();
+    conversion += string(" ") + pdf_name;
+    if (system(conversion.c_str()) != 0) { WARN(("close_pdf: conversion command failed: %s\n", conversion.c_str())); }
+    for (vector<string>::iterator i=png_names.begin(); i!=png_names.end(); i++) {
+        remove((*i).c_str());
+    }
+    fprintf(dii_file, "@D %s\n", pdf_name);
+}
+
+
+// Write "@tag value" to the dii file; writes nothing when value is NULL.
+static void write_simple(const char *tag, const char *value)
+{
+    if (value) fprintf(dii_file, "@%s %s\n", tag, value);
+}
+
+
+// Write "@tag value" to the dii file.
+static void write_simple(const char *tag, string value)
+{
+    fprintf(dii_file, "@%s %s\n", tag, value.c_str());
+}
+
+
+// Write "@tag "value" <value2>" (or just the quoted value); writes nothing when value is NULL.
+static void write_simple(const char *tag, const char *value, const char *value2)
+{
+    if (value) {
+        if (value2) fprintf(dii_file, "@%s \"%s\" <%s>\n", tag, value, value2);
+        else        fprintf(dii_file, "@%s \"%s\"\n", tag, value);
+    }
+}
+
+
+// Return the text after the first "field: " header in headers, or "" when absent.
+static string extract_header(char *headers, const char *field)
+{
+    string rc;
+    if (!headers || !field) return rc;      // item carried no internet headers
+    const string key = string(field) + ": ";
+    const char *p = NULL;
+    if (strncmp(headers, key.c_str(), key.size()) == 0) {
+        // the field is the very first header line, so no newline precedes it
+        p = headers + key.size();
+    }
+    else {
+        const string nlkey = string("\n") + key;
+        p = strstr(headers, nlkey.c_str());
+        if (p) p += nlkey.size();
+    }
+    if (p) {
+        // the value runs to the end of the line; folded continuation lines are not joined
+        const char *n = strchr(p, '\n');
+        rc = (n) ? string(p, n - p) : string(p);
+    }
+    return rc;
+}
+
+
+// Emit one email: its dii records, its rendered pdf pages and its attachments.
+static void write_normal_email(file_ll &f, pst_item* item, pst_file* pst)
+{
+    DEBUG_ENT("write_normal_email");
+    char *soh = NULL;  // real start of headers.
+    if (item->email->header) {
+        // some of the headers we get from the file are not properly defined.
+        // they can contain some email stuff too. We will cut off the header
+        // when we see a \n\n or \r\n\r\n
+        removeCR(item->email->header);
+        char *temp = strstr(item->email->header, "\n\n");
+        if (temp) {
+            DEBUG_EMAIL(("Found body text in header\n"));
+            temp[1] = '\0'; // stop after first \n
+        }
+        soh = skip_header_prologue(item->email->header);
+    }
+
+    char folder_line[LINE_SIZE];
+    char line[LINE_SIZE];
+    // reset pdf writer to new file
+    int bates = bates_index; // save starting index
+    snprintf(folder_line, sizeof(folder_line), "pst folder = %s\n", f.name.c_str());
+    open_pdf(folder_line);
+
+    // start printing this email
+    fprintf(dii_file, "@FOLDERNAME %s\n", f.name.c_str());
+    string myfrom = extract_header(soh, "From");
+    string myto   = extract_header(soh, "To");
+    string mycc   = extract_header(soh, "Cc");
+    string mybcc  = extract_header(soh, "Bcc");
+    if (myfrom.empty()) write_simple("FROM", item->email->outlook_sender_name, item->email->sender_address);
+    else                write_simple("FROM", myfrom);
+    if (myto.empty())   write_simple("TO",   item->email->sentto_address, item->email->recip_address);
+    else                write_simple("TO",   myto);
+    if (mycc.empty())   write_simple("CC",   item->email->cc_address);
+    else                write_simple("CC",   mycc);
+    if (mybcc.empty())  write_simple("BCC",  item->email->bcc_address);
+    else                write_simple("BCC",  mybcc);
+    if (item->email->sent_date) {
+        time_t t = fileTimeToUnixTime(item->email->sent_date, NULL);
+        char c_time[C_TIME_SIZE];
+        strftime(c_time, C_TIME_SIZE, "%F +0000", gmtime(&t));
+        write_simple("DATESENT", c_time);
+        strftime(c_time, C_TIME_SIZE, "%T +0000", gmtime(&t));
+        write_simple("TIMESENT", c_time);
+    }
+    if (item->email->arrival_date) {
+        time_t t = fileTimeToUnixTime(item->email->arrival_date, NULL);
+        char c_time[C_TIME_SIZE];
+        strftime(c_time, C_TIME_SIZE, "%F +0000", gmtime(&t));
+        write_simple("DATERCVD", c_time);
+        strftime(c_time, C_TIME_SIZE, "%T +0000", gmtime(&t));
+        write_simple("TIMERCVD", c_time);
+    }
+    if (item->email->subject) {
+        write_simple("SUBJECT", item->email->subject->subj);
+    }
+    write_simple("MSGID", item->email->messageid);
+    if (item->email->flag) {
+        write_simple("READ", (item->email->flag & 1) ? "Y" : "N");
+    }
+
+    DEBUG_EMAIL(("About to print Header\n"));
+    fprintf(dii_file, "@HEADER\n");
+
+    if (item && item->email && item->email->subject && item->email->subject->subj) {
+        DEBUG_EMAIL(("item->email->subject->subj = %s\n", item->email->subject->subj));
+    }
+
+    if (soh) {
+        // Now, write out the header...
+        print_pdf(soh);
+        int len = strlen(soh);
+        if (!len || (soh[len-1] != '\n')) {
+            snprintf(line, sizeof(line), "\n");
+            print_pdf(line);
+        }
+    } else {
+        //make up our own headers; any of these pst fields may be NULL
+        const char *temp = item->email->outlook_sender;
+        if (!temp) temp = "";
+        snprintf(line, sizeof(line), "From: \"%s\" <%s>\n", (item->email->outlook_sender_name) ? item->email->outlook_sender_name : "", temp);
+        print_pdf(line);
+
+        if (item->email->subject) {
+            snprintf(line, sizeof(line), "Subject: %s\n", item->email->subject->subj);
+        } else {
+            snprintf(line, sizeof(line), "Subject: \n");
+        }
+        print_pdf(line);
+
+        snprintf(line, sizeof(line), "To: %s\n", (item->email->sentto_address) ? item->email->sentto_address : "");
+        print_pdf(line);
+
+        if (item->email->cc_address) {
+            snprintf(line, sizeof(line), "Cc: %s\n", item->email->cc_address);
+            print_pdf(line);
+        }
+
+        if (item->email->sent_date) {
+            time_t em_time = fileTimeToUnixTime(item->email->sent_date, 0);
+            char c_time[C_TIME_SIZE];
+            strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time));
+            snprintf(line, sizeof(line), "Date: %s\n", c_time);
+            print_pdf(line);
+        }
+    }
+    snprintf(line, sizeof(line), "\n");
+    print_pdf_only(line, black);
+    fprintf(dii_file, "@HEADER-END\n");
+
+    DEBUG_EMAIL(("About to print Body\n"));
+    fprintf(dii_file, "@EMAIL-BODY\n");
+    if (item->email->body) {
+        removeCR(item->email->body);
+        print_pdf(item->email->body);
+    } else if (item->email->htmlbody) {
+        removeCR(item->email->htmlbody);
+        print_pdf(item->email->htmlbody);
+    } else if (item->email->encrypted_body || item->email->encrypted_htmlbody) {
+        // print_pdf() edits its argument in place, so it must never be handed a string literal
+        snprintf(line, sizeof(line), "The body of this email is encrypted. This isn't supported yet, but the body is now an attachment\n");
+        print_pdf(line);
+    }
+    fprintf(dii_file, "@EMAIL-END\n");
+
+    int attach_num = 0;
+    for (pst_item_attach* current_attach = item->attach; current_attach; current_attach = current_attach->next) {
+        DEBUG_EMAIL(("Attempting Attachment encoding\n"));
+        if (!current_attach->data) {
+            DEBUG_EMAIL(("Data of attachment is NULL!. Size is supposed to be %i\n", current_attach->size));
+        }
+        string an = write_separate_attachment(f.name, current_attach, ++attach_num, pst);
+        fprintf(dii_file, "@EATTACH %s\n", an.c_str());
+    }
+    close_pdf();
+    fprintf(dii_file, "@BATESBEG %d\n", bates);
+    fprintf(dii_file, "@BATESEND %d\n", bates_index-1);
+    DEBUG_RET();
+}
+
+
+// Initialise f for the folder described by item; its name is nested under parent's when parent is given.
+static void create_enter_dir(file_ll &f, file_ll *parent, pst_item *item)
+{
+    f.email_count  = 0;
+    f.skip_count   = 0;
+    f.type         = item->type;
+    f.stored_count = (item->folder) ? item->folder->email_count : 0;
+    f.name         = ((parent) ? parent->name + "/" : "") + string((item->file_as) ? item->file_as : "");
+}
+
+
+// Counterpart of create_enter_dir(); currently there is nothing to release.
+static void close_enter_dir(file_ll &f)
+{
+}
+
+
+// Walk the sibling list starting at d_ptr, recursing into folders and writing out each email.
+static void process(pst_item *outeritem, file_ll *parent, pst_desc_ll *d_ptr)
+{
+    file_ll ff;
+    pst_item *item = NULL;
+    DEBUG_ENT("process");
+    create_enter_dir(ff, parent, outeritem);
+    while (d_ptr) {
+        if (d_ptr->desc) {
+            item = pst_parse_item(&pstfile, d_ptr);
+            DEBUG_INFO(("item pointer is %p\n", item));
+            if (item) {
+                if (item->message_store) {
+                    // there should only be one message_store, and we have already done it
+                    DIE(("main: A second message_store has been found. Sorry, this must be an error.\n"));
+                }
+
+                if (item->folder && d_ptr->child ) {
+                    //if this is a folder with children, we want to recurse into it
+                    fprintf(stderr, "entering folder %s\n", item->file_as);
+                    process(item, &ff, d_ptr->child);
+                } else if (item->email && (item->type == PST_TYPE_NOTE || item->type == PST_TYPE_REPORT)) {
+                    ff.email_count++;
+                    DEBUG_MAIN(("main: Processing Email\n"));
+                    if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_REPORT)) {
+                        DEBUG_MAIN(("main: I have an email, but the folder isn't an email folder. Processing anyway\n"));
+                    }
+                    write_normal_email(ff, item, &pstfile);
+                }
+                pst_freeItem(item);
+            } else {
+                ff.skip_count++;
+                DEBUG_MAIN(("main: A NULL item was seen\n"));
+            }
+        }
+        d_ptr = d_ptr->next;
+    }
+    close_enter_dir(ff);
+    DEBUG_RET();
+}
+
+// Parse the options, open the pst file and convert every email below the top of folders.
+int main(int argc, char **argv)
+{
+    pst_desc_ll *d_ptr;
+    char *fname = NULL;
+    int c;                  // getopt() returns int; a plain char never equals -1 where char is unsigned
+    char *d_log = NULL;
+    prog_name = argv[0];
+    pst_item *item = NULL;
+
+    while ((c = getopt(argc, argv, "B:b:c:d:f:o:O:Vh"))!= -1) {
+        switch (c) {
+            case 'B':
+                bates_prefix = optarg;
+                break;
+            case 'b':
+                bates_index = atoi(optarg);
+                break;
+            case 'c':
+                bates_color = (int)strtol(optarg, (char**)NULL, 16);
+                break;
+            case 'f':
+                font_file = optarg;
+                break;
+            case 'o':
+                output_directory = optarg;
+                break;
+            case 'O':
+                output_file = optarg;
+                break;
+            case 'd':
+                d_log = optarg;
+                break;
+            case 'h':
+                usage();
+                exit(0);
+                break;
+            case 'V':
+                version();
+                exit(0);
+                break;
+            default:
+                usage();
+                exit(1);
+                break;
+        }
+    }
+
+    if (argc > optind) {
+        fname = argv[optind];
+    } else {
+        usage();
+        exit(2);
+    }
+
+    if (!font_file) { fprintf(stderr, "%s: a font file is required, use -f\n", prog_name); usage(); exit(2); }
+    #ifdef DEBUG_ALL
+        // force a log file
+        if (!d_log) d_log = "pst2dii.log";
+    #endif
+    DEBUG_INIT(d_log);
+    DEBUG_REGISTER_CLOSE();
+    DEBUG_ENT("main");
+    RET_DERROR(pst_open(&pstfile, fname), 1, ("Error opening File\n"));
+    RET_DERROR(pst_load_index(&pstfile), 2, ("Index Error\n"));
+
+    pst_load_extended_attributes(&pstfile);
+
+    d_ptr = pstfile.d_head; // first record is main record
+    item  = (pst_item*)pst_parse_item(&pstfile, d_ptr);
+    if (!item || !item->message_store) {
+        DEBUG_RET();
+        DIE(("main: Could not get root record\n"));
+    }
+
+    d_ptr = pst_getTopOfFolders(&pstfile, item);
+    if (!d_ptr) {
+        DEBUG_RET();
+        DIE(("Top of folders record not found. Cannot continue\n"));
+    }
+
+    dii_file = fopen(output_file, "wb");
+    if (!dii_file) { DEBUG_RET(); DIE(("main: cannot open dii load file %s for writing\n", output_file)); }
+    process(item, NULL, d_ptr->child);  // do the children of TOPF
+    pst_freeItem(item);
+    pst_close(&pstfile);
+    fclose(dii_file);
+    dii_file = NULL;
+    DEBUG_RET();
+    return 0;
+}
diff -r ee4e0d00bc94 -r cfd6175f9334 src/readpst.c
--- a/src/readpst.c	Sat Feb 16 12:43:42 2008 -0800
+++ b/src/readpst.c	Sat Feb 23 14:36:17 2008 -0800
@@ -6,7 +6,7 @@
  */
 #include "define.h"
 #include "libstrfunc.h"
-#include "vbuf.h"
+//#include "vbuf.h"
 #include "libpst.h"
 #include "common.h"
 #include "timeconv.h"
@@ -198,9 +198,10 @@
                     write_appointment(ff.output, item->appointment, item->email, item->create_date, item->modify_date);
                 } else {
-                    ff.skip_count++;
-                    DEBUG_MAIN(("main: Unknown item type. %i. Ascii1=\"%s\"\n",
-                        item->type, item->ascii_type));
+                    // these all seem to be things that MS agrees are not included in the item count
+                    //ff.skip_count++;
+                    DEBUG_MAIN(("main: Unknown item type %i (%s) name (%s)\n",
+                        item->type, item->ascii_type, item->file_as));
                 }
                 pst_freeItem(item);
             } else {
@@ -367,10 +368,9 @@
         DIE(("Top of folders record not found. Cannot continue\n"));
     }
-    process(item, d_ptr->child); // do the children of TOPF
+    process(item, d_ptr->child); // do the children of TOPF
     pst_freeItem(item);
     pst_close(&pstfile);
-    DEBUG_RET();
     return 0;
 }
@@ -417,19 +417,19 @@
     version();
     printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
     printf("OPTIONS:\n");
+    printf("\t-C\t- Decrypt the entire file and output on stdout (not typically useful)\n");
+    printf("\t-M\t- MH. Write emails in the MH format\n");
+    printf("\t-S\t- Seperate. Write emails in the seperate format\n");
+    printf("\t-V\t- Version. Display program version\n");
     printf("\t-b\t- Don't save RTF-Body attachments\n");
     printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n");
-    printf("\t-d \t- Debug to file. This is a binary log. Use readlog to print it\n");
+    printf("\t-d \t- Debug to file. This is a binary log. Use readpstlog to print it\n");
     printf("\t-h\t- Help. This screen\n");
     printf("\t-k\t- KMail. Output in kmail format\n");
-    printf("\t-M\t- MH. Write emails in the MH format\n");
-    printf("\t-o \t- Output Dir. Directory to write files to. CWD is changed *after* opening pst file\n");
+    printf("\t-o \t- Output directory to write files to. CWD is changed *after* opening pst file\n");
     printf("\t-q\t- Quiet. Only print error messages\n");
     printf("\t-r\t- Recursive. Output in a recursive format\n");
-    printf("\t-S\t- Seperate. Write emails in the seperate format\n");
-    printf("\t-V\t- Version. Display program version\n");
     printf("\t-w\t- Overwrite any output mbox files\n");
-    printf("\t-C\t- Decrypt the entire file and output on stdout (not typically useful)\n");
     DEBUG_RET();
     return 0;
 }
@@ -915,7 +915,7 @@
         if (!len || (soh[len-1] != '\n')) fprintf(f_output, "\n");
     } else {
-        //make up our own header!
+ //make up our own headers if (mode != MODE_SEPERATE) { // don't want this first line for this mode if (item->email->outlook_sender_name) { @@ -942,10 +942,9 @@ } if (item->email->sent_date) { - c_time = (char*) xmalloc(C_TIME_SIZE); + char c_time[C_TIME_SIZE]; strftime(c_time, C_TIME_SIZE, "%a, %d %b %Y %H:%M:%S %z", gmtime(&em_time)); fprintf(f_output, "Date: %s\n", c_time); - free(c_time); } } diff -r ee4e0d00bc94 -r cfd6175f9334 src/timeconv.c --- a/src/timeconv.c Sat Feb 16 12:43:42 2008 -0800 +++ b/src/timeconv.c Sat Feb 23 14:36:17 2008 -0800 @@ -18,13 +18,13 @@ char * fileTimeToAscii (const FILETIME *filetime) { time_t t1; - t1 = fileTimeToUnixTime(filetime,0); + t1 = fileTimeToUnixTime(filetime, NULL); return ctime(&t1); } struct tm * fileTimeToStructTM (const FILETIME *filetime) { time_t t1; - t1 = fileTimeToUnixTime(filetime, 0); + t1 = fileTimeToUnixTime(filetime, NULL); return gmtime(&t1); } diff -r ee4e0d00bc94 -r cfd6175f9334 xml/libpst.in --- a/xml/libpst.in Sat Feb 16 12:43:42 2008 -0800 +++ b/xml/libpst.in Sat Feb 23 14:36:17 2008 -0800 @@ -29,7 +29,7 @@ - 2008-01-27 + 2008-02-23 @@ -47,8 +47,11 @@ Synopsis readpst + + + + - @@ -56,9 +59,6 @@ - - - pstfile @@ -76,18 +76,44 @@ Options + -C + + Decrypt the entire pst file and dump it to stdout. + + + + -M + + Output messages in MH format as separate files. This will create + folders as named in the PST file, and will put each email together with + any attachments into its own file. These files will be numbered from 1 + to n with no leading zeros. + + + + -S + + Output messages into separate files. This will create folders as named + in the PST file, and will put each email in its own file. These files + will be numbered from 1 increasing in intervals of 1 (ie 1, 2, 3, ...). + Any attachments are saved alongside each email as XXXXXXXXX-attach1, + XXXXXXXXX-attach2 and so on, or with the name of the attachment if one + is present. + + + + -V + + Show program version and exit. 
+ + + -b Do not save the attachments for the RTF format of the email body. - -C - - Decrypt the entire pst file and dump it to stdout. - - - -c format Set the Contact output mode. Use -cv for vcard format or -cl for an email list. @@ -138,32 +164,6 @@ - -S - - Output messages into separate files. This will create folders as named - in the PST file, and will put each email in its own file. These files - will be numbered from 1 increasing in intervals of 1 (ie 1, 2, 3, ...). - Any attachments are saved alongside each email as XXXXXXXXX-attach1, - XXXXXXXXX-attach2 and so on, or with the name of the attachment if one - is present. - - - - -M - - Output messages in MH format as separate files. This will create - folders as named in the PST file, and will put each email together with - any attachments into its own file. These files will be numbered from 1 - to n with no leading zeros. - - - - -V - - Show program version and exit. - - - -w Overwrite any previous output files. Beware: When used with the -S @@ -222,7 +222,7 @@ - 2008-01-27 + 2008-02-23 @@ -240,9 +240,9 @@ Synopsis lspst + - pstfile @@ -251,6 +251,12 @@ Options + -V + + Show program version and exit. + + + -d debug-file Specify name of debug log file. The @@ -264,12 +270,6 @@ Show summary of options and exit. - - -V - - Show program version and exit. - - @@ -325,7 +325,7 @@ - 2008-01-27 + 2008-02-23 @@ -500,7 +500,7 @@ - 2008-01-27 + 2008-02-23 @@ -518,11 +518,11 @@ Synopsis pst2ldif - + pstfilename @@ -531,13 +531,7 @@ Options - -h - - Show summary of options. Subsequent options are then ignored. - - - - -V include-types + -V Show program version. Subsequent options are then ignored. @@ -565,6 +559,12 @@ by readpstlog. + + -h + + Show summary of options. Subsequent options are then ignored. 
+ + @@ -622,9 +622,143 @@ + + + 2008-02-23 + + + + pst2dii + 1 + pst2dii @VERSION@ + + + + pst2dii + extract email messages from a MS Outlook .pst file in DII load format + + + + Synopsis + + pst2dii + + + + + + + + + + pstfilename + + + + + Options + + + -B bates-prefix + + Sets the bates prefix string. The bates sequence number is appended to + this string, and printed on each page. + + + + -O dii-output-file + + Name of the output DII load file. + + + + -V + + Show program version. Subsequent options are then ignored. + + + + -b bates-number + + Starting bates sequence number. The default is zero. + + + + -c bates-color + + Font color for the bates stamp on each page, specified as 6 hex digits + as rrggbb values. The default is ff0000 for bright red. + + + + -d debug-file + + Specify name of debug log file. The + log file is not an ascii file, it is a binary file readable + by readpstlog. + + + + -f ttf-font-file + + Specify name of a true type font file. This should be a fixed pitch font. + + + + -h + + Show summary of options. Subsequent options are then ignored. + + + + -o output-directory + + Specifies the output directory. The directory must already exist. + + + + + + + Description + pst2dii + reads the email messages from a MS Outlook .pst file + and produces a DII load file that may be used to import message + summaries into a Summation DII system. The DII output file contains + references to the image and attachment files in the output directory. + + + + + Copyright + + Copyright (C) 2008 by 510 Software Group <carl@five-ten-sg.com> + + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + + You should have received a copy of the GNU General Public License along + with this program; see the file COPYING. 
If not, please write to the + Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + + + + Version + + @VERSION@ + + + + + - 2008-01-27 + 2008-02-23 @@ -1512,8 +1646,11 @@ 0063 Response requested 0064 Sender's Address access method (SMTP, EX) 0065 Sender's Address -0070 Processed Subject (with Fwd:, Re, ... removed) -0071 Date. Another date +0070 Conversation topic, processed subject (with Fwd:, Re, ... removed) +0071 Conversation index +0072 Original display BCC +0073 Original display CC +0074 Original display TO 0075 Recipient Address Access Method (SMTP, EX) 0076 Recipient's Address 0077 Second Recipient Access Method (SMTP, EX)