# HG changeset patch # User Carl Byington # Date 1371091544 25200 # Node ID c507af52515aa2e728fbb68484f674cee4978d61 # Parent e1b9f9aa507494709e7ce3a3dbb40d7a34ffaa73 add readpst -a option diff -r e1b9f9aa5074 -r c507af52515a AUTHORS --- a/AUTHORS Fri May 17 14:32:26 2013 -0700 +++ b/AUTHORS Wed Jun 12 19:45:44 2013 -0700 @@ -35,6 +35,7 @@ Kenneth Berland Leo 'costela' Antunes Svante Signell + Dominique Leuenberger a.k.a. Dimstar Testing team: Mac OSX - Michael Watson diff -r e1b9f9aa5074 -r c507af52515a ChangeLog --- a/ChangeLog Fri May 17 14:32:26 2013 -0700 +++ b/ChangeLog Wed Jun 12 19:45:44 2013 -0700 @@ -1,3 +1,8 @@ +LibPST 0.6.60 (2013-06-12) +=============================== + * patch from Dominique Leuenberger to add AC_USE_SYSTEM_EXTENSIONS + * add readpst -a option for attachment stripping + LibPST 0.6.59 (2013-05-17) =============================== * add autoconf checking for libgsf diff -r e1b9f9aa5074 -r c507af52515a NEWS --- a/NEWS Fri May 17 14:32:26 2013 -0700 +++ b/NEWS Wed Jun 12 19:45:44 2013 -0700 @@ -1,3 +1,4 @@ +0.6.60 2013-06-12 add AC_USE_SYSTEM_EXTENSIONS, add readpst -a option for attachment stripping 0.6.59 2013-05-17 add autoconf checking for libgsf 0.6.58 2012-12-28 fix From quoting on embedded rfc/822 messages 0.6.57 2012-12-27 remove useless dependencies diff -r e1b9f9aa5074 -r c507af52515a configure.in --- a/configure.in Fri May 17 14:32:26 2013 -0700 +++ b/configure.in Wed Jun 12 19:45:44 2013 -0700 @@ -1,9 +1,11 @@ -AC_PREREQ(2.59) -AC_INIT(libpst,0.6.59,carl@five-ten-sg.com) +AC_PREREQ(2.60) +AC_INIT(libpst,0.6.60,carl@five-ten-sg.com) AC_CONFIG_SRCDIR([src/libpst.c]) AC_CONFIG_HEADER([config.h]) +AC_CONFIG_MACRO_DIR([m4]) AM_INIT_AUTOMAKE AC_CANONICAL_HOST +AC_USE_SYSTEM_EXTENSIONS # # 1. Remember that version-info is current:revision:age, and age <= current. @@ -19,7 +21,7 @@ # 6. 
libtool will build libpst.so.x.y.z where the SONAME is libpst.so.x # and x=current-age, y=age, z=revision -libpst_version_info='5:8:1' +libpst_version_info='5:9:1' AC_SUBST(LIBPST_VERSION_INFO, [$libpst_version_info]) libpst_so_major='4' AC_SUBST(LIBPST_SO_MAJOR, [$libpst_so_major]) @@ -44,6 +46,7 @@ # 0.6.57 libpst.so.4 libpst.so.4.1.6 # 0.6.58 libpst.so.4 libpst.so.4.1.7 # 0.6.59 libpst.so.4 libpst.so.4.1.8 +# 0.6.60 libpst.so.4 libpst.so.4.1.9 @@ -147,7 +150,6 @@ AC_PROG_LN_S AC_PROG_LIBTOOL AC_PROG_MAKE_SET -AC_PROG_RANLIB # make sure we get large file support diff -r e1b9f9aa5074 -r c507af52515a libpst.spec.in --- a/libpst.spec.in Fri May 17 14:32:26 2013 -0700 +++ b/libpst.spec.in Wed Jun 12 19:45:44 2013 -0700 @@ -154,6 +154,12 @@ %changelog +* Wed Jun 12 2013 Carl Byington 0.6.60-1 +- patch from Dominique Leuenberger to add AC_USE_SYSTEM_EXTENSIONS + +* Tue Jun 11 2013 Remi Collet - 0.6.59-2 +- rebuild for new GD 2.1.0 + * Fri May 17 2013 Carl Byington 0.6.59-1 - add autoconf checking for libgsf diff -r e1b9f9aa5074 -r c507af52515a regression/regression-tests.bash --- a/regression/regression-tests.bash Fri May 17 14:32:26 2013 -0700 +++ b/regression/regression-tests.bash Wed Jun 12 19:45:44 2013 -0700 @@ -67,8 +67,8 @@ ## normal recursive dump char='us-ascii' #char='BIG-5' - echo $val ../src/readpst -C $char -j 0 -r -cv -o output$n -d $ba.log $fn - $val ../src/readpst -C $char -j 0 -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 + echo $val ../src/readpst -a '.xls,.doc' -C $char -j 0 -r -cv -o output$n -d $ba.log $fn + $val ../src/readpst -a '.xls,.doc' -C $char -j 0 -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1 ## separate mode with filename extensions and .msg files #echo $val ../src/readpst $jobs -r -m -D -cv -o output$n -d $ba.log $fn @@ -128,13 +128,13 @@ #$func 21 rendgen3.pst # mime signed email #$func 22 rendgen4.pst # appointment test cases #$func 23 rendgen5.pst # appointment test cases -$func 24 paul.sheer.pst # embedded rfc822 attachment 
-$func 25 jerry.pst # non ascii subject lines +#$func 24 paul.sheer.pst # embedded rfc822 attachment +#$func 25 jerry.pst # non ascii subject lines #$func 26 phill.bertolus.pst # possible segfault in forked process, cannot reproduce -$func 27 kaiser.pst # appointments with other character sets -$func 28 pstsample.pst # character set issue +#$func 27 kaiser.pst # appointments with other character sets +#$func 28 pstsample.pst # character set issue #$func 29 pstsample2.pst # embedded image in rtf data -$func 30 pstsample3.pst # exports of rtf and html +#$func 30 pstsample3.pst # exports of rtf and html [ -n "$val" ] && grep 'lost:' *err | grep -v 'lost: 0 ' diff -r e1b9f9aa5074 -r c507af52515a src/libpst.c --- a/src/libpst.c Fri May 17 14:32:26 2013 -0700 +++ b/src/libpst.c Wed Jun 12 19:45:44 2013 -0700 @@ -284,7 +284,6 @@ static int pst_process(uint64_t block_id, pst_mapi_object *list, pst_item *item, pst_item_attach *attach); static size_t pst_read_block_size(pst_file *pf, int64_t offset, size_t size, char **buf); static int pst_decrypt(uint64_t i_id, char *buf, size_t size, unsigned char type); -static int pst_stricmp(char *a, char *b); static int pst_strincmp(char *a, char *b, size_t x); static char* pst_wide_to_single(char *wt, size_t size); @@ -3391,6 +3390,10 @@ SAFE_FREE_STR(item->email->report_text); SAFE_FREE(item->email->report_time); SAFE_FREE_STR(item->email->supplementary_info); + SAFE_FREE_STR(item->email->outlook_received_name1); + SAFE_FREE_STR(item->email->outlook_sender_name2); + SAFE_FREE_STR(item->email->outlook_normalized_subject); + SAFE_FREE_STR(item->email->outlook_search_key); free(item->email); } if (item->folder) { @@ -4155,9 +4158,10 @@ } -static int pst_stricmp(char *a, char *b) { - // compare strings case-insensitive. - // returns -1 if a < b, 0 if a==b, 1 if a > b +/** compare strings case-insensitive. 
+ * @return -1 if a < b, 0 if a==b, 1 if a > b + */ +int pst_stricmp(char *a, char *b) { while(*a != '\0' && *b != '\0' && toupper(*a)==toupper(*b)) { a++; b++; } diff -r e1b9f9aa5074 -r c507af52515a src/libpst.h --- a/src/libpst.h Fri May 17 14:32:26 2013 -0700 +++ b/src/libpst.h Wed Jun 12 19:45:44 2013 -0700 @@ -891,7 +891,7 @@ /** original file name when the file was opened */ char* fname; /** default character set for items without one */ - char* charset; + const char* charset; /** the head and tail of the linked list of index structures */ pst_index_ll *i_head, *i_tail; /** the head and tail of the top level of the descriptor tree */ @@ -1041,6 +1041,12 @@ size_t pst_ff_getIDblock_dec(pst_file *pf, uint64_t i_id, char **buf); +/** compare strings case-insensitive. + * @return -1 if a < b, 0 if a==b, 1 if a > b + */ +int pst_stricmp(char *a, char *b); + + /** fwrite with checking for null pointer. * @param ptr pointer to the buffer * @param size size of each item diff -r e1b9f9aa5074 -r c507af52515a src/readpst.c --- a/src/readpst.c Fri May 17 14:32:26 2013 -0700 +++ b/src/readpst.c Wed Jun 12 19:45:44 2013 -0700 @@ -44,6 +44,7 @@ void close_separate_file(struct file_ll *f); char* my_stristr(char *haystack, char *needle); void check_filename(char *fname); +int acceptable_ext(pst_item_attach* attach); void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst); void write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, int save_rtf, char** extra_mime_headers); void write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst); @@ -136,6 +137,7 @@ pst_file pstfile; regex_t meta_charset_pattern; char* default_charset = NULL; +char* acceptable_extensions = NULL; int number_processors = 1; // number of cpus we have int max_children = 0; // based on number of cpus and command line args @@ -450,8 +452,21 @@ } // command-line option handling - while ((c 
= getopt(argc, argv, "bC:c:Dd:emhj:kMo:qrSt:uVw"))!= -1) { + while ((c = getopt(argc, argv, "a:bC:c:Dd:emhj:kMo:qrSt:uVw"))!= -1) { switch (c) { + case 'a': + if (optarg) { + int n = strlen(optarg); + acceptable_extensions = (char*)pst_malloc(n+2); + strcpy(acceptable_extensions, optarg); + acceptable_extensions[n+1] = '\0'; // double null terminates array of non-empty null terminated strings. + char *p = acceptable_extensions; + while (*p) { + if (*p == ',') *p = '\0'; + p++; + } + } + break; case 'b': save_rtf_body = 0; break; @@ -728,6 +743,7 @@ printf("\t-D\t- Include deleted items in output\n"); printf("\t-M\t- Write emails in the MH (rfc822) format\n"); printf("\t-S\t- Separate. Write emails in the separate format\n"); + printf("\t-a \t- Discard any attachment without an extension on the list\n"); printf("\t-b\t- Don't save RTF-Body attachments\n"); printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n"); printf("\t-d \t- Debug to file.\n"); @@ -1014,6 +1030,37 @@ } +/** + * check if the file name extension is acceptable. If not, the attachment + * will be discarded + * @param attach pst attachment object + * @return true if the attachment filename contains an extension that we want. + */ +int acceptable_ext(pst_item_attach* attach) +{ + if (!acceptable_extensions || *acceptable_extensions == '\0') return 1; // acceptable list missing or empty + char *attach_filename = (attach->filename2.str) ? attach->filename2.str + : attach->filename1.str; + if (!attach_filename) return 1; // attachment with no name is always acceptable + char *e = strrchr(attach_filename, '.'); + if (!e) return 1; // attachment with no extension is always acceptable. 
+ DEBUG_ENT("acceptable_ext"); + DEBUG_INFO(("attachment extension %s\n", e)); + int rc = 0; + char *a = acceptable_extensions; + while (*a) { + if (pst_stricmp(a, e) == 0) { + rc = 1; + break; + } + a += strlen(a) + 1; + } + DEBUG_INFO(("attachment acceptable returns %d\n", rc)); + DEBUG_RET(); + return rc; +} + + void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst) { FILE *fp = NULL; @@ -1755,10 +1802,12 @@ write_embedded_message(f_output, attach, boundary, pst, save_rtf, extra_mime_headers); } else if (attach->data.data || attach->i_id) { - if (mode == MODE_SEPARATE && !mode_MH) - write_separate_attachment(f_name, attach, ++attach_num, pst); - else - write_inline_attachment(f_output, attach, boundary, pst); + if (acceptable_ext(attach)) { + if (mode == MODE_SEPARATE && !mode_MH) + write_separate_attachment(f_name, attach, ++attach_num, pst); + else + write_inline_attachment(f_output, attach, boundary, pst); + } } } } diff -r e1b9f9aa5074 -r c507af52515a xml/libpst.in --- a/xml/libpst.in Fri May 17 14:32:26 2013 -0700 +++ b/xml/libpst.in Wed Jun 12 19:45:44 2013 -0700 @@ -35,7 +35,7 @@ - 2011-05-27 + 2013-06-12 @@ -58,6 +58,7 @@ + @@ -128,6 +129,14 @@ + -a attachment-extension-list + + Set the comma separated list of acceptable attachment extensions, each including the leading dot, for example '.xls,.doc'. Any attachment whose + extension is not on this list (compared case-insensitively) will be discarded. Attachments with no file name or no extension are always kept. All attachments + are acceptable if the list is empty, or this option is not specified. + + + -b Do not save the attachments for the RTF format of the email body. @@ -285,7 +294,7 @@ - 2011-05-27 + 2013-06-12 @@ -380,7 +389,7 @@ - 2011-05-27 + 2013-06-12 @@ -548,7 +557,7 @@ - 2011-05-27 + 2013-06-12 @@ -681,7 +690,7 @@ - 2011-05-27 + 2013-06-12