changeset 328:c507af52515a

add readpst -a option
author Carl Byington <carl@five-ten-sg.com>
date Wed, 12 Jun 2013 19:45:44 -0700
parents e1b9f9aa5074
children 88230744c895
files AUTHORS ChangeLog NEWS configure.in libpst.spec.in regression/regression-tests.bash src/libpst.c src/libpst.h src/readpst.c xml/libpst.in
diffstat 10 files changed, 109 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/AUTHORS	Fri May 17 14:32:26 2013 -0700
+++ b/AUTHORS	Wed Jun 12 19:45:44 2013 -0700
@@ -35,6 +35,7 @@
     Kenneth Berland <ken@hero.com>
     Leo 'costela' Antunes <costela@debian.org>
     Svante Signell <svante.signell@telia.com>
+    Dominique Leuenberger a.k.a. Dimstar <dimstar@opensuse.org>
 
 Testing team:
     Mac OSX - Michael Watson <mike@mikeandgayle.com>
--- a/ChangeLog	Fri May 17 14:32:26 2013 -0700
+++ b/ChangeLog	Wed Jun 12 19:45:44 2013 -0700
@@ -1,3 +1,8 @@
+LibPST 0.6.60 (2013-06-12)
+===============================
+    * patch from Dominique Leuenberger to add AC_USE_SYSTEM_EXTENSIONS
+    * add readpst -a option for attachment stripping
+
 LibPST 0.6.59 (2013-05-17)
 ===============================
     * add autoconf checking for libgsf
--- a/NEWS	Fri May 17 14:32:26 2013 -0700
+++ b/NEWS	Wed Jun 12 19:45:44 2013 -0700
@@ -1,3 +1,4 @@
+0.6.60  2013-06-12 add AC_USE_SYSTEM_EXTENSIONS, add readpst -a option for attachment stripping
 0.6.59  2013-05-17 add autoconf checking for libgsf
 0.6.58  2012-12-28 fix From quoting on embedded rfc/822 messages
 0.6.57  2012-12-27 remove useless dependencies
--- a/configure.in	Fri May 17 14:32:26 2013 -0700
+++ b/configure.in	Wed Jun 12 19:45:44 2013 -0700
@@ -1,9 +1,11 @@
-AC_PREREQ(2.59)
-AC_INIT(libpst,0.6.59,carl@five-ten-sg.com)
+AC_PREREQ(2.60)
+AC_INIT(libpst,0.6.60,carl@five-ten-sg.com)
 AC_CONFIG_SRCDIR([src/libpst.c])
 AC_CONFIG_HEADER([config.h])
+AC_CONFIG_MACRO_DIR([m4])
 AM_INIT_AUTOMAKE
 AC_CANONICAL_HOST
+AC_USE_SYSTEM_EXTENSIONS
 
 #
 #  1. Remember that version-info is current:revision:age, and age <= current.
@@ -19,7 +21,7 @@
 #  6. libtool will build libpst.so.x.y.z where the SONAME is libpst.so.x
 #     and x=current-age, y=age, z=revision
 
-libpst_version_info='5:8:1'
+libpst_version_info='5:9:1'
 AC_SUBST(LIBPST_VERSION_INFO, [$libpst_version_info])
 libpst_so_major='4'
 AC_SUBST(LIBPST_SO_MAJOR, [$libpst_so_major])
@@ -44,6 +46,7 @@
 # 0.6.57    libpst.so.4     libpst.so.4.1.6
 # 0.6.58    libpst.so.4     libpst.so.4.1.7
 # 0.6.59    libpst.so.4     libpst.so.4.1.8
+# 0.6.60    libpst.so.4     libpst.so.4.1.9
 
 
 
@@ -147,7 +150,6 @@
 AC_PROG_LN_S
 AC_PROG_LIBTOOL
 AC_PROG_MAKE_SET
-AC_PROG_RANLIB
 
 
 # make sure we get large file support
--- a/libpst.spec.in	Fri May 17 14:32:26 2013 -0700
+++ b/libpst.spec.in	Wed Jun 12 19:45:44 2013 -0700
@@ -154,6 +154,12 @@
 
 
 %changelog
+* Wed Jun 12 2013 Carl Byington <carl@five-ten-sg.com> 0.6.60-1
+- patch from Dominique Leuenberger to add AC_USE_SYSTEM_EXTENSIONS
+
+* Tue Jun 11 2013 Remi Collet <rcollet@redhat.com> - 0.6.59-2
+- rebuild for new GD 2.1.0
+
 * Fri May 17 2013 Carl Byington <carl@five-ten-sg.com> 0.6.59-1
 - add autoconf checking for libgsf
 
--- a/regression/regression-tests.bash	Fri May 17 14:32:26 2013 -0700
+++ b/regression/regression-tests.bash	Wed Jun 12 19:45:44 2013 -0700
@@ -67,8 +67,8 @@
             ## normal recursive dump
             char='us-ascii'
             #char='BIG-5'
-            echo $val ../src/readpst -C $char -j 0 -r -cv -o output$n -d $ba.log $fn
-                 $val ../src/readpst -C $char -j 0 -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
+            echo $val ../src/readpst -a '.xls,.doc' -C $char -j 0 -r -cv -o output$n -d $ba.log $fn
+                 $val ../src/readpst -a '.xls,.doc' -C $char -j 0 -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
 
             ## separate mode with filename extensions and .msg files
             #echo $val ../src/readpst $jobs     -r -m -D -cv -o output$n -d $ba.log $fn
@@ -128,13 +128,13 @@
 #$func  21 rendgen3.pst          # mime signed email
 #$func  22 rendgen4.pst          # appointment test cases
 #$func  23 rendgen5.pst          # appointment test cases
-$func  24 paul.sheer.pst        # embedded rfc822 attachment
-$func  25 jerry.pst             # non ascii subject lines
+#$func  24 paul.sheer.pst        # embedded rfc822 attachment
+#$func  25 jerry.pst             # non ascii subject lines
 #$func  26 phill.bertolus.pst    # possible segfault in forked process, cannot reproduce
-$func  27 kaiser.pst            # appointments with other character sets
-$func  28 pstsample.pst         # character set issue
+#$func  27 kaiser.pst            # appointments with other character sets
+#$func  28 pstsample.pst         # character set issue
 #$func  29 pstsample2.pst        # embedded image in rtf data
-$func  30 pstsample3.pst        # exports of rtf and html
+#$func  30 pstsample3.pst        # exports of rtf and html
 
 [ -n "$val" ] && grep 'lost:' *err | grep -v 'lost: 0 '
 
--- a/src/libpst.c	Fri May 17 14:32:26 2013 -0700
+++ b/src/libpst.c	Wed Jun 12 19:45:44 2013 -0700
@@ -284,7 +284,6 @@
 static int              pst_process(uint64_t block_id, pst_mapi_object *list, pst_item *item, pst_item_attach *attach);
 static size_t           pst_read_block_size(pst_file *pf, int64_t offset, size_t size, char **buf);
 static int              pst_decrypt(uint64_t i_id, char *buf, size_t size, unsigned char type);
-static int              pst_stricmp(char *a, char *b);
 static int              pst_strincmp(char *a, char *b, size_t x);
 static char*            pst_wide_to_single(char *wt, size_t size);
 
@@ -3391,6 +3390,10 @@
             SAFE_FREE_STR(item->email->report_text);
             SAFE_FREE(item->email->report_time);
             SAFE_FREE_STR(item->email->supplementary_info);
+            SAFE_FREE_STR(item->email->outlook_received_name1);
+            SAFE_FREE_STR(item->email->outlook_sender_name2);
+            SAFE_FREE_STR(item->email->outlook_normalized_subject);
+            SAFE_FREE_STR(item->email->outlook_search_key);
             free(item->email);
         }
         if (item->folder) {
@@ -4155,9 +4158,10 @@
 }
 
 
-static int pst_stricmp(char *a, char *b) {
-    // compare strings case-insensitive.
-    // returns -1 if a < b, 0 if a==b, 1 if a > b
+/** compare strings case-insensitive.
+ *  @return  -1 if a < b, 0 if a==b, 1 if a > b
+ */
+int pst_stricmp(char *a, char *b) {
     while(*a != '\0' && *b != '\0' && toupper(*a)==toupper(*b)) {
         a++; b++;
     }
--- a/src/libpst.h	Fri May 17 14:32:26 2013 -0700
+++ b/src/libpst.h	Wed Jun 12 19:45:44 2013 -0700
@@ -891,7 +891,7 @@
     /** original file name when the file was opened */
     char*   fname;
     /** default character set for items without one */
-    char*   charset;
+    const char*   charset;
     /** the head and tail of the linked list of index structures */
     pst_index_ll *i_head, *i_tail;
     /** the head and tail of the top level of the descriptor tree */
@@ -1041,6 +1041,12 @@
 size_t          pst_ff_getIDblock_dec(pst_file *pf, uint64_t i_id, char **buf);
 
 
+/** compare strings case-insensitive.
+ *  @return  -1 if a < b, 0 if a==b, 1 if a > b
+ */
+int pst_stricmp(char *a, char *b);
+
+
 /** fwrite with checking for null pointer.
  * @param ptr pointer to the buffer
  * @param size  size of each item
--- a/src/readpst.c	Fri May 17 14:32:26 2013 -0700
+++ b/src/readpst.c	Wed Jun 12 19:45:44 2013 -0700
@@ -44,6 +44,7 @@
 void      close_separate_file(struct file_ll *f);
 char*     my_stristr(char *haystack, char *needle);
 void      check_filename(char *fname);
+int       acceptable_ext(pst_item_attach* attach);
 void      write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst);
 void      write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, int save_rtf, char** extra_mime_headers);
 void      write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst);
@@ -136,6 +137,7 @@
 pst_file    pstfile;
 regex_t     meta_charset_pattern;
 char*       default_charset = NULL;
+char*       acceptable_extensions = NULL;
 
 int         number_processors = 1;  // number of cpus we have
 int         max_children  = 0;      // based on number of cpus and command line args
@@ -450,8 +452,21 @@
     }
 
     // command-line option handling
-    while ((c = getopt(argc, argv, "bC:c:Dd:emhj:kMo:qrSt:uVw"))!= -1) {
+    while ((c = getopt(argc, argv, "a:bC:c:Dd:emhj:kMo:qrSt:uVw"))!= -1) {
         switch (c) {
+        case 'a':
+            if (optarg) {
+                int n = strlen(optarg);
+                acceptable_extensions = (char*)pst_malloc(n+2);
+                strcpy(acceptable_extensions, optarg);
+                acceptable_extensions[n+1] = '\0';  // double null terminates array of non-empty null terminated strings.
+                char *p = acceptable_extensions;
+                while (*p) {
+                    if (*p == ',') *p = '\0';
+                    p++;
+                }
+            }
+            break;
         case 'b':
             save_rtf_body = 0;
             break;
@@ -728,6 +743,7 @@
     printf("\t-D\t- Include deleted items in output\n");
     printf("\t-M\t- Write emails in the MH (rfc822) format\n");
     printf("\t-S\t- Separate. Write emails in the separate format\n");
+    printf("\t-a <attachment-extension-list>\t- Discard any attachment without an extension on the list\n");
     printf("\t-b\t- Don't save RTF-Body attachments\n");
     printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n");
     printf("\t-d <filename> \t- Debug to file.\n");
@@ -1014,6 +1030,37 @@
 }
 
 
+/**
+ * Decide whether an attachment should be kept, by looking up the extension of its file name
+ * (filename2 if set, else filename1) in the global acceptable_extensions list, ignoring case.
+ * @param attach  pst attachment object
+ * @return        0 only if the name has an extension that is not on a non-empty list; 1 otherwise.
+ */
+int  acceptable_ext(pst_item_attach* attach)
+{
+    if (!acceptable_extensions || *acceptable_extensions == '\0') return 1;     // list unset, or its first entry is empty: keep everything
+    char *attach_filename = (attach->filename2.str) ? attach->filename2.str
+                                                    : attach->filename1.str;
+    if (!attach_filename) return 1; // attachment with no name is always acceptable
+    char *e = strrchr(attach_filename, '.');    // e starts at the last '.', so list entries must include the dot (e.g. ".xls")
+    if (!e) return 1;               // attachment with no extension is always acceptable.
+    DEBUG_ENT("acceptable_ext");
+    DEBUG_INFO(("attachment extension %s\n", e));
+    int rc = 0;
+    char *a = acceptable_extensions;    // entries are stored back to back, each terminated by '\0'
+    while (*a) {                        // an empty entry marks the end of the list
+        if (pst_stricmp(a, e) == 0) {
+            rc = 1;
+            break;
+        }
+        a += strlen(a) + 1;             // step over this entry and its terminator
+    }
+    DEBUG_INFO(("attachment acceptable returns %d\n", rc));
+    DEBUG_RET();
+    return rc;
+}
+
+
 void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst)
 {
     FILE *fp = NULL;
@@ -1755,10 +1802,12 @@
                 write_embedded_message(f_output, attach, boundary, pst, save_rtf, extra_mime_headers);
             }
             else if (attach->data.data || attach->i_id) {
-                if (mode == MODE_SEPARATE && !mode_MH)
-                    write_separate_attachment(f_name, attach, ++attach_num, pst);
-                else
-                    write_inline_attachment(f_output, attach, boundary, pst);
+                if (acceptable_ext(attach)) {
+                    if (mode == MODE_SEPARATE && !mode_MH)
+                        write_separate_attachment(f_name, attach, ++attach_num, pst);
+                    else
+                        write_inline_attachment(f_output, attach, boundary, pst);
+                }
             }
         }
     }
--- a/xml/libpst.in	Fri May 17 14:32:26 2013 -0700
+++ b/xml/libpst.in	Wed Jun 12 19:45:44 2013 -0700
@@ -35,7 +35,7 @@
 
     <refentry id="readpst.1">
         <refentryinfo>
-            <date>2011-05-27</date>
+            <date>2013-06-12</date>
         </refentryinfo>
 
         <refmeta>
@@ -58,6 +58,7 @@
                 <arg><option>-M</option></arg>
                 <arg><option>-S</option></arg>
                 <arg><option>-V</option></arg>
+                <arg><option>-a <replaceable class="parameter">attachment-extension-list</replaceable></option></arg>
                 <arg><option>-b</option></arg>
                 <arg><option>-c <replaceable class="parameter">format</replaceable></option></arg>
                 <arg><option>-d <replaceable class="parameter">debug-file</replaceable></option></arg>
@@ -128,6 +129,14 @@
                     </para></listitem>
                 </varlistentry>
                 <varlistentry>
+                    <term>-a <replaceable class="parameter">attachment-extension-list</replaceable></term>
+                    <listitem><para>
+                        Set the comma-separated list of acceptable attachment extensions, for example '.xls,.doc'.
+                        Any attachment whose extension is not on this list will be discarded; attachments with no file name
+                        or no extension are always kept. All attachments are acceptable if the list is empty, or this option is not specified.
+                    </para></listitem>
+                </varlistentry>
+                <varlistentry>
                     <term>-b</term>
                     <listitem><para>
                         Do not save the attachments for the RTF format of the email body.
@@ -285,7 +294,7 @@
 
     <refentry id="lspst.1">
         <refentryinfo>
-            <date>2011-05-27</date>
+            <date>2013-06-12</date>
         </refentryinfo>
 
         <refmeta>
@@ -380,7 +389,7 @@
 
     <refentry id="pst2ldif.1">
         <refentryinfo>
-            <date>2011-05-27</date>
+            <date>2013-06-12</date>
         </refentryinfo>
 
         <refmeta>
@@ -548,7 +557,7 @@
 
     <refentry id="pst2dii.1">
         <refentryinfo>
-            <date>2011-05-27</date>
+            <date>2013-06-12</date>
         </refentryinfo>
 
         <refmeta>
@@ -681,7 +690,7 @@
 
     <refentry id="pst.5">
         <refentryinfo>
-            <date>2011-05-27</date>
+            <date>2013-06-12</date>
         </refentryinfo>
 
         <refmeta>