libpst

changeset 328:c507af52515a

add readpst -a option
author Carl Byington <carl@five-ten-sg.com>
date Wed, 12 Jun 2013 19:45:44 -0700
parents e1b9f9aa5074
children 88230744c895
files AUTHORS ChangeLog NEWS configure.in libpst.spec.in regression/regression-tests.bash src/libpst.c src/libpst.h src/readpst.c xml/libpst.in
diffstat 10 files changed, 109 insertions(+), 26 deletions(-) [+]
line diff
     1.1 --- a/AUTHORS	Fri May 17 14:32:26 2013 -0700
     1.2 +++ b/AUTHORS	Wed Jun 12 19:45:44 2013 -0700
     1.3 @@ -35,6 +35,7 @@
     1.4      Kenneth Berland <ken@hero.com>
     1.5      Leo 'costela' Antunes <costela@debian.org>
     1.6      Svante Signell <svante.signell@telia.com>
     1.7 +    Dominique Leuenberger a.k.a. Dimstar <dimstar@opensuse.org>
     1.8  
     1.9  Testing team:
    1.10      Mac OSX - Michael Watson <mike@mikeandgayle.com>
     2.1 --- a/ChangeLog	Fri May 17 14:32:26 2013 -0700
     2.2 +++ b/ChangeLog	Wed Jun 12 19:45:44 2013 -0700
     2.3 @@ -1,3 +1,8 @@
     2.4 +LibPST 0.6.60 (2013-06-12)
     2.5 +===============================
     2.6 +    * patch from Dominique Leuenberger to add AC_USE_SYSTEM_EXTENSIONS
     2.7 +    * add readpst -a option for attachment stripping
     2.8 +
     2.9  LibPST 0.6.59 (2013-05-17)
    2.10  ===============================
    2.11      * add autoconf checking for libgsf
     3.1 --- a/NEWS	Fri May 17 14:32:26 2013 -0700
     3.2 +++ b/NEWS	Wed Jun 12 19:45:44 2013 -0700
     3.3 @@ -1,3 +1,4 @@
     3.4 +0.6.60  2013-06-12 add AC_USE_SYSTEM_EXTENSIONS, add readpst -a option for attachment stripping
     3.5  0.6.59  2013-05-17 add autoconf checking for libgsf
     3.6  0.6.58  2012-12-28 fix From quoting on embedded rfc/822 messages
     3.7  0.6.57  2012-12-27 remove useless dependencies
     4.1 --- a/configure.in	Fri May 17 14:32:26 2013 -0700
     4.2 +++ b/configure.in	Wed Jun 12 19:45:44 2013 -0700
     4.3 @@ -1,9 +1,11 @@
     4.4 -AC_PREREQ(2.59)
     4.5 -AC_INIT(libpst,0.6.59,carl@five-ten-sg.com)
     4.6 +AC_PREREQ(2.60)
     4.7 +AC_INIT(libpst,0.6.60,carl@five-ten-sg.com)
     4.8  AC_CONFIG_SRCDIR([src/libpst.c])
     4.9  AC_CONFIG_HEADER([config.h])
    4.10 +AC_CONFIG_MACRO_DIR([m4])
    4.11  AM_INIT_AUTOMAKE
    4.12  AC_CANONICAL_HOST
    4.13 +AC_USE_SYSTEM_EXTENSIONS
    4.14  
    4.15  #
    4.16  #  1. Remember that version-info is current:revision:age, and age <= current.
    4.17 @@ -19,7 +21,7 @@
    4.18  #  6. libtool will build libpst.so.x.y.z where the SONAME is libpst.so.x
    4.19  #     and x=current-age, y=age, z=revision
    4.20  
    4.21 -libpst_version_info='5:8:1'
    4.22 +libpst_version_info='5:9:1'
    4.23  AC_SUBST(LIBPST_VERSION_INFO, [$libpst_version_info])
    4.24  libpst_so_major='4'
    4.25  AC_SUBST(LIBPST_SO_MAJOR, [$libpst_so_major])
    4.26 @@ -44,6 +46,7 @@
    4.27  # 0.6.57    libpst.so.4     libpst.so.4.1.6
    4.28  # 0.6.58    libpst.so.4     libpst.so.4.1.7
    4.29  # 0.6.59    libpst.so.4     libpst.so.4.1.8
    4.30 +# 0.6.60    libpst.so.4     libpst.so.4.1.9
    4.31  
    4.32  
    4.33  
    4.34 @@ -147,7 +150,6 @@
    4.35  AC_PROG_LN_S
    4.36  AC_PROG_LIBTOOL
    4.37  AC_PROG_MAKE_SET
    4.38 -AC_PROG_RANLIB
    4.39  
    4.40  
    4.41  # make sure we get large file support
     5.1 --- a/libpst.spec.in	Fri May 17 14:32:26 2013 -0700
     5.2 +++ b/libpst.spec.in	Wed Jun 12 19:45:44 2013 -0700
     5.3 @@ -154,6 +154,12 @@
     5.4  
     5.5  
     5.6  %changelog
     5.7 +* xxx xxx xx 2013 Carl Byington <carl@five-ten-sg.com> 0.6.60-1
     5.8 +- patch from Dominique Leuenberger to add AC_USE_SYSTEM_EXTENSIONS
     5.9 +
    5.10 +* Tue Jun 11 2013 Remi Collet <rcollet@redhat.com> - 0.6.59-2
    5.11 +- rebuild for new GD 2.1.0
    5.12 +
    5.13  * Fri May 17 2013 Carl Byington <carl@five-ten-sg.com> 0.6.59-1
    5.14  - add autoconf checking for libgsf
    5.15  
     6.1 --- a/regression/regression-tests.bash	Fri May 17 14:32:26 2013 -0700
     6.2 +++ b/regression/regression-tests.bash	Wed Jun 12 19:45:44 2013 -0700
     6.3 @@ -67,8 +67,8 @@
     6.4              ## normal recursive dump
     6.5              char='us-ascii'
     6.6              #char='BIG-5'
     6.7 -            echo $val ../src/readpst -C $char -j 0 -r -cv -o output$n -d $ba.log $fn
     6.8 -                 $val ../src/readpst -C $char -j 0 -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
     6.9 +            echo $val ../src/readpst -a '.xls,.doc' -C $char -j 0 -r -cv -o output$n -d $ba.log $fn
    6.10 +                 $val ../src/readpst -a '.xls,.doc' -C $char -j 0 -r -cv -o output$n -d $ba.log $fn >$ba.err 2>&1
    6.11  
    6.12              ## separate mode with filename extensions and .msg files
    6.13              #echo $val ../src/readpst $jobs     -r -m -D -cv -o output$n -d $ba.log $fn
    6.14 @@ -128,13 +128,13 @@
    6.15  #$func  21 rendgen3.pst          # mime signed email
    6.16  #$func  22 rendgen4.pst          # appointment test cases
    6.17  #$func  23 rendgen5.pst          # appointment test cases
    6.18 -$func  24 paul.sheer.pst        # embedded rfc822 attachment
    6.19 -$func  25 jerry.pst             # non ascii subject lines
    6.20 +#$func  24 paul.sheer.pst        # embedded rfc822 attachment
    6.21 +#$func  25 jerry.pst             # non ascii subject lines
    6.22  #$func  26 phill.bertolus.pst    # possible segfault in forked process, cannot reproduce
    6.23 -$func  27 kaiser.pst            # appointments with other character sets
    6.24 -$func  28 pstsample.pst         # character set issue
    6.25 +#$func  27 kaiser.pst            # appointments with other character sets
    6.26 +#$func  28 pstsample.pst         # character set issue
    6.27  #$func  29 pstsample2.pst        # embedded image in rtf data
    6.28 -$func  30 pstsample3.pst        # exports of rtf and html
    6.29 +#$func  30 pstsample3.pst        # exports of rtf and html
    6.30  
    6.31  [ -n "$val" ] && grep 'lost:' *err | grep -v 'lost: 0 '
    6.32  
     7.1 --- a/src/libpst.c	Fri May 17 14:32:26 2013 -0700
     7.2 +++ b/src/libpst.c	Wed Jun 12 19:45:44 2013 -0700
     7.3 @@ -284,7 +284,6 @@
     7.4  static int              pst_process(uint64_t block_id, pst_mapi_object *list, pst_item *item, pst_item_attach *attach);
     7.5  static size_t           pst_read_block_size(pst_file *pf, int64_t offset, size_t size, char **buf);
     7.6  static int              pst_decrypt(uint64_t i_id, char *buf, size_t size, unsigned char type);
     7.7 -static int              pst_stricmp(char *a, char *b);
     7.8  static int              pst_strincmp(char *a, char *b, size_t x);
     7.9  static char*            pst_wide_to_single(char *wt, size_t size);
    7.10  
    7.11 @@ -3391,6 +3390,10 @@
    7.12              SAFE_FREE_STR(item->email->report_text);
    7.13              SAFE_FREE(item->email->report_time);
    7.14              SAFE_FREE_STR(item->email->supplementary_info);
    7.15 +            SAFE_FREE_STR(item->email->outlook_received_name1);
    7.16 +            SAFE_FREE_STR(item->email->outlook_sender_name2);
    7.17 +            SAFE_FREE_STR(item->email->outlook_normalized_subject);
    7.18 +            SAFE_FREE_STR(item->email->outlook_search_key);
    7.19              free(item->email);
    7.20          }
    7.21          if (item->folder) {
    7.22 @@ -4155,9 +4158,10 @@
    7.23  }
    7.24  
    7.25  
    7.26 -static int pst_stricmp(char *a, char *b) {
    7.27 -    // compare strings case-insensitive.
    7.28 -    // returns -1 if a < b, 0 if a==b, 1 if a > b
    7.29 +/** compare strings case-insensitive.
    7.30 + *  @return  -1 if a < b, 0 if a==b, 1 if a > b
    7.31 + */
    7.32 +int pst_stricmp(char *a, char *b) {
    7.33      while(*a != '\0' && *b != '\0' && toupper(*a)==toupper(*b)) {
    7.34          a++; b++;
    7.35      }
     8.1 --- a/src/libpst.h	Fri May 17 14:32:26 2013 -0700
     8.2 +++ b/src/libpst.h	Wed Jun 12 19:45:44 2013 -0700
     8.3 @@ -891,7 +891,7 @@
     8.4      /** original file name when the file was opened */
     8.5      char*   fname;
     8.6      /** default character set for items without one */
     8.7 -    char*   charset;
     8.8 +    const char*   charset;
     8.9      /** the head and tail of the linked list of index structures */
    8.10      pst_index_ll *i_head, *i_tail;
    8.11      /** the head and tail of the top level of the descriptor tree */
    8.12 @@ -1041,6 +1041,12 @@
    8.13  size_t          pst_ff_getIDblock_dec(pst_file *pf, uint64_t i_id, char **buf);
    8.14  
    8.15  
    8.16 +/** compare strings case-insensitive.
    8.17 + *  @return  -1 if a < b, 0 if a==b, 1 if a > b
    8.18 + */
    8.19 +int pst_stricmp(char *a, char *b);
    8.20 +
    8.21 +
    8.22  /** fwrite with checking for null pointer.
    8.23   * @param ptr pointer to the buffer
    8.24   * @param size  size of each item
     9.1 --- a/src/readpst.c	Fri May 17 14:32:26 2013 -0700
     9.2 +++ b/src/readpst.c	Wed Jun 12 19:45:44 2013 -0700
     9.3 @@ -44,6 +44,7 @@
     9.4  void      close_separate_file(struct file_ll *f);
     9.5  char*     my_stristr(char *haystack, char *needle);
     9.6  void      check_filename(char *fname);
     9.7 +int       acceptable_ext(pst_item_attach* attach);
     9.8  void      write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst);
     9.9  void      write_embedded_message(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pf, int save_rtf, char** extra_mime_headers);
    9.10  void      write_inline_attachment(FILE* f_output, pst_item_attach* attach, char *boundary, pst_file* pst);
    9.11 @@ -136,6 +137,7 @@
    9.12  pst_file    pstfile;
    9.13  regex_t     meta_charset_pattern;
    9.14  char*       default_charset = NULL;
    9.15 +char*       acceptable_extensions = NULL;
    9.16  
    9.17  int         number_processors = 1;  // number of cpus we have
    9.18  int         max_children  = 0;      // based on number of cpus and command line args
    9.19 @@ -450,8 +452,21 @@
    9.20      }
    9.21  
    9.22      // command-line option handling
    9.23 -    while ((c = getopt(argc, argv, "bC:c:Dd:emhj:kMo:qrSt:uVw"))!= -1) {
    9.24 +    while ((c = getopt(argc, argv, "a:bC:c:Dd:emhj:kMo:qrSt:uVw"))!= -1) {
    9.25          switch (c) {
    9.26 +        case 'a':
    9.27 +            if (optarg) {
    9.28 +                int n = strlen(optarg);
    9.29 +                acceptable_extensions = (char*)pst_malloc(n+2);
    9.30 +                strcpy(acceptable_extensions, optarg);
    9.31 +                acceptable_extensions[n+1] = '\0';  // double null terminates array of non-empty null terminated strings.
    9.32 +                char *p = acceptable_extensions;
    9.33 +                while (*p) {
    9.34 +                    if (*p == ',') *p = '\0';
    9.35 +                    p++;
    9.36 +                }
    9.37 +            }
    9.38 +            break;
    9.39          case 'b':
    9.40              save_rtf_body = 0;
    9.41              break;
    9.42 @@ -728,6 +743,7 @@
    9.43      printf("\t-D\t- Include deleted items in output\n");
    9.44      printf("\t-M\t- Write emails in the MH (rfc822) format\n");
    9.45      printf("\t-S\t- Separate. Write emails in the separate format\n");
    9.46 +    printf("\t-a <attachment-extension-list>\t- Discard any attachment without an extension on the list\n");
    9.47      printf("\t-b\t- Don't save RTF-Body attachments\n");
    9.48      printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n");
    9.49      printf("\t-d <filename> \t- Debug to file.\n");
    9.50 @@ -1014,6 +1030,37 @@
    9.51  }
    9.52  
    9.53  
     9.54 +/**
     9.55 + * Decide whether an attachment is written out or discarded, based on the
     9.56 + * extension of its file name and the global acceptable_extensions list.
     9.57 + * @param attach  pst attachment object; filename2 is preferred over filename1
     9.58 + * @return        1 if the list is missing or empty, the attachment has no file name or no extension, or its extension matches a list entry (case-insensitive); otherwise 0.
     9.59 + */
     9.60 +int  acceptable_ext(pst_item_attach* attach)
     9.61 +{
     9.62 +    if (!acceptable_extensions || *acceptable_extensions == '\0') return 1;     // no list given, or its first entry is empty: keep every attachment
     9.63 +    char *attach_filename = (attach->filename2.str) ? attach->filename2.str
     9.64 +                                                    : attach->filename1.str;   // fall back to filename1 only when filename2 is not set
     9.65 +    if (!attach_filename) return 1; // attachment with no name is always acceptable
     9.66 +    char *e = strrchr(attach_filename, '.');   // e points at the last '.', so the dot itself is part of the compared extension
     9.67 +    if (!e) return 1;               // attachment with no extension is always acceptable.
     9.68 +    DEBUG_ENT("acceptable_ext");   // entered only after the early returns above, so it pairs with the single DEBUG_RET below
     9.69 +    DEBUG_INFO(("attachment extension %s\n", e));
     9.70 +    int rc = 0;   // assume rejected until a list entry matches
     9.71 +    char *a = acceptable_extensions;   // walks a sequence of null terminated entries packed back to back
     9.72 +    while (*a) {   // an empty entry marks the end of the list
     9.73 +        if (pst_stricmp(a, e) == 0) {   // case-insensitive match of the whole entry against the extension
     9.74 +            rc = 1;
     9.75 +            break;
     9.76 +        }
     9.77 +        a += strlen(a) + 1;   // skip this entry and its terminating null to reach the next one
     9.78 +    }
     9.79 +    DEBUG_INFO(("attachment acceptable returns %d\n", rc));
     9.80 +    DEBUG_RET();
     9.81 +    return rc;
     9.82 +}
    9.83 +
    9.84 +
    9.85  void write_separate_attachment(char f_name[], pst_item_attach* attach, int attach_num, pst_file* pst)
    9.86  {
    9.87      FILE *fp = NULL;
    9.88 @@ -1755,10 +1802,12 @@
    9.89                  write_embedded_message(f_output, attach, boundary, pst, save_rtf, extra_mime_headers);
    9.90              }
    9.91              else if (attach->data.data || attach->i_id) {
    9.92 -                if (mode == MODE_SEPARATE && !mode_MH)
    9.93 -                    write_separate_attachment(f_name, attach, ++attach_num, pst);
    9.94 -                else
    9.95 -                    write_inline_attachment(f_output, attach, boundary, pst);
    9.96 +                if (acceptable_ext(attach)) {
    9.97 +                    if (mode == MODE_SEPARATE && !mode_MH)
    9.98 +                        write_separate_attachment(f_name, attach, ++attach_num, pst);
    9.99 +                    else
   9.100 +                        write_inline_attachment(f_output, attach, boundary, pst);
   9.101 +                }
   9.102              }
   9.103          }
   9.104      }
    10.1 --- a/xml/libpst.in	Fri May 17 14:32:26 2013 -0700
    10.2 +++ b/xml/libpst.in	Wed Jun 12 19:45:44 2013 -0700
    10.3 @@ -35,7 +35,7 @@
    10.4  
    10.5      <refentry id="readpst.1">
    10.6          <refentryinfo>
    10.7 -            <date>2011-05-27</date>
    10.8 +            <date>2013-06-12</date>
    10.9          </refentryinfo>
   10.10  
   10.11          <refmeta>
   10.12 @@ -58,6 +58,7 @@
   10.13                  <arg><option>-M</option></arg>
   10.14                  <arg><option>-S</option></arg>
   10.15                  <arg><option>-V</option></arg>
   10.16 +                <arg><option>-a <replaceable class="parameter">attachment-extension-list</replaceable></option></arg>
   10.17                  <arg><option>-b</option></arg>
   10.18                  <arg><option>-c <replaceable class="parameter">format</replaceable></option></arg>
   10.19                  <arg><option>-d <replaceable class="parameter">debug-file</replaceable></option></arg>
   10.20 @@ -128,6 +129,14 @@
   10.21                      </para></listitem>
   10.22                  </varlistentry>
   10.23                  <varlistentry>
   10.24 +                    <term>-a <replaceable class="parameter">attachment-extension-list</replaceable></term>
   10.25 +                    <listitem><para>
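    10.26 +                        Set the comma separated list of acceptable attachment extensions, each including its leading dot, for example '.xls,.doc'.
    10.27 +                        Any attachment whose file name has an extension that is not on this list will be discarded; extensions are compared without regard to case.
    10.28 +                        Attachments with no file name or no extension are always kept. All attachments are acceptable if the list is empty, or this option is not specified.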
   10.29 +                    </para></listitem>
   10.30 +                </varlistentry>
   10.31 +                <varlistentry>
   10.32                      <term>-b</term>
   10.33                      <listitem><para>
   10.34                          Do not save the attachments for the RTF format of the email body.
   10.35 @@ -285,7 +294,7 @@
   10.36  
   10.37      <refentry id="lspst.1">
   10.38          <refentryinfo>
   10.39 -            <date>2011-05-27</date>
   10.40 +            <date>2013-06-12</date>
   10.41          </refentryinfo>
   10.42  
   10.43          <refmeta>
   10.44 @@ -380,7 +389,7 @@
   10.45  
   10.46      <refentry id="pst2ldif.1">
   10.47          <refentryinfo>
   10.48 -            <date>2011-05-27</date>
   10.49 +            <date>2013-06-12</date>
   10.50          </refentryinfo>
   10.51  
   10.52          <refmeta>
   10.53 @@ -548,7 +557,7 @@
   10.54  
   10.55      <refentry id="pst2dii.1">
   10.56          <refentryinfo>
   10.57 -            <date>2011-05-27</date>
   10.58 +            <date>2013-06-12</date>
   10.59          </refentryinfo>
   10.60  
   10.61          <refmeta>
   10.62 @@ -681,7 +690,7 @@
   10.63  
   10.64      <refentry id="pst.5">
   10.65          <refentryinfo>
   10.66 -            <date>2011-05-27</date>
   10.67 +            <date>2013-06-12</date>
   10.68          </refentryinfo>
   10.69  
   10.70          <refmeta>