# HG changeset patch # User carl # Date 1140477294 28800 # Node ID f4643b609bdf13c790423c070131a8b40d0753c4 # Parent f86fa9e378a2d78b6cbd41c1bbc214c6d9115136 switch to automake/autoconf diff -r f86fa9e378a2 -r f4643b609bdf ChangeLog --- a/ChangeLog Sun Feb 19 21:50:40 2006 -0800 +++ b/ChangeLog Mon Feb 20 15:14:54 2006 -0800 @@ -1,10 +1,14 @@ -LibPST 0.5.3 (2006-02-19) +LibPST 0.5.3 (2006-02-20) =============================== - * switch to gnu autoconf/automake + * switch to gnu autoconf/automake. This breaks the MS VC++ projects + since the source code is now in the src subdirectory. + * documentation switched to xml, building man pages and html from the master xml copy. + * include rpm .spec file for building src and binary rpms. + LibPST 0.5.2 (2006-02-18) =============================== diff -r f86fa9e378a2 -r f4643b609bdf Makefile.am --- a/Makefile.am Sun Feb 19 21:50:40 2006 -0800 +++ b/Makefile.am Mon Feb 20 15:14:54 2006 -0800 @@ -1,3 +1,3 @@ SUBDIRS = src man html info CLEANFILES = xml/libpst xml/Makefile -EXTRA_DIST = $(wildcard xml/M*) $(wildcard xml/h*) $(wildcard xml/lib*) +EXTRA_DIST = libpst.spec $(wildcard xml/M*) $(wildcard xml/h*) $(wildcard xml/lib*) diff -r f86fa9e378a2 -r f4643b609bdf libpst.spec.in --- a/libpst.spec.in Sun Feb 19 21:50:40 2006 -0800 +++ b/libpst.spec.in Mon Feb 20 15:14:54 2006 -0800 @@ -44,11 +44,8 @@ datadir=$RPM_BUILD_ROOT%{_datadir} \ includedir=$RPM_BUILD_ROOT%{_includedir} \ sysconfdir=$RPM_BUILD_ROOT%{_sysconfdir} install -mkdir -p $RPM_BUILD_ROOT/etc/rc.d/init.d mkdir -p $RPM_BUILD_ROOT%{_datadir}/doc/@PACKAGE@-@VERSION@ -mv -f $RPM_BUILD_ROOT%{_sysconfdir}/dnsbl/dnsbl $RPM_BUILD_ROOT/etc/rc.d/init.d -mv AUTHORS COPYING ChangeLog NEWS README $RPM_BUILD_ROOT%{_datadir}/doc/@PACKAGE@-@VERSION@ -mkdir -p %{buildroot}/var/run/@PACKAGE@ +mv AUTHORS COPYING ChangeLog NEWS README $RPM_BUILD_ROOT%{_datadir}/doc/@PACKAGE@-@VERSION@ %pre @@ -63,15 +60,13 @@ %files %defattr(-,root,root) -%{_sbindir}/* +%{_bindir}/* %doc 
%{_mandir}/* %docdir %{_datadir}/doc/@PACKAGE@-@VERSION@ %{_datadir}/doc/@PACKAGE@-@VERSION@ -%config(noreplace) %{_sysconfdir}/@PACKAGE@ -%config(noreplace) %{_sysconfdir}/@PACKAGE@/*.conf -/etc/rc.d/init.d/@PACKAGE@ -%dir %attr(0750,@PACKAGE@,root) /var/run/@PACKAGE@ %changelog * Sun Feb 19 2006 Carl Byington 1.0 +- initial spec file using autoconf and http://www.fedora.us/docs/rpm-packaging-guidelines.html + diff -r f86fa9e378a2 -r f4643b609bdf man/Makefile.am --- a/man/Makefile.am Sun Feb 19 21:50:40 2006 -0800 +++ b/man/Makefile.am Mon Feb 20 15:14:54 2006 -0800 @@ -1,2 +1,2 @@ -man_MANS = readpst.1 readpstlog.1 +man_MANS = readpst.1 readpstlog.1 pst2ldif.1 outlook.pst.5 EXTRA_DIST = $(man_MANS) diff -r f86fa9e378a2 -r f4643b609bdf package --- a/package Sun Feb 19 21:50:40 2006 -0800 +++ b/package Mon Feb 20 15:14:54 2006 -0800 @@ -1,8 +1,5 @@ #!/bin/bash -pst=outlook.pst -cp /home/ldap/$pst . - T=`grep AM_INIT_AUTOMAKE configure.in | cut -d'(' -f2` NAME=`echo $T | cut -d, -f1` VER=`echo $T | cut -d, -f2 | cut -d')' -f1` @@ -10,25 +7,68 @@ web=/home/httpd/html/510sg/$NAME distlog=/tmp/distcheck -echo $web $BALL - chown --recursive root:root * make -f *cvs ./configure >/dev/null (cd xml; make; make distclean) -chown --recursive carl:carl * +cp -a html/*html $web +## make +## pst=/home/ldap/outlook.pst +## rm -f pst2ldif.log my.log +## src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' $pst >ams.ldif +## src/readpstlog pst2ldif.log | less >my.log +## hexdump -C -v $pst >pst.dump +## chown --recursive carl:carl * +## exit make distcheck >$distlog 2>&1 -exit +if [ $? -eq 0 ]; then + if [ -f $BALL ]; then + # expand locally to see the tarball + rm -rf junk + mkdir junk + cd junk + tar xfz ../$BALL + cd $NAME-$VER + ./configure >/dev/null + make >/dev/null + make install >/dev/null + cd .. + cd .. 
+ + # build rpm on target + target=host62 + scp $BALL $target:/tmp + ssh $target "cd /tmp; rpmbuild -ta $BALL" -if [ -f pst2ldif ]; then - rm -f pst2ldif.log my.log - ./pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' $pst >ams.ldif - ./readpstlog pst2ldif.log | less >my.log - hexdump -C $pst >pst.dump + # add packages to the web site + wp=$web/packages + wp4=$wp/centos4 + mkdir -p $wp4 + rp=/usr/src/redhat + mv -f $BALL $wp + scp $target:$rp/SRPMS/$NAME-$VER*rpm $wp + scp $target:$rp/RPMS/i386/$NAME-$VER*rpm $wp4 + (cd $web; chown --recursive web:web .; ls -alR) + rpm -ql -p $wp4/$NAME-$VER*6.rpm + fi +else + tail -10 $distlog +fi +chown --recursive carl:carl * +args="-z -e ssh --times --perms --links --relative --owner --group --numeric-ids --delete" +echo rcync $args $web ns1:/ - grep '^dn:' ams.ldif | sort >aaa - grep '^dn:' /home/ldap/ams.ldif | sort >bbb +if [ -f /usr/local/bin/pst2ldif ]; then + pushd /home/ldap + pst=outlook.pst + #rm -f pst2ldif.log my.log + /usr/local/bin/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' $pst >ams.ldif2 + #./readpstlog pst2ldif.log | less >my.log + #hexdump -C $pst >pst.dump + + grep '^dn:' ams.ldif2 | sort >aaa + grep '^dn:' ams.ldif | sort >bbb echo 'differences from nightly ldap' diff aaa bbb echo 'end differences' diff -r f86fa9e378a2 -r f4643b609bdf src/libpst.c --- a/src/libpst.c Sun Feb 19 21:50:40 2006 -0800 +++ b/src/libpst.c Mon Feb 20 15:14:54 2006 -0800 @@ -472,7 +472,7 @@ #define BLOCK_SIZE 516 // index blocks -#define DESC_BLOCK_SIZE 520 // descriptor blocks +#define DESC_BLOCK_SIZE 516 // descriptor blocks was 520 but bogus #define ITEM_COUNT_OFFSET 0x1f0 // count byte #define LEVEL_INDICATOR_OFFSET 0x1f3 // node or leaf #define BACKLINK_OFFSET 0x1f8 // backlink u1 value @@ -1310,43 +1310,40 @@ } na_ptr->items[x]->type = 0; // checked later before it is set /* Reference Types - - 2 - 0x0002 - Signed 16bit value - 3 - 0x0003 - Signed 32bit value - 4 - 0x0004 - 4-byte floating point - 5 - 0x0005 - Floating point 
double - 6 - 0x0006 - Signed 64-bit int - 7 - 0x0007 - Application Time - 10 - 0x000A - 32-bit error value - 11 - 0x000B - Boolean (non-zero = true) - 13 - 0x000D - Embedded Object - 20 - 0x0014 - 8-byte signed integer (64-bit) - 30 - 0x001E - Null terminated String - 31 - 0x001F - Unicode string - 64 - 0x0040 - Systime - Filetime structure - 72 - 0x0048 - OLE Guid - 258 - 0x0102 - Binary data - - - 0x1003 - Array of 32bit values - - 0x1014 - Array of 64bit values - - 0x101E - Array of Strings - - 0x1102 - Array of Binary data + 0x0002 - Signed 16bit value + 0x0003 - Signed 32bit value + 0x0004 - 4-byte floating point + 0x0005 - Floating point double + 0x0006 - Signed 64-bit int + 0x0007 - Application Time + 0x000A - 32-bit error value + 0x000B - Boolean (non-zero = true) + 0x000D - Embedded Object + 0x0014 - 8-byte signed integer (64-bit) + 0x001E - Null terminated String + 0x001F - Unicode string + 0x0040 - Systime - Filetime structure + 0x0048 - OLE Guid + 0x0102 - Binary data + 0x1003 - Array of 32bit values + 0x1014 - Array of 64bit values + 0x101E - Array of Strings + 0x1102 - Array of Binary data */ - if (table_rec.ref_type == 0x0003 || table_rec.ref_type == 0x000b - || table_rec.ref_type == 0x0002) { //contains data + if (table_rec.ref_type == 0x0002 || table_rec.ref_type == 0x0003 || table_rec.ref_type == 0x000b) { + //contains data na_ptr->items[x]->data = xmalloc(sizeof(int32_t)); memcpy(na_ptr->items[x]->data, &(table_rec.value), sizeof(int32_t)); - na_ptr->items[x]->size = sizeof(int32_t); na_ptr->items[x]->type = table_rec.ref_type; } else if (table_rec.ref_type == 0x0005 || table_rec.ref_type == 0x000D - || table_rec.ref_type == 0x1003 || table_rec.ref_type == 0x0014 - || table_rec.ref_type == 0x001E || table_rec.ref_type == 0x0102 - || table_rec.ref_type == 0x0040 || table_rec.ref_type == 0x101E - || table_rec.ref_type == 0x0048 || table_rec.ref_type == 0x1102 - || table_rec.ref_type == 0x1014) { + || table_rec.ref_type == 0x1003 || 
table_rec.ref_type == 0x0014 + || table_rec.ref_type == 0x001E || table_rec.ref_type == 0x0102 + || table_rec.ref_type == 0x0040 || table_rec.ref_type == 0x101E + || table_rec.ref_type == 0x0048 || table_rec.ref_type == 0x1102 + || table_rec.ref_type == 0x1014) { //contains index_ref to data LE32_CPU(table_rec.value); if ((table_rec.value & 0x0000000F) == 0xF) { @@ -1399,12 +1396,9 @@ } } else { DEBUG_EMAIL(("Ignoring 0 value in offset\n")); - if (na_ptr->items[x]->data) - free (na_ptr->items[x]->data); + if (na_ptr->items[x]->data) free (na_ptr->items[x]->data); na_ptr->items[x]->data = NULL; - free(na_ptr->items[x]); - na_ptr->count_item--; // remove this item from the destination list continue; } @@ -2265,6 +2259,14 @@ (t==5?"Embedded Message":"OLE")))))),t)); //INC_CHECK_X(); break; + case 0x3707: // PR_ATTACH_LONG_FILENAME Attachment filename (long?) + DEBUG_EMAIL(("Attachment Filename long - ")); + NULL_CHECK(attach); + MOVE_NEXT(attach); + LIST_COPY(attach->filename2, (char*)); + DEBUG_EMAIL(("%s\n", attach->filename2)); + //INC_CHECK_X(); + break; case 0x370B: // PR_RENDERING_POSITION // position in characters that the attachment appears in the plain text body DEBUG_EMAIL(("Attachment Position - ")); @@ -2275,14 +2277,6 @@ DEBUG_EMAIL(("%i [%#x]\n", attach->position)); //INC_CHECK_X(); break; - case 0x3707: // PR_ATTACH_LONG_FILENAME Attachment filename (long?) 
- DEBUG_EMAIL(("Attachment Filename long - ")); - NULL_CHECK(attach); - MOVE_NEXT(attach); - LIST_COPY(attach->filename2, (char*)); - DEBUG_EMAIL(("%s\n", attach->filename2)); - //INC_CHECK_X(); - break; case 0x370E: // PR_ATTACH_MIME_TAG Mime type of encoding DEBUG_EMAIL(("Attachment mime encoding - ")); NULL_CHECK(attach); diff -r f86fa9e378a2 -r f4643b609bdf xml/libpst.in --- a/xml/libpst.in Sun Feb 19 21:50:40 2006 -0800 +++ b/xml/libpst.in Mon Feb 20 15:14:54 2006 -0800 @@ -7,12 +7,12 @@ The most recent documentation is available at http://www.five-ten-sg.com/@PACKAGE@/ + - - 2006-02-19 + 2006-02-20 @@ -40,13 +40,12 @@ - files... + files Description - This manual page briefly documents the readpst command. readpst is a program that can read an Outlook PST (Personal Folders) file and convert it into an mbox file, a format suitable for KMail, a recursive mbox structure, or separate emails. @@ -72,7 +71,7 @@ -h - Show summary of options. + Show summary of options. Subsequent options are then ignored. @@ -119,7 +118,7 @@ -V - Show version of program. Subsequent options are then ignored. + Show program version. Subsequent options are then ignored. @@ -181,7 +180,7 @@ - 2006-02-19 + 2006-02-20 @@ -192,26 +191,25 @@ readpstlog - convert a readpst logfile to text format + convert a readpst logfile to text format Synopsis - readpst + readpstlog - logfile + logfile Description - This manual page briefly documents the readpstlog command. readpstlog - is a program that reads the binary logfile generated - by readpst, and outputs it in a more desirable format (i.e. text). + is a program that converts the binary logfile generated + by readpst to a more desirable text format. 
@@ -244,8 +242,8 @@ Message Types - - readpstlog understands the following types of log messages: + readpstlog understands the following types of log + messages: @@ -346,4 +344,817 @@ + + + + + 2006-02-20 + + + + pst2ldif + 1 + pst2ldif @VERSION@ + + + + pst2ldif + extract contacts from a MS Outlook .pst file in .ldif format + + + + Synopsis + + pst2ldif + + + + + pstfilename + + + + + Options + + + -h + + Show summary of options. Subsequent options are then ignored. + + + + -V include-types + + Show program version. Subsequent options are then ignored. + + + + -b ldap-base + + Sets the ldap base value used in the dn records. You probably want to + use something like "o=organization, c=US". + + + + -c class + + Sets the objectClass values for the contact items. This class needs to be + defined in the schema used by your LDAP server, and at a minimum it must + contain the ldap attributes given below. + + + + + + + Description + pst2ldif + reads the contact information from a MS Outlook .pst file + and produces a .ldif file that may be used to import those contacts + into an LDAP database. The following ldap attributes are generated: + + cn + givenName + sn + personalTitle + company + mail + postalAddress + l + st + postalCode + c + homePhone + telephoneNumber + facsimileTelephoneNumber + mobile + description + + + + + + Copyright + + Copyright (C) 2006 by 510 Software Group <carl@five-ten-sg.com> + + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + + You should have received a copy of the GNU General Public License along + with this program; see the file COPYING. If not, please write to the + Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ + + + + CVS Version + + $Id$ + + + + + + + + 2006-02-20 + + + + outlook.pst + 5 + + + + outlook.pst + format of MS Outlook .pst file + + + + Synopsis + + outlook.pst + + + + + Overview + + Each item in a .pst file is identified by two id values ID1 and ID2. + There are two separate b-trees indexed by these ID1 and ID2 values. + + + + + File Header + + The file header is located at offset 0 in the .pst file. + + + + We only support index type 0x0E and encryption type 0x01. + + + offset-index-1 is the file offset of the root of the + index1 b-tree, which contains (ID1, offset, size, unknown) tuples + for each item in the file. back-pointer-1 is the value that should + appear in the parent pointer of that root node. + + + offset-index-2 is the file offset of the root of the + index2 b-tree, which contains (ID2, DESC-ID1, LIST-ID1, PARENT-ID2) + tuples for each item in the file. back-pointer-2 is the value that should + appear in the parent pointer of that root node. + + + + + Index 1 Node + + The index1 b-tree nodes are 516 byte blocks with the following format. + + + + The item-count specifies the number of 12 byte records that + are active. The node-level is non-zero for this style of nodes. + The leaf nodes have a different format. The back-pointer must + match the back-pointer from the triple that pointed to this node. + + + Each item in this node is a triple of (ID, back-pointer, offset) + where the offset points to the next deeper node in the tree, the + back-pointer value must match the back-pointer in that deeper node, + and ID is the lowest ID value in the subtree. + + + + + Index 1 Leaf Node + + The index1 b-tree leaf nodes are 516 byte blocks with the following format. + + + + The item-count specifies the number of 12 byte records that + are active. The node-level is zero for these leaf nodes. + The back-pointer must match the back-pointer from the triple + that pointed to this node. 
+ + + Each item in this node is a tuple of (ID1, offset, size, unknown). + + + + + Index 2 Node + + The index2 b-tree nodes are 516 byte blocks with the following format. + + + + The item-count specifies the number of 12 byte records that + are active. The node-level is non-zero for this style of nodes. + The leaf nodes have a different format. The back-pointer must + match the back-pointer from the triple that pointed to this node. + + + Each item in this node is a triple of (ID2, back-pointer, offset) + where the offset points to the next deeper node in the tree, the + back-pointer value must match the back-pointer in that deeper node, + and ID2 is the lowest ID2 value in the subtree. + + + + + Index 2 Leaf Node + + The index2 b-tree leaf nodes are 516 byte blocks with the following format. + + + + The item-count specifies the number of 16 byte records that + are active. The node-level is zero for these leaf nodes. + The back-pointer must match the back-pointer from the triple + that pointed to this node. + + + Each item in this node is a tuple of (ID2, DESC-ID1, LIST-ID1, PARENT-ID2). + + + + + Associated List Item + + Contains associations between id1 and id2 for the items controlled by the record. + In the above leaf node, we have a tuple of (0x61, 0x02a82c, 0x02a836, 0). + 0x02a836 is the ID1 of the associated list, and we can look up that ID1 value + in the index1 b-tree to find the (offset,size) of the data in the .pst file. + + + + + + Associated Descriptor Item + + Contains information about the item, which may be email, contact, or other Outlook types. + In the above leaf node, we have a tuple of (0x21, 0x00e638, 0, 0). + 0x00e638 is the ID1 of the associated descriptor, and we can look up that ID1 value + in the index1 b-tree to find the (offset,size) of the data in the .pst file. + + + + Note the index-offset of 0x013c - starting at that position in the + descriptor block, we have an array of two byte integers.
The first + integer (0x000b) is a count of the number of overlapping pairs + following the count. The first pair is (0, 0xc), the next pair is (0xc, 0x14) + and the last (11th) pair is (0x10b, 0x123). These pairs are (start,end+1) + offsets of items in this block. So we have count+1 integers following + the count value. + + + Note the offset of 0x0020, which needs to be right shifted by 4 bits + to become 0x0002, which is then a byte offset to be added to the above + index-offset plus two (to skip the count), so it points to the (0xc, 0x14) + pair. Finally, we have the offset and size of the "b5" block located at offset 0xc + with a size of 8 bytes in this descriptor block. The "b5" block has the + following format: + + + + Note the "b5" offset of 0x0040, which needs to be right shifted by 4 bits + to become 0x0004, which is then a byte offset to be added to the above + index-offset plus two (to skip the count), so it points to the (0x14, 0x7c) + pair. We now have the offset 0x14 of the descriptor array, composed of 8 byte + entries. Each descriptor entry has the following format: + + + + For some reference types (2, 3, 0xb) the value is used directly. Otherwise, + the value is generally a non-zero offset, to be right shifted by 4 bits and used to fetch + a pair from the index table to find the offset and size of the item in this + descriptor block. However, if (value AND 0xf) == 0xf, then the value is an ID2 index. + + + The following reference types are known, but not all of these + are implemented in the code yet. + + + + The following item types are known, but not all of these + are implemented in the code yet. + Note: it appears that some types can have an IPOS value or an ID2 value + depending on the size of the field in question. It is safer to check + every field than for me to say what they "usually" contain. Absolute + values though, are generally going to be constant. + + + + +