# HG changeset patch
# User carl
# Date 1140477294 28800
# Node ID f4643b609bdf13c790423c070131a8b40d0753c4
# Parent f86fa9e378a2d78b6cbd41c1bbc214c6d9115136
switch to automake/autoconf
diff -r f86fa9e378a2 -r f4643b609bdf ChangeLog
--- a/ChangeLog Sun Feb 19 21:50:40 2006 -0800
+++ b/ChangeLog Mon Feb 20 15:14:54 2006 -0800
@@ -1,10 +1,14 @@
-LibPST 0.5.3 (2006-02-19)
+LibPST 0.5.3 (2006-02-20)
===============================
- * switch to gnu autoconf/automake
+ * switch to gnu autoconf/automake. This breaks the MS VC++ projects
+ since the source code is now in the src subdirectory.
+
* documentation switched to xml, building man pages and html
from the master xml copy.
+ * include rpm .spec file for building src and binary rpms.
+
LibPST 0.5.2 (2006-02-18)
===============================
diff -r f86fa9e378a2 -r f4643b609bdf Makefile.am
--- a/Makefile.am Sun Feb 19 21:50:40 2006 -0800
+++ b/Makefile.am Mon Feb 20 15:14:54 2006 -0800
@@ -1,3 +1,3 @@
SUBDIRS = src man html info
CLEANFILES = xml/libpst xml/Makefile
-EXTRA_DIST = $(wildcard xml/M*) $(wildcard xml/h*) $(wildcard xml/lib*)
+EXTRA_DIST = libpst.spec $(wildcard xml/M*) $(wildcard xml/h*) $(wildcard xml/lib*)
diff -r f86fa9e378a2 -r f4643b609bdf libpst.spec.in
--- a/libpst.spec.in Sun Feb 19 21:50:40 2006 -0800
+++ b/libpst.spec.in Mon Feb 20 15:14:54 2006 -0800
@@ -44,11 +44,8 @@
datadir=$RPM_BUILD_ROOT%{_datadir} \
includedir=$RPM_BUILD_ROOT%{_includedir} \
sysconfdir=$RPM_BUILD_ROOT%{_sysconfdir} install
-mkdir -p $RPM_BUILD_ROOT/etc/rc.d/init.d
mkdir -p $RPM_BUILD_ROOT%{_datadir}/doc/@PACKAGE@-@VERSION@
-mv -f $RPM_BUILD_ROOT%{_sysconfdir}/dnsbl/dnsbl $RPM_BUILD_ROOT/etc/rc.d/init.d
-mv AUTHORS COPYING ChangeLog NEWS README $RPM_BUILD_ROOT%{_datadir}/doc/@PACKAGE@-@VERSION@
-mkdir -p %{buildroot}/var/run/@PACKAGE@
+mv AUTHORS COPYING ChangeLog NEWS README $RPM_BUILD_ROOT%{_datadir}/doc/@PACKAGE@-@VERSION@
%pre
@@ -63,15 +60,13 @@
%files
%defattr(-,root,root)
-%{_sbindir}/*
+%{_bindir}/*
%doc %{_mandir}/*
%docdir %{_datadir}/doc/@PACKAGE@-@VERSION@
%{_datadir}/doc/@PACKAGE@-@VERSION@
-%config(noreplace) %{_sysconfdir}/@PACKAGE@
-%config(noreplace) %{_sysconfdir}/@PACKAGE@/*.conf
-/etc/rc.d/init.d/@PACKAGE@
-%dir %attr(0750,@PACKAGE@,root) /var/run/@PACKAGE@
%changelog
* Sun Feb 19 2006 Carl Byington 1.0
+- initial spec file using autoconf and http://www.fedora.us/docs/rpm-packaging-guidelines.html
+
diff -r f86fa9e378a2 -r f4643b609bdf man/Makefile.am
--- a/man/Makefile.am Sun Feb 19 21:50:40 2006 -0800
+++ b/man/Makefile.am Mon Feb 20 15:14:54 2006 -0800
@@ -1,2 +1,2 @@
-man_MANS = readpst.1 readpstlog.1
+man_MANS = readpst.1 readpstlog.1 pst2ldif.1 outlook.pst.5
EXTRA_DIST = $(man_MANS)
diff -r f86fa9e378a2 -r f4643b609bdf package
--- a/package Sun Feb 19 21:50:40 2006 -0800
+++ b/package Mon Feb 20 15:14:54 2006 -0800
@@ -1,8 +1,5 @@
#!/bin/bash
-pst=outlook.pst
-cp /home/ldap/$pst .
-
T=`grep AM_INIT_AUTOMAKE configure.in | cut -d'(' -f2`
NAME=`echo $T | cut -d, -f1`
VER=`echo $T | cut -d, -f2 | cut -d')' -f1`
@@ -10,25 +7,68 @@
web=/home/httpd/html/510sg/$NAME
distlog=/tmp/distcheck
-echo $web $BALL
-
chown --recursive root:root *
make -f *cvs
./configure >/dev/null
(cd xml; make; make distclean)
-chown --recursive carl:carl *
+cp -a html/*html $web
+## make
+## pst=/home/ldap/outlook.pst
+## rm -f pst2ldif.log my.log
+## src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' $pst >ams.ldif
+## src/readpstlog pst2ldif.log | less >my.log
+## hexdump -C -v $pst >pst.dump
+## chown --recursive carl:carl *
+## exit
make distcheck >$distlog 2>&1
-exit
+if [ $? -eq 0 ]; then
+ if [ -f $BALL ]; then
+ # expand locally to see the tarball
+ rm -rf junk
+ mkdir junk
+ cd junk
+ tar xfz ../$BALL
+ cd $NAME-$VER
+ ./configure >/dev/null
+ make >/dev/null
+ make install >/dev/null
+ cd ..
+ cd ..
+
+ # build rpm on target
+ target=host62
+ scp $BALL $target:/tmp
+ ssh $target "cd /tmp; rpmbuild -ta $BALL"
-if [ -f pst2ldif ]; then
- rm -f pst2ldif.log my.log
- ./pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' $pst >ams.ldif
- ./readpstlog pst2ldif.log | less >my.log
- hexdump -C $pst >pst.dump
+ # add packages to the web site
+ wp=$web/packages
+ wp4=$wp/centos4
+ mkdir -p $wp4
+ rp=/usr/src/redhat
+ mv -f $BALL $wp
+ scp $target:$rp/SRPMS/$NAME-$VER*rpm $wp
+ scp $target:$rp/RPMS/i386/$NAME-$VER*rpm $wp4
+ (cd $web; chown --recursive web:web .; ls -alR)
+ rpm -ql -p $wp4/$NAME-$VER*6.rpm
+ fi
+else
+ tail -10 $distlog
+fi
+chown --recursive carl:carl *
+args="-z -e ssh --times --perms --links --relative --owner --group --numeric-ids --delete"
+echo rcync $args $web ns1:/
- grep '^dn:' ams.ldif | sort >aaa
- grep '^dn:' /home/ldap/ams.ldif | sort >bbb
+if [ -f /usr/local/bin/pst2ldif ]; then
+ pushd /home/ldap
+ pst=outlook.pst
+ #rm -f pst2ldif.log my.log
+ /usr/local/bin/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' $pst >ams.ldif2
+ #./readpstlog pst2ldif.log | less >my.log
+ #hexdump -C $pst >pst.dump
+
+ grep '^dn:' ams.ldif2 | sort >aaa
+ grep '^dn:' ams.ldif | sort >bbb
echo 'differences from nightly ldap'
diff aaa bbb
echo 'end differences'
diff -r f86fa9e378a2 -r f4643b609bdf src/libpst.c
--- a/src/libpst.c Sun Feb 19 21:50:40 2006 -0800
+++ b/src/libpst.c Mon Feb 20 15:14:54 2006 -0800
@@ -472,7 +472,7 @@
#define BLOCK_SIZE 516 // index blocks
-#define DESC_BLOCK_SIZE 520 // descriptor blocks
+#define DESC_BLOCK_SIZE 516 // descriptor blocks (was 520, but that value was bogus)
#define ITEM_COUNT_OFFSET 0x1f0 // count byte
#define LEVEL_INDICATOR_OFFSET 0x1f3 // node or leaf
#define BACKLINK_OFFSET 0x1f8 // backlink u1 value
@@ -1310,43 +1310,40 @@
}
na_ptr->items[x]->type = 0; // checked later before it is set
/* Reference Types
-
- 2 - 0x0002 - Signed 16bit value
- 3 - 0x0003 - Signed 32bit value
- 4 - 0x0004 - 4-byte floating point
- 5 - 0x0005 - Floating point double
- 6 - 0x0006 - Signed 64-bit int
- 7 - 0x0007 - Application Time
- 10 - 0x000A - 32-bit error value
- 11 - 0x000B - Boolean (non-zero = true)
- 13 - 0x000D - Embedded Object
- 20 - 0x0014 - 8-byte signed integer (64-bit)
- 30 - 0x001E - Null terminated String
- 31 - 0x001F - Unicode string
- 64 - 0x0040 - Systime - Filetime structure
- 72 - 0x0048 - OLE Guid
- 258 - 0x0102 - Binary data
-
- - 0x1003 - Array of 32bit values
- - 0x1014 - Array of 64bit values
- - 0x101E - Array of Strings
- - 0x1102 - Array of Binary data
+ 0x0002 - Signed 16bit value
+ 0x0003 - Signed 32bit value
+ 0x0004 - 4-byte floating point
+ 0x0005 - Floating point double
+ 0x0006 - Signed 64-bit int
+ 0x0007 - Application Time
+ 0x000A - 32-bit error value
+ 0x000B - Boolean (non-zero = true)
+ 0x000D - Embedded Object
+ 0x0014 - 8-byte signed integer (64-bit)
+ 0x001E - Null terminated String
+ 0x001F - Unicode string
+ 0x0040 - Systime - Filetime structure
+ 0x0048 - OLE Guid
+ 0x0102 - Binary data
+ 0x1003 - Array of 32bit values
+ 0x1014 - Array of 64bit values
+ 0x101E - Array of Strings
+ 0x1102 - Array of Binary data
*/
- if (table_rec.ref_type == 0x0003 || table_rec.ref_type == 0x000b
- || table_rec.ref_type == 0x0002) { //contains data
+ if (table_rec.ref_type == 0x0002 || table_rec.ref_type == 0x0003 || table_rec.ref_type == 0x000b) {
+ //contains data
na_ptr->items[x]->data = xmalloc(sizeof(int32_t));
memcpy(na_ptr->items[x]->data, &(table_rec.value), sizeof(int32_t));
-
na_ptr->items[x]->size = sizeof(int32_t);
na_ptr->items[x]->type = table_rec.ref_type;
} else if (table_rec.ref_type == 0x0005 || table_rec.ref_type == 0x000D
- || table_rec.ref_type == 0x1003 || table_rec.ref_type == 0x0014
- || table_rec.ref_type == 0x001E || table_rec.ref_type == 0x0102
- || table_rec.ref_type == 0x0040 || table_rec.ref_type == 0x101E
- || table_rec.ref_type == 0x0048 || table_rec.ref_type == 0x1102
- || table_rec.ref_type == 0x1014) {
+ || table_rec.ref_type == 0x1003 || table_rec.ref_type == 0x0014
+ || table_rec.ref_type == 0x001E || table_rec.ref_type == 0x0102
+ || table_rec.ref_type == 0x0040 || table_rec.ref_type == 0x101E
+ || table_rec.ref_type == 0x0048 || table_rec.ref_type == 0x1102
+ || table_rec.ref_type == 0x1014) {
//contains index_ref to data
LE32_CPU(table_rec.value);
if ((table_rec.value & 0x0000000F) == 0xF) {
@@ -1399,12 +1396,9 @@
}
} else {
DEBUG_EMAIL(("Ignoring 0 value in offset\n"));
- if (na_ptr->items[x]->data)
- free (na_ptr->items[x]->data);
+ if (na_ptr->items[x]->data) free (na_ptr->items[x]->data);
na_ptr->items[x]->data = NULL;
-
free(na_ptr->items[x]);
-
na_ptr->count_item--; // remove this item from the destination list
continue;
}
@@ -2265,6 +2259,14 @@
(t==5?"Embedded Message":"OLE")))))),t));
//INC_CHECK_X();
break;
+ case 0x3707: // PR_ATTACH_LONG_FILENAME Attachment filename (long?)
+ DEBUG_EMAIL(("Attachment Filename long - "));
+ NULL_CHECK(attach);
+ MOVE_NEXT(attach);
+ LIST_COPY(attach->filename2, (char*));
+ DEBUG_EMAIL(("%s\n", attach->filename2));
+ //INC_CHECK_X();
+ break;
case 0x370B: // PR_RENDERING_POSITION
// position in characters that the attachment appears in the plain text body
DEBUG_EMAIL(("Attachment Position - "));
@@ -2275,14 +2277,6 @@
DEBUG_EMAIL(("%i [%#x]\n", attach->position));
//INC_CHECK_X();
break;
- case 0x3707: // PR_ATTACH_LONG_FILENAME Attachment filename (long?)
- DEBUG_EMAIL(("Attachment Filename long - "));
- NULL_CHECK(attach);
- MOVE_NEXT(attach);
- LIST_COPY(attach->filename2, (char*));
- DEBUG_EMAIL(("%s\n", attach->filename2));
- //INC_CHECK_X();
- break;
case 0x370E: // PR_ATTACH_MIME_TAG Mime type of encoding
DEBUG_EMAIL(("Attachment mime encoding - "));
NULL_CHECK(attach);
diff -r f86fa9e378a2 -r f4643b609bdf xml/libpst.in
--- a/xml/libpst.in Sun Feb 19 21:50:40 2006 -0800
+++ b/xml/libpst.in Mon Feb 20 15:14:54 2006 -0800
@@ -7,12 +7,12 @@
The most recent documentation is available at http://www.five-ten-sg.com/@PACKAGE@/
+
-
- 2006-02-19
+ 2006-02-20
@@ -40,13 +40,12 @@
- files...
+ filesDescription
- This manual page briefly documents the readpst command.readpst is a program that can read an Outlook PST (Personal Folders) file
and convert it into an mbox file, a format suitable for KMail, a recursive mbox
structure, or separate emails.
@@ -72,7 +71,7 @@
-h
- Show summary of options.
+ Show summary of options. Subsequent options are then ignored.
@@ -119,7 +118,7 @@
-V
- Show version of program. Subsequent options are then ignored.
+ Show program version. Subsequent options are then ignored.
@@ -181,7 +180,7 @@
- 2006-02-19
+ 2006-02-20
@@ -192,26 +191,25 @@
readpstlog
- convert a readpst logfile to text format
+ convert a readpst logfile to text formatSynopsis
- readpst
+ readpstlog
- logfile
+ logfileDescription
- This manual page briefly documents the readpstlog command.readpstlog
- is a program that reads the binary logfile generated
- by readpst, and outputs it in a more desirable format (i.e. text).
+ is a program that converts the binary logfile generated
+ by readpst to a more desirable text format.
@@ -244,8 +242,8 @@
Message Types
-
- readpstlog understands the following types of log messages:
+ readpstlog understands the following types of log
+ messages:
@@ -346,4 +344,817 @@
+
+
+
+
+ 2006-02-20
+
+
+
+ pst2ldif
+ 1
+ pst2ldif @VERSION@
+
+
+
+ pst2ldif
+ extract contacts from a MS Outlook .pst file in .ldif format
+
+
+
+ Synopsis
+
+ pst2ldif
+
+
+
+
+ pstfilename
+
+
+
+
+ Options
+
+
+ -h
+
+ Show summary of options. Subsequent options are then ignored.
+
+
+
+ -V
+
+ Show program version. Subsequent options are then ignored.
+
+
+
+ -b ldap-base
+
+ Sets the ldap base value used in the dn records. You probably want to
+ use something like "o=organization, c=US".
+
+
+
+ -c class
+
+ Sets the objectClass values for the contact items. This class needs to be
+ defined in the schema used by your LDAP server, and at a minimum it must
+ contain the ldap attributes given below.
+
+
+
+
+
+
+ Description
+ pst2ldif
+ reads the contact information from a MS Outlook .pst file
+ and produces a .ldif file that may be used to import those contacts
+ into an LDAP database. The following ldap attributes are generated:
+
+ cn
+ givenName
+ sn
+ personalTitle
+ company
+ mail
+ postalAddress
+ l
+ st
+ postalCode
+ c
+ homePhone
+ telephoneNumber
+ facsimileTelephoneNumber
+ mobile
+ description
+
+
+
+
+
+ Copyright
+
+ Copyright (C) 2006 by 510 Software Group <carl@five-ten-sg.com>
+
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+
+ You should have received a copy of the GNU General Public License along
+ with this program; see the file COPYING. If not, please write to the
+ Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+
+
+ CVS Version
+
+ $Id$
+
+
+
+
+
+
+
+ 2006-02-20
+
+
+
+ outlook.pst
+ 5
+
+
+
+ outlook.pst
+ format of MS Outlook .pst file
+
+
+
+ Synopsis
+
+ outlook.pst
+
+
+
+
+ Overview
+
+ Each item in a .pst file is identified by two id values ID1 and ID2.
+ There are two separate b-trees indexed by these ID1 and ID2 values.
+
+
+
+
+ File Header
+
+ The file header is located at offset 0 in the .pst file.
+
+
+
+ We only support index type 0x0E and encryption type 0x01.
+
+
+ offset-index-1 is the file offset of the root of the
+ index1 b-tree, which contains (ID1, offset, size, unknown) tuples
+ for each item in the file. back-pointer-1 is the value that should
+ appear in the parent pointer of that root node.
+
+
+ offset-index-2 is the file offset of the root of the
+ index2 b-tree, which contains (ID2, DESC-ID1, LIST-ID1, PARENT-ID2)
+ tuples for each item in the file. back-pointer-2 is the value that should
+ appear in the parent pointer of that root node.
+
+
+
+
+ Index 1 Node
+
+ The index1 b-tree nodes are 516 byte blocks with the following format.
+
+
+
+ The item-count specifies the number of 12 byte records that
+ are active. The node-level is non-zero for this style of nodes.
+ The leaf nodes have a different format. The back-pointer must
+ match the back-pointer from the triple that pointed to this node.
+
+
+ Each item in this node is a triple of (ID1, back-pointer, offset)
+ where the offset points to the next deeper node in the tree, the
+ back-pointer value must match the back-pointer in that deeper node,
+ and ID1 is the lowest ID1 value in the subtree.
+
+
+
+
+ Index 1 Leaf Node
+
+ The index1 b-tree leaf nodes are 516 byte blocks with the following format.
+
+
+
+ The item-count specifies the number of 12 byte records that
+ are active. The node-level is zero for these leaf nodes.
+ The back-pointer must match the back-pointer from the triple
+ that pointed to this node.
+
+
+ Each item in this node is a tuple of (ID1, offset, size, unknown)
+
+
+
+
+ Index 2 Node
+
+ The index2 b-tree nodes are 516 byte blocks with the following format.
+
+
+
+ The item-count specifies the number of 12 byte records that
+ are active. The node-level is non-zero for this style of nodes.
+ The leaf nodes have a different format. The back-pointer must
+ match the back-pointer from the triple that pointed to this node.
+
+
+ Each item in this node is a triple of (ID2, back-pointer, offset)
+ where the offset points to the next deeper node in the tree, the
+ back-pointer value must match the back-pointer in that deeper node,
+ and ID2 is the lowest ID2 value in the subtree.
+
+
+
+
+ Index 2 Leaf Node
+
+ The index2 b-tree leaf nodes are 516 byte blocks with the following format.
+
+
+
+ The item-count specifies the number of 16 byte records that
+ are active. The node-level is zero for these leaf nodes.
+ The back-pointer must match the back-pointer from the triple
+ that pointed to this node.
+
+
+ Each item in this node is a tuple of (ID2, DESC-ID1, LIST-ID1, PARENT-ID2)
+
+
+
+
+ Associated List Item
+
+ Contains associations between id1 and id2 for the items controlled by the record.
+ In the above leaf node, we have a tuple of (0x61, 0x02a82c, 0x02a836, 0).
+ 0x02a836 is the ID1 of the associated list, and we can lookup that ID1 value
+ in the index1 b-tree to find the (offset,size) of the data in the .pst file.
+
+
+
+
+
+ Associated Descriptor Item
+
+ Contains information about the item, which may be email, contact, or other outlook types.
+ In the above leaf node, we have a tuple of (0x21, 0x00e638, 0, 0).
+ 0x00e638 is the ID1 of the associated descriptor, and we can lookup that ID1 value
+ in the index1 b-tree to find the (offset,size) of the data in the .pst file.
+
+
+
+ Note the index-offset of 0x013c - starting at that position in the
+ descriptor block, we have an array of two byte integers. The first
+ integer (0x000b) is a count of the number of overlapping pairs
+ following the count. The first pair is (0, 0xc), the next pair is (0xc, 0x14)
+ and the last (11th) pair is (0x10b, 0x123). These pairs are (start,end+1)
+ offsets of items in this block. So we have count+1 integers following
+ the count value.
+
+
+ Note the offset of 0x0020, which needs to be right shifted by 4 bits
+ to become 0x0002, which is then a byte offset to be added to the above
+ index-offset plus two (to skip the count), so it points to the (0xc, 0x14)
+ pair. Finally, we have the offset and size of the "b5" block located at offset 0xc
+ with a size of 8 bytes in this descriptor block. The "b5" block has the
+ following format:
+
+
+
+ Note the "b5" offset of 0x0040, which needs to be right shifted by 4 bits
+ to become 0x0004, which is then a byte offset to be added to the above
+ index-offset plus two (to skip the count), so it points to the (0x14, 0x7c)
+ pair. We now have the offset 0x14 of the descriptor array, composed of 8 byte
+ entries. Each descriptor entry has the following format:
+
+
+
+ For some reference types (2, 3, 0xb) the value is used directly. Otherwise,
+ the value is generally a non-zero offset, to be right shifted by 4 bits and used to fetch
+ a pair from the index table to find the offset and size of the item in this
+ descriptor block. However, if (value AND 0xf) == 0xf, then the value is an ID2 index.
+
+
+ The following reference types are known, but not all of these
+ are implemented in the code yet.
+
+
+
+ The following item types are known, but not all of these
+ are implemented in the code yet.
+ Note: it appears that some types can have an IPOS value or an ID2 value
+ depending on the size of the field in question. It is safer to check
+ every field than for me to say what they "usually" contain. Absolute
+ values, though, are generally going to be constant.
+
+
+
+
+