# HG changeset patch # User Carl Byington # Date 1212519658 25200 # Node ID 987aa872294e0a7c5594eff83e98f28e3cf634dc # Parent 6f82d13e93002b71ff833e7a55f679358464a861 Use ftello/fseeko to properly handle large files. Document and properly use datasize field in b5 blocks. Fix some MSVC compile issues and collect MSVC dependencies into one place. diff -r 6f82d13e9300 -r 987aa872294e ChangeLog --- a/ChangeLog Thu May 29 18:59:38 2008 -0700 +++ b/ChangeLog Tue Jun 03 12:00:58 2008 -0700 @@ -1,3 +1,9 @@ +LibPST 0.6.11 (2008-06-03) +=============================== + * Use ftello/fseeko to properly handle large files. + * Document and properly use datasize field in b5 blocks. + * Fix some MSVC compile issues and collect MSVC dependencies into one place. + LibPST 0.6.10 (2008-05-29) =============================== * Patch from Robert Simpson diff -r 6f82d13e9300 -r 987aa872294e NEWS --- a/NEWS Thu May 29 18:59:38 2008 -0700 +++ b/NEWS Tue Jun 03 12:00:58 2008 -0700 @@ -1,3 +1,4 @@ +0.6.11 2008-06-03 Use ftello/fseeko to properly handle large files. 0.6.10 2008-05-29 Patch from Robert Simpson for doubly-linked list and arrays of unicode strings. 0.6.9 2008-05-16 Patch from Joachim Metz for 64 bit compile. 0.6.8 2008-03-05 Initial version of pst2dii to convert to Summation dii load file format. diff -r 6f82d13e9300 -r 987aa872294e configure.in --- a/configure.in Thu May 29 18:59:38 2008 -0700 +++ b/configure.in Tue Jun 03 12:00:58 2008 -0700 @@ -1,5 +1,5 @@ AC_PREREQ(2.59) -AC_INIT(libpst,0.6.10,carl@five-ten-sg.com) +AC_INIT(libpst,0.6.11,carl@five-ten-sg.com) AC_CONFIG_SRCDIR([config.h.in]) AC_CONFIG_HEADER([config.h]) @@ -24,6 +24,7 @@ AC_PROG_RANLIB AC_SYS_LARGEFILE +AC_CHECK_SIZEOF(off_t) # Checks for header files. AC_CHECK_HEADER([unistd.h], @@ -45,6 +46,7 @@ AC_STRUCT_TM # Checks for library functions. +AC_FUNC_FSEEKO AC_FUNC_LSTAT AC_FUNC_LSTAT_FOLLOWS_SLASHED_SYMLINK AC_FUNC_MALLOC diff -r 6f82d13e9300 -r 987aa872294e libpst.spec.in --- a/libpst.spec.in Thu May 29 18:59:38 2008 -0700 +++ b/libpst.spec.in Tue Jun 03 12:00:58 2008 -0700 @@ -47,6 +47,11 @@ %changelog +* Tue Jun 03 2008 Carl Byington - 0.6.11-1 +- Use ftello/fseeko to properly handle large files. +- Document and properly use datasize field in b5 blocks. +- Fix some MSVC compile issues and collect MSVC dependencies into one place. + * Thu May 29 2008 Carl Byington - 0.6.10-1 - Patch from Robert Simpson for doubly-linked list code and arrays of unicode strings. diff -r 6f82d13e9300 -r 987aa872294e src/debug.c --- a/src/debug.c Thu May 29 18:59:38 2008 -0700 +++ b/src/debug.c Tue Jun 03 12:00:58 2008 -0700 @@ -8,10 +8,6 @@ #include #include -#ifdef _WIN32 -# define vsnprintf _vsnprintf -#endif - struct pst_debug_item { int type; char * function; @@ -129,9 +125,9 @@ va_list ap; int f, g; char x[2]; + char *buf = NULL; struct pst_debug_item *temp; if (!debug_fp) return; // no file - va_start(ap, fmt); // get the record off of the temp_list info_ptr = temp_list; if (info_ptr) @@ -140,23 +136,32 @@ fprintf(stderr, "NULL info_ptr. ERROR!!\n"); exit(-2); } - // according to glibc 2.1, this should return the req. number of bytes for - // the string - #ifdef _WIN32 - // vsnprintf trick doesn't work. must use function called _vscprintf - // cannot find much documentation about this on internet or anywhere. - // I assume it isn't a standard function, but only in VisualC++ - f = _vscprintf(fmt, ap); - #else - f = vsnprintf(x, 1, fmt, ap); - #endif - va_end(ap); // must be called after vsnprintf() + + #ifdef _WIN32 + // vsnprintf trick doesn't work on msvc. + g = 2000; + f = -1; + while (f < 0) { + buf = realloc(buf, g+1); + va_start(ap, fmt); + f = vsnprintf(buf, g, fmt, ap); + va_end(ap); + g += g/2; + } + free(buf); + #else + // according to glibc 2.1, this should return the req. number of bytes for + // the string + va_start(ap, fmt); + f = vsnprintf(x, 1, fmt, ap); + va_end(ap); + #endif if (f > 0 && f < MAX_MESSAGE_SIZE) { info_ptr->text = (char*) xmalloc(f+1); va_start(ap, fmt); if ((g = vsnprintf(info_ptr->text, f, fmt, ap)) == -1) { - fprintf(stderr, "_debug_msg: Dieing! vsnprintf returned -1 for format \"%s\"\n", fmt); + fprintf(stderr, "_debug_msg: Dying! vsnprintf returned -1 for format \"%s\"\n", fmt); exit(-2); } va_end(ap); @@ -252,7 +257,7 @@ size_t size, ptr, funcname, filename, text, end; char *buf = NULL, rec_type; if (!debug_fp) return; // no file - off_t index_pos = ftell(debug_fp); + off_t index_pos = ftello(debug_fp); off_t file_pos = index_pos; // add 2. One for the pointer to the next index, // one for the count of this index @@ -274,7 +279,7 @@ item_ptr = item_head; while (item_ptr) { - file_pos = ftell(debug_fp); + file_pos = ftello(debug_fp); index[index_ptr++] = file_pos; size = strlen(item_ptr->function) + strlen(item_ptr->file) + @@ -320,12 +325,12 @@ item_ptr = item_head; } curr_items = 0; - index[index_ptr] = ftell(debug_fp); + index[index_ptr] = ftello(debug_fp); // we should now have a complete index - fseek(debug_fp, index_pos, SEEK_SET); + fseeko(debug_fp, index_pos, SEEK_SET); pst_debug_fwrite(index, index_size, 1, debug_fp); - fseek(debug_fp, 0, SEEK_END); + fseeko(debug_fp, 0, SEEK_END); item_ptr = item_head = item_tail = NULL; free(index); if (buf) free(buf); @@ -343,10 +348,10 @@ unsigned int end; if (!debug_fp) return; // no file index[0] = 1; //only one item in this index - index_pos = ftell(debug_fp); + index_pos = ftello(debug_fp); pst_debug_fwrite(index, index_size, 1, debug_fp); - index[1] = ftell(debug_fp); + index[1] = ftello(debug_fp); if (size > USHRT_MAX) { // bigger than can be stored in a short rec_type = 'L'; @@ -367,16 +372,16 @@ mfile_rec.text = mfile_rec.filename+strlen(item->file)+1; pst_debug_fwrite(&mfile_rec, sizeof(mfile_rec), 1, debug_fp); } - file_pos = ftell(debug_fp); + file_pos = ftello(debug_fp); pst_debug_fwrite(item->function, strlen(item->function)+1, 1, debug_fp); pst_debug_fwrite(item->file, strlen(item->file)+1, 1, debug_fp); vfprintf(debug_fp, fmt, *ap); pst_debug_fwrite(&zero, 1, 1, debug_fp); - end = ftell(debug_fp)-file_pos; + end = (unsigned int) (ftello(debug_fp) - file_pos); - index[2] = ftell(debug_fp); - fseek(debug_fp, index_pos, SEEK_SET); + index[2] = ftello(debug_fp); + fseeko(debug_fp, index_pos, SEEK_SET); pst_debug_fwrite(index, index_size, 1, debug_fp); if (size > USHRT_MAX) { pst_debug_fwrite(&rec_type, 1, sizeof(char), debug_fp); @@ -387,7 +392,7 @@ mfile_rec.end = end; pst_debug_fwrite(&mfile_rec, sizeof(mfile_rec), 1, debug_fp); } - fseek(debug_fp, 0, SEEK_END); + fseeko(debug_fp, 0, SEEK_END); } @@ -401,9 +406,9 @@ index[0] = 1; // only one item in this index run index[1] = 0; // valgrind, avoid writing uninitialized data index[2] = 0; // "" - index_pos = ftell(debug_fp); + index_pos = ftello(debug_fp); pst_debug_fwrite(index, index_size, 1, debug_fp); - index[1] = ftell(debug_fp); + index[1] = ftello(debug_fp); // always use the long rec_type = 'L'; @@ -416,20 +421,20 @@ lfile_rec.type = item->type; pst_debug_fwrite(&lfile_rec, sizeof(lfile_rec), 1, debug_fp); - file_pos = ftell(debug_fp); + file_pos = ftello(debug_fp); pst_debug_fwrite(item->function, strlen(item->function)+1, 1, debug_fp); pst_debug_fwrite(item->file, strlen(item->file)+1, 1, debug_fp); pst_debug_hexdumper(debug_fp, buf, size, col, 0); pst_debug_fwrite(&zero, 1, 1, debug_fp); - lfile_rec.end = ftell(debug_fp) - file_pos; + lfile_rec.end = ftello(debug_fp) - file_pos; - index[2] = ftell(debug_fp); - fseek(debug_fp, index_pos, SEEK_SET); + index[2] = ftello(debug_fp); + fseeko(debug_fp, index_pos, SEEK_SET); pst_debug_fwrite(index, index_size, 1, debug_fp); pst_debug_fwrite(&rec_type, 1, sizeof(char), debug_fp); pst_debug_fwrite(&lfile_rec, sizeof(lfile_rec), 1, debug_fp); - fseek(debug_fp, 0, SEEK_END); + fseeko(debug_fp, 0, SEEK_END); } diff -r 6f82d13e9300 -r 987aa872294e src/define.h --- a/src/define.h Thu May 29 18:59:38 2008 -0700 +++ b/src/define.h Tue Jun 03 12:00:58 2008 -0700 @@ -7,6 +7,12 @@ #ifdef HAVE_CONFIG_H #include "config.h" +#else + #ifdef _MSC_VER + #undef HAVE_UNISTD_H + #define HAVE_DIRECT_H + #define HAVE_WINDOWS_H + #endif #endif #include "version.h" @@ -51,32 +57,55 @@ #include #include +#define PERM_DIRS 0777 + #ifdef HAVE_UNISTD_H #include + #define D_MKDIR(x) mkdir(x, PERM_DIRS) #else #include "XGetopt.h" #ifdef HAVE_DIRECT_H #include // win32 - #define chdir _chdir - #define int32_t __int32 + #define D_MKDIR(x) mkdir(x) + #define chdir _chdir #endif #ifdef HAVE_WINDOWS_H - #include // win32 + #include + #endif + + #ifdef _MSC_VER + #define vsnprintf _vsnprintf + #define snprintf _snprintf + #define ftello _ftelli64 + #define fseeko _fseeki64 + #define strcasecmp _stricmp + #define off_t __int64 + #define size_t __int64 + #define int64_t __int64 + #define uint64_t unsigned __int64 + #define int32_t __int32 + #define uint32_t unsigned int + #define int16_t short int + #define uint16_t unsigned short int + #define int8_t signed char + #define uint8_t unsigned char + #define UINT64_MAX ((uint64_t)0xffffffffffffffff) + int __cdecl _fseeki64(FILE *, __int64, int); + __int64 __cdecl _ftelli64(FILE *); #endif #endif #ifdef HAVE_SYS_STAT_H -# include //mkdir + #include #endif -// for reading of directory and clearing in function mk_seperate_dir #ifdef HAVE_SYS_TYPES_H -# include + #include #endif #ifdef HAVE_DIRENT_H -# include + #include #endif diff -r 6f82d13e9300 -r 987aa872294e src/getidblock.c --- a/src/getidblock.c Thu May 29 18:59:38 2008 -0700 +++ b/src/getidblock.c Tue Jun 03 12:00:58 2008 -0700 @@ -1,16 +1,5 @@ + #include "define.h" - -#include -#include - -#ifndef __GNUC__ -# include "XGetopt.h" -#endif - -#ifndef _WIN32 -# include -#endif - #include "libpst.h" static void usage(); diff -r 6f82d13e9300 -r 987aa872294e src/libpst.c --- a/src/libpst.c Thu May 29 18:59:38 2008 -0700 +++ b/src/libpst.c Tue Jun 03 12:00:58 2008 -0700 @@ -973,12 +973,13 @@ // When duplicates found, just update the info.... perhaps this is correct functionality DEBUG_INDEX(("Searching for existing record\n")); if (desc_rec.d_id <= *high_id && (d_ptr = pst_getDptr(pf, desc_rec.d_id))) { + uint64_t bigzero = 0; DEBUG_INDEX(("Updating Existing Values\n")); d_ptr->list_index = pst_getID(pf, desc_rec.list_id); d_ptr->desc = pst_getID(pf, desc_rec.desc_id); DEBUG_INDEX(("\tdesc = %#llx\tlist_index=%#llx\n", - (d_ptr->desc==NULL?0LL:d_ptr->desc->id), - (d_ptr->list_index==NULL?0LL:d_ptr->list_index->id))); + (d_ptr->desc==NULL ? bigzero : d_ptr->desc->id), + (d_ptr->list_index==NULL ? bigzero : d_ptr->list_index->id))); if (d_ptr->parent && desc_rec.parent_id != d_ptr->parent->id) { DEBUG_INDEX(("WARNING -- Parent of record has changed. Moving it\n")); //hmmm, we must move the record. @@ -1406,9 +1407,8 @@ LE32_CPU(table_rec.value); DEBUG_EMAIL(("table_rec (type=%#hx, ref_type=%#hx, value=%#x)\n", table_rec.type, table_rec.ref_type, table_rec.value)); - if (table_rec.type != (uint16_t)0x02B5) { - WARN(("Unknown second block constant - %#hx for id %#llx\n", table_rec.type, block_id)); - DEBUG_HEXDUMPC(buf, sizeof(table_rec), 0x10); + if ((table_rec.type != (uint16_t)0x02B5) || (table_rec.ref_type != 6)) { + WARN(("Unknown second block constant - %#hx %#hx for id %#llx\n", table_rec.type, table_rec.ref_type, block_id)); freeall(&subblocks, &block_offset1, &block_offset2, &block_offset3, &block_offset4, &block_offset5, &block_offset6, &block_offset7); DEBUG_RET(); return NULL; @@ -1468,6 +1468,7 @@ LE16_CPU(table_rec.type); LE16_CPU(table_rec.ref_type); LE32_CPU(table_rec.value); + DEBUG_EMAIL(("table_rec (type=%#hx, ref_type=%#hx, value=%#x)\n", table_rec.type, table_rec.ref_type, table_rec.value)); if (table_rec.type != (uint16_t)0x04B5) { // different constant than a type 1 record WARN(("Unknown second block constant - %#hx for id %#llx\n", table_rec.type, block_id)); @@ -1482,7 +1483,9 @@ DEBUG_RET(); return NULL; } - num_recs = (block_offset5.to - block_offset5.from) / 6; // this will give the number of records in this block + + // this will give the number of records in this block + num_recs = (block_offset5.to - block_offset5.from) / (4 + table_rec.ref_type); if (pst_getBlockOffsetPointer(pf, i2_head, &subblocks, seven_c_blk.ind2_offset, &block_offset6)) { DEBUG_WARN(("internal error (7c.ind2 offset %#x) in reading block id %#x\n", seven_c_blk.ind2_offset, block_id)); @@ -1495,7 +1498,6 @@ } else { WARN(("ERROR: Unknown block constant - %#hx for id %#llx\n", block_hdr.type, block_id)); - DEBUG_HEXDUMPC(buf, read_size,0x10); freeall(&subblocks, &block_offset1, &block_offset2, &block_offset3, &block_offset4, &block_offset5, &block_offset6, &block_offset7); DEBUG_RET(); return NULL; @@ -4285,7 +4287,7 @@ // DEBUG_MAIN(("pst file old offset %#llx old size %#x read count %i offset %#llx size %#x\n", // p->offset, p->size, p->readcount, pos, size)); - if (fseek(pf->fp, pos, SEEK_SET) == -1) { + if (fseeko(pf->fp, pos, SEEK_SET) == -1) { DEBUG_RET(); return 0; } diff -r 6f82d13e9300 -r 987aa872294e src/libpst.h --- a/src/libpst.h Thu May 29 18:59:38 2008 -0700 +++ b/src/libpst.h Tue Jun 03 12:00:58 2008 -0700 @@ -10,19 +10,17 @@ #ifndef LIBPST_H #define LIBPST_H -#include - #ifndef _MSC_VER - -#ifndef FILETIME_DEFINED -#define FILETIME_DEFINED -//Win32 Filetime struct - copied from WINE -typedef struct { - uint32_t dwLowDateTime; - uint32_t dwHighDateTime; -} FILETIME; -#endif //ifndef FILETIME_DEFINED -#endif //ifndef _MSC_VER + #include + #ifndef FILETIME_DEFINED + #define FILETIME_DEFINED + //Win32 Filetime struct - copied from WINE + typedef struct { + uint32_t dwLowDateTime; + uint32_t dwHighDateTime; + } FILETIME; + #endif +#endif // According to Jan Wolter, sys/param.h is the most portable source of endian // information on UNIX systems. see http://www.unixpapa.com/incnote/byteorder.html @@ -59,14 +57,6 @@ #endif // BYTE_ORDER -#ifdef _MSC_VER -#include "windows.h" -#define int32_t int -#define uint32_t unsigned int -#define int16_t short int -#define uint16_t unsigned short int -#endif // _MSC_VER - #define PST_TYPE_NOTE 1 #define PST_TYPE_APPOINTMENT 8 #define PST_TYPE_CONTACT 9 diff -r 6f82d13e9300 -r 987aa872294e src/lzfu.h --- a/src/lzfu.h Thu May 29 18:59:38 2008 -0700 +++ b/src/lzfu.h Tue Jun 03 12:00:58 2008 -0700 @@ -1,8 +1,6 @@ #ifndef LZFU_H #define LZFU_H -#include - char* lzfu_decompress (char* rtfcomp, uint32_t compsize, size_t *size); #endif diff -r 6f82d13e9300 -r 987aa872294e src/readpst.c --- a/src/readpst.c Thu May 29 18:59:38 2008 -0700 +++ b/src/readpst.c Tue Jun 03 12:00:58 2008 -0700 @@ -6,7 +6,6 @@ */ #include "define.h" #include "libstrfunc.h" -//#include "vbuf.h" #include "libpst.h" #include "common.h" #include "timeconv.h" @@ -19,14 +18,6 @@ // max size of the c_time char*. It will store the date of the email #define C_TIME_SIZE 500 -#define PERM_DIRS 0777 - -// macro used for creating directories -#ifndef WIN32 -#define D_MKDIR(x) mkdir(x, PERM_DIRS) -#else -#define D_MKDIR(x) mkdir(x) -#endif struct file_ll { char *name; @@ -573,7 +564,7 @@ if (chdir(dir_name)) { x = errno; - DIE(("mk_recurse_dir: Cannot change to directory %s: %s\n", dir, strerror(x))); + DIE(("mk_seperate_dir: Cannot change to directory %s: %s\n", dir, strerror(x))); } if (overwrite) { diff -r 6f82d13e9300 -r 987aa872294e src/readpstlog.c --- a/src/readpstlog.c Thu May 29 18:59:38 2008 -0700 +++ b/src/readpstlog.c Tue Jun 03 12:00:58 2008 -0700 @@ -1,18 +1,5 @@ #include "define.h" -#include -#include -#include - -#ifndef _WIN32 -# include -#endif - -#ifndef __GNUC__ -# include "XGetopt.h" -#endif - - #define BUF_SIZE 4096 int usage(); @@ -23,7 +10,6 @@ int main(int argc, char** argv) { int identity = 0; int level = 0; - off_t *i = NULL; int x, ptr, stop=0, flag; char *fname, *buf, rec_type; unsigned char version; @@ -84,16 +70,14 @@ x = (int)temp; ptr = 0; if (x > 0) { - if (i) free(i); - i = (off_t*)xmalloc(sizeof(off_t)*(x+1)); - // plus 1 cause we want to read the offset of the next index + off_t i[x+1]; // plus 1 because we want to read the offset of the next index if (get(i, sizeof(off_t), x+1, fp)==0) { // we have reached the end of the debug file printf("oh dear. we must now end\n"); break; } while (ptr < x) { - fseek(fp, i[ptr++], SEEK_SET); + fseeko(fp, i[ptr++], SEEK_SET); get(&rec_type, 1, sizeof(char), fp); if (rec_type == 'L') { get(&lfile_rec, sizeof(lfile_rec), 1, fp); @@ -182,7 +166,7 @@ } if (dtype == DEBUG_FUNCRET_NO) level--; } - if (fseek(fp, i[ptr], SEEK_SET)==-1) { + if (fseeko(fp, i[ptr], SEEK_SET)==-1) { printf("finished\n"); break; } diff -r 6f82d13e9300 -r 987aa872294e src/timeconv.c --- a/src/timeconv.c Thu May 29 18:59:38 2008 -0700 +++ b/src/timeconv.c Tue Jun 03 12:00:58 2008 -0700 @@ -12,7 +12,6 @@ */ #include -#include "common.h" #include "timeconv.h" char * fileTimeToAscii (const FILETIME *filetime) { diff -r 6f82d13e9300 -r 987aa872294e xml/libpst.in --- a/xml/libpst.in Thu May 29 18:59:38 2008 -0700 +++ b/xml/libpst.in Tue Jun 03 12:00:58 2008 -0700 @@ -33,7 +33,7 @@ - 2008-05-16 + 2008-06-03 @@ -226,7 +226,7 @@ - 2008-05-16 + 2008-06-03 @@ -329,7 +329,7 @@ - 2008-05-16 + 2008-06-03 @@ -504,7 +504,7 @@ - 2008-05-16 + 2008-06-03 @@ -628,7 +628,7 @@ - 2008-05-16 + 2008-06-03 @@ -762,7 +762,7 @@ - 2008-05-16 + 2008-06-03 @@ -1577,7 +1577,7 @@ @@ -1585,8 +1585,9 @@ case, it is an internal pointer reference, which needs to be right shifted by 4 bits to become 0x0004, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the (0x14, 0x7c) - pair. We now have the offset 0x14 of the descriptor array, composed of 8 byte - entries. Each descriptor entry has the following format: + pair. The datasize (6) plus the b5 code (02) gives size of the entries, + in this case 8 bytes. We now have the offset 0x14 of the descriptor array, + composed of 8 byte entries. Each descriptor entry has the following format: @@ -1957,9 +1958,10 @@ case, it is an internal pointer reference, which needs to be right shifted by 4 bits to become 0x0006, which is then a byte offset to be added to the above indexOffset plus two (to skip the count), so it points to the - (0xea, 0xf0) pair. That gives us (0xf0 - 0xea)/6 = 1, so we have a - recordCount of one. The actual data between 0xea and 0xf0 is unknown - and unused here. + (0xea, 0xf0) pair. The datasize (2) plus the b5 code (04) gives the size + of the entries, in this case 6 bytes. We now have the offset 0xea of an + unused block of data in an unknown format, composed of 6 byte entries. + That gives us (0xf0 - 0xea)/6 = 1, so we have a recordCount of one. We have seen cases where the descoffset in the b5 block is zero, and