annotate src/lzfu.c @ 355:d1f930be4711

From Jeffrey Morlan: pst_build_id_ptr and pst_build_desc_ptr require that the first child of a BTree page have the same starting ID as itself. This is not required by the spec, and is not true in many real-world PSTs (presumably, the original first child of the page got deleted). Because of this, many emails are not being extracted from these PSTs. It also triggers an infinite loop in lspst (a separate bug, also fixed)
author Carl Byington <carl@five-ten-sg.com>
date Wed, 06 Jul 2016 10:12:22 -0700
parents cf3df962f1e5
children 5c0ce43c7532
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
1 /*
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
2 This program is free software; you can redistribute it and/or modify
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
3 it under the terms of the GNU General Public License as published by
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
4 the Free Software Foundation; either version 2 of the License, or
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
5 (at your option) any later version.
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
6
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
7 You should have received a copy of the GNU General Public License
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
8 along with this program; if not, write to the Free Software Foundation,
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
9 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
10 */
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
11
122
bdb38b434c0a more changes from Fridrich Strba to avoid installing our config.h
Carl Byington <carl@five-ten-sg.com>
parents: 120
diff changeset
12 #include "define.h"
120
6395ced2b8b2 disable building pst2dii on cygwin
Carl Byington <carl@five-ten-sg.com>
parents: 85
diff changeset
13 #include "lzfu.h"
48
f66078abed38 more fixes for 64 bit format
carl
parents: 43
diff changeset
14
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
15
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
16 #define LZFU_COMPRESSED 0x75465a4c
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
17 #define LZFU_UNCOMPRESSED 0x414c454d
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
18
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
19 // initital dictionary
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
20 #define LZFU_INITDICT "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
21 "{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscrip" \
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
22 "t \\fdecor MS Sans SerifSymbolArialTimes Ne" \
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
23 "w RomanCourier{\\colortbl\\red0\\green0\\blue0" \
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
24 "\r\n\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab" \
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
25 "\\tx"
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
26 // initial length of dictionary
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
27 #define LZFU_INITLENGTH 207
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
28
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
29 // header for compressed rtf
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
30 typedef struct _lzfuheader {
43
f6db1f060a95 start on outlook 2003 64 bit format
carl
parents: 41
diff changeset
31 uint32_t cbSize;
f6db1f060a95 start on outlook 2003 64 bit format
carl
parents: 41
diff changeset
32 uint32_t cbRawSize;
f6db1f060a95 start on outlook 2003 64 bit format
carl
parents: 41
diff changeset
33 uint32_t dwMagic;
f6db1f060a95 start on outlook 2003 64 bit format
carl
parents: 41
diff changeset
34 uint32_t dwCRC;
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
35 } lzfuheader;
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
36
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
37
172
6954d315aaa8 move version-info into main configure.in, and set it properly.
Carl Byington <carl@five-ten-sg.com>
parents: 130
diff changeset
38 char* pst_lzfu_decompress(char* rtfcomp, uint32_t compsize, size_t *size) {
79
56fa05fd5271 Patch from Robert Simpson for encryption type 2.
Carl Byington <carl@five-ten-sg.com>
parents: 78
diff changeset
39 unsigned char dict[4096]; // the dictionary buffer
56fa05fd5271 Patch from Robert Simpson for encryption type 2.
Carl Byington <carl@five-ten-sg.com>
parents: 78
diff changeset
40 unsigned int dict_length = 0; // the dictionary pointer
56fa05fd5271 Patch from Robert Simpson for encryption type 2.
Carl Byington <carl@five-ten-sg.com>
parents: 78
diff changeset
41 lzfuheader lzfuhdr; // the header of the lzfu block
56fa05fd5271 Patch from Robert Simpson for encryption type 2.
Carl Byington <carl@five-ten-sg.com>
parents: 78
diff changeset
42 unsigned char flags; // 8 bits of flags (1=2byte block pointer into the dict, 0=1 byte literal)
56fa05fd5271 Patch from Robert Simpson for encryption type 2.
Carl Byington <carl@five-ten-sg.com>
parents: 78
diff changeset
43 unsigned char flag_mask; // look at one flag bit each time thru the loop
43
f6db1f060a95 start on outlook 2003 64 bit format
carl
parents: 41
diff changeset
44 uint32_t i;
79
56fa05fd5271 Patch from Robert Simpson for encryption type 2.
Carl Byington <carl@five-ten-sg.com>
parents: 78
diff changeset
45 char *out_buf;
193
cf3df962f1e5 prep for fedora build
Carl Byington <carl@five-ten-sg.com>
parents: 172
diff changeset
46 uint32_t out_ptr = 0;
43
f6db1f060a95 start on outlook 2003 64 bit format
carl
parents: 41
diff changeset
47 uint32_t out_size;
f6db1f060a95 start on outlook 2003 64 bit format
carl
parents: 41
diff changeset
48 uint32_t in_ptr;
f6db1f060a95 start on outlook 2003 64 bit format
carl
parents: 41
diff changeset
49 uint32_t in_size;
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
50
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
51 memcpy(dict, LZFU_INITDICT, LZFU_INITLENGTH);
79
56fa05fd5271 Patch from Robert Simpson for encryption type 2.
Carl Byington <carl@five-ten-sg.com>
parents: 78
diff changeset
52 memset(dict + LZFU_INITLENGTH, 0, sizeof(dict) - LZFU_INITLENGTH);
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
53 dict_length = LZFU_INITLENGTH;
79
56fa05fd5271 Patch from Robert Simpson for encryption type 2.
Carl Byington <carl@five-ten-sg.com>
parents: 78
diff changeset
54
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
55 memcpy(&lzfuhdr, rtfcomp, sizeof(lzfuhdr));
37
ddfb25318812 more valgrind fixes
carl
parents: 36
diff changeset
56 LE32_CPU(lzfuhdr.cbSize);
ddfb25318812 more valgrind fixes
carl
parents: 36
diff changeset
57 LE32_CPU(lzfuhdr.cbRawSize);
ddfb25318812 more valgrind fixes
carl
parents: 36
diff changeset
58 LE32_CPU(lzfuhdr.dwMagic);
ddfb25318812 more valgrind fixes
carl
parents: 36
diff changeset
59 LE32_CPU(lzfuhdr.dwCRC);
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
60 //printf("total size: %d\n", lzfuhdr.cbSize+4);
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
61 //printf("raw size : %d\n", lzfuhdr.cbRawSize);
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
62 //printf("compressed: %s\n", (lzfuhdr.dwMagic == LZFU_COMPRESSED ? "yes" : "no"));
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
63 //printf("CRC : %#x\n", lzfuhdr.dwCRC);
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
64 //printf("\n");
78
535075b4d261 Patch from Joachim Metz for debian packaging, and fix for incorrect length on lz decompression.
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
65 out_size = lzfuhdr.cbRawSize;
172
6954d315aaa8 move version-info into main configure.in, and set it properly.
Carl Byington <carl@five-ten-sg.com>
parents: 130
diff changeset
66 out_buf = (char*)pst_malloc(out_size);
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
67 in_ptr = sizeof(lzfuhdr);
78
535075b4d261 Patch from Joachim Metz for debian packaging, and fix for incorrect length on lz decompression.
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
68 // Make sure to correct lzfuhdr.cbSize with 4 bytes before comparing
535075b4d261 Patch from Joachim Metz for debian packaging, and fix for incorrect length on lz decompression.
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
69 // to compsize
535075b4d261 Patch from Joachim Metz for debian packaging, and fix for incorrect length on lz decompression.
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
70 in_size = (lzfuhdr.cbSize + 4 < compsize) ? lzfuhdr.cbSize + 4 : compsize;
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
71 while (in_ptr < in_size) {
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 48
diff changeset
72 flags = (unsigned char)(rtfcomp[in_ptr++]);
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
73 flag_mask = 1;
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
74 while (flag_mask) {
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
75 if (flag_mask & flags) {
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
76 // two bytes available?
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
77 if (in_ptr+1 < in_size) {
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
78 // read 2 bytes from input
130
e35fd42bac05 more cleanup of include files
Carl Byington <carl@five-ten-sg.com>
parents: 129
diff changeset
79 uint16_t blkhdr, offset, length;
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
80 memcpy(&blkhdr, rtfcomp+in_ptr, 2);
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
81 LE16_CPU(blkhdr);
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
82 in_ptr += 2;
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
83 /* swap the upper and lower bytes of blkhdr */
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
84 blkhdr = (((blkhdr&0xFF00)>>8)+
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
85 ((blkhdr&0x00FF)<<8));
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
86 /* the offset is the first 12 bits of the 16 bit value */
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
87 offset = (blkhdr&0xFFF0)>>4;
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
88 /* the length of the dict entry are the last 4 bits */
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
89 length = (blkhdr&0x000F)+2;
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
90 // add the value we are about to print to the dictionary
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
91 for (i=0; i < length; i++) {
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
92 unsigned char c1;
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
93 c1 = dict[(offset+i)%4096];
79
56fa05fd5271 Patch from Robert Simpson for encryption type 2.
Carl Byington <carl@five-ten-sg.com>
parents: 78
diff changeset
94 dict[dict_length] = c1;
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
95 dict_length = (dict_length+1) % 4096;
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 48
diff changeset
96 if (out_ptr < out_size) out_buf[out_ptr++] = (char)c1;
78
535075b4d261 Patch from Joachim Metz for debian packaging, and fix for incorrect length on lz decompression.
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
97 // required for dictionary wrap around
535075b4d261 Patch from Joachim Metz for debian packaging, and fix for incorrect length on lz decompression.
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
98 // otherwise 0 byte values are referenced incorrectly
535075b4d261 Patch from Joachim Metz for debian packaging, and fix for incorrect length on lz decompression.
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
99 dict[dict_length] = 0;
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
100 }
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
101 }
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
102 } else {
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
103 // one byte available?
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
104 if (in_ptr < in_size) {
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
105 // uncompressed chunk (single byte)
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
106 char c1 = rtfcomp[in_ptr++];
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
107 dict[dict_length] = c1;
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
108 dict_length = (dict_length+1)%4096;
73
3cb02cb1e6cd Patch from Robert Simpson to fix doubly-linked list in the cache_ptr code, and allow arrays of unicode strings (without converting them).
Carl Byington <carl@five-ten-sg.com>
parents: 48
diff changeset
109 if (out_ptr < out_size) out_buf[out_ptr++] = (char)c1;
78
535075b4d261 Patch from Joachim Metz for debian packaging, and fix for incorrect length on lz decompression.
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
110 // required for dictionary wrap around
535075b4d261 Patch from Joachim Metz for debian packaging, and fix for incorrect length on lz decompression.
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
111 // otherwise 0 byte values are referenced incorrect
535075b4d261 Patch from Joachim Metz for debian packaging, and fix for incorrect length on lz decompression.
Carl Byington <carl@five-ten-sg.com>
parents: 73
diff changeset
112 dict[dict_length] = 0;
41
183ae993b9ad security fix for potential buffer overrun in lz decompress
carl
parents: 37
diff changeset
113 }
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
114 }
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
115 flag_mask <<= 1;
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
116 }
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
117 }
85
582e927756d3 Patch from Robert Simpson for file handle leak in error case.
Carl Byington <carl@five-ten-sg.com>
parents: 79
diff changeset
118 *size = out_ptr;
36
6fe121a971c9 valgrind fixes
carl
parents: 16
diff changeset
119 return out_buf;
16
c508ee15dfca switch to automake/autoconf
carl
parents:
diff changeset
120 }