Mercurial > libpst
annotate src/lspst.c @ 363:3a1d25c579c6 stable-0-6-68
allow folders containing multiple item types; better detection of valid internet headers
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Mon, 29 Aug 2016 09:50:24 -0700 |
parents | d1f930be4711 |
children | ad7b880ad3d1 |
rev | line source |
---|---|
16 | 1 /*** |
2 * lspst.c | |
3 * Part of the LibPST project | |
4 * Author: Joe Nahmias <joe@nahmias.net> | |
5 * Based on readpst.c by David Smith <dave.s@earthcorp.com> | |
6 * | |
7 */ | |
8 | |
122
bdb38b434c0a
more changes from Fridrich Strba to avoid installing our config.h
Carl Byington <carl@five-ten-sg.com>
parents:
120
diff
changeset
|
9 #include "define.h" |
bdb38b434c0a
more changes from Fridrich Strba to avoid installing our config.h
Carl Byington <carl@five-ten-sg.com>
parents:
120
diff
changeset
|
10 |
/*
 * Per-folder bookkeeping. process() keeps one of these on its stack for the
 * folder whose children it is currently listing.
 */
struct file_ll {
    char    *dname;         /* strdup()ed copy of the folder's file_as string; freed by close_enter_dir() */
    int32_t  stored_count;  /* item_count taken from the folder record, or 0 when the item has no folder data */
    int32_t  item_count;    /* reset to 0 by create_enter_dir() */
    int32_t  skip_count;    /* bumped for every entry process() skips (NULL desc, NULL item, unknown type) */
    int32_t  type;          /* item type of the folder; when 0, process() fills it from the first listed item */
};
43 | 18 |
19 | |
16 | 20 void canonicalize_filename(char *fname); |
21 void debug_print(char *fmt, ...); | |
118
0f1492b7fe8b
patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents:
110
diff
changeset
|
22 void usage(char *prog_name); |
0f1492b7fe8b
patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents:
110
diff
changeset
|
23 void version(); |
43 | 24 |
25 // global settings | |
26 pst_file pstfile; | |
27 | |
16 | 28 |
43 | 29 void create_enter_dir(struct file_ll* f, pst_item *item) |
30 { | |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
31 pst_convert_utf8(item, &item->file_as); |
167
40e9de445038
improve consistency checking when fetching items from the pst file.
Carl Byington <carl@five-ten-sg.com>
parents:
164
diff
changeset
|
32 f->item_count = 0; |
43 | 33 f->skip_count = 0; |
34 f->type = item->type; | |
167
40e9de445038
improve consistency checking when fetching items from the pst file.
Carl Byington <carl@five-ten-sg.com>
parents:
164
diff
changeset
|
35 f->stored_count = (item->folder) ? item->folder->item_count : 0; |
172
6954d315aaa8
move version-info into main configure.in, and set it properly.
Carl Byington <carl@five-ten-sg.com>
parents:
167
diff
changeset
|
36 f->dname = strdup(item->file_as.str); |
43 | 37 } |
16 | 38 |
39 | |
43 | 40 void close_enter_dir(struct file_ll *f) |
41 { | |
42 free(f->dname); | |
43 } | |
16 | 44 |
45 | |
186
0a4f7ecd7452
more cleanup of external names in the shared library
Carl Byington <carl@five-ten-sg.com>
parents:
172
diff
changeset
|
46 void process(pst_item *outeritem, pst_desc_tree *d_ptr) |
43 | 47 { |
48 struct file_ll ff; | |
49 pst_item *item = NULL; | |
211
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
50 char *result = NULL; |
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
51 size_t resultlen = 0; |
43 | 52 |
53 DEBUG_ENT("process"); | |
54 memset(&ff, 0, sizeof(ff)); | |
55 create_enter_dir(&ff, outeritem); | |
16 | 56 |
43 | 57 while (d_ptr) { |
58 if (!d_ptr->desc) { | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
59 DEBUG_WARN(("ERROR item's desc record is NULL\n")); |
43 | 60 ff.skip_count++; |
61 } | |
62 else { | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
63 DEBUG_INFO(("Desc Email ID %"PRIx64" [d_ptr->d_id = %"PRIx64"]\n", d_ptr->desc->i_id, d_ptr->d_id)); |
16 | 64 |
143
fdc58ad2c758
fix embedded rfc822 messages with attachments
Carl Byington <carl@five-ten-sg.com>
parents:
129
diff
changeset
|
65 item = pst_parse_item(&pstfile, d_ptr, NULL); |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
66 DEBUG_INFO(("About to process item @ %p.\n", item)); |
43 | 67 if (item) { |
68 if (item->message_store) { | |
69 // there should only be one message_store, and we have already done it | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
70 DIE(("A second message_store has been found. Sorry, this must be an error.\n")); |
43 | 71 } |
16 | 72 |
43 | 73 if (item->folder && d_ptr->child) { |
74 // if this is a folder, we want to recurse into it | |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
75 pst_convert_utf8(item, &item->file_as); |
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
76 printf("Folder \"%s\"\n", item->file_as.str); |
43 | 77 process(item, d_ptr->child); |
16 | 78 |
43 | 79 } else if (item->contact && (item->type == PST_TYPE_CONTACT)) { |
198
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
80 if (!ff.type) ff.type = item->type; |
43 | 81 // Process Contact item |
82 if (ff.type != PST_TYPE_CONTACT) { | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
83 DEBUG_INFO(("I have a contact, but the folder isn't a contacts folder. Processing anyway\n")); |
43 | 84 } |
85 printf("Contact"); | |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
86 if (item->contact->fullname.str) |
211
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
87 printf("\t%s", pst_rfc2426_escape(item->contact->fullname.str, &result, &resultlen)); |
43 | 88 printf("\n"); |
16 | 89 |
198
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
90 } else if (item->email && ((item->type == PST_TYPE_NOTE) || (item->type == PST_TYPE_SCHEDULE) || (item->type == PST_TYPE_REPORT))) { |
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
91 if (!ff.type) ff.type = item->type; |
43 | 92 // Process Email item |
198
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
93 if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT)) { |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
94 DEBUG_INFO(("I have an email, but the folder isn't an email folder. Processing anyway\n")); |
43 | 95 } |
96 printf("Email"); | |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
97 if (item->email->outlook_sender_name.str) |
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
98 printf("\tFrom: %s", item->email->outlook_sender_name.str); |
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
99 if (item->subject.str) |
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
100 printf("\tSubject: %s", item->subject.str); |
43 | 101 printf("\n"); |
16 | 102 |
43 | 103 } else if (item->journal && (item->type == PST_TYPE_JOURNAL)) { |
198
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
104 if (!ff.type) ff.type = item->type; |
43 | 105 // Process Journal item |
106 if (ff.type != PST_TYPE_JOURNAL) { | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
107 DEBUG_INFO(("I have a journal entry, but folder isn't specified as a journal type. Processing...\n")); |
43 | 108 } |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
109 if (item->subject.str) |
211
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
110 printf("Journal\t%s\n", pst_rfc2426_escape(item->subject.str, &result, &resultlen)); |
16 | 111 |
43 | 112 } else if (item->appointment && (item->type == PST_TYPE_APPOINTMENT)) { |
199
e3a46f66332b
more changes in recurrence decoding
Carl Byington <carl@five-ten-sg.com>
parents:
198
diff
changeset
|
113 char time_buffer[30]; |
198
7c60d6d1c681
decode more recurrence mapi elements
Carl Byington <carl@five-ten-sg.com>
parents:
195
diff
changeset
|
114 if (!ff.type) ff.type = item->type; |
43 | 115 // Process Calendar Appointment item |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
116 DEBUG_INFO(("Processing Appointment Entry\n")); |
43 | 117 if (ff.type != PST_TYPE_APPOINTMENT) { |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
118 DEBUG_INFO(("I have an appointment, but folder isn't specified as an appointment type. Processing...\n")); |
43 | 119 } |
120 printf("Appointment"); | |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
121 if (item->subject.str) |
211
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
122 printf("\tSUMMARY: %s", pst_rfc2426_escape(item->subject.str, &result, &resultlen)); |
50 | 123 if (item->appointment->start) |
199
e3a46f66332b
more changes in recurrence decoding
Carl Byington <carl@five-ten-sg.com>
parents:
198
diff
changeset
|
124 printf("\tSTART: %s", pst_rfc2445_datetime_format(item->appointment->start, sizeof(time_buffer), time_buffer)); |
50 | 125 if (item->appointment->end) |
199
e3a46f66332b
more changes in recurrence decoding
Carl Byington <carl@five-ten-sg.com>
parents:
198
diff
changeset
|
126 printf("\tEND: %s", pst_rfc2445_datetime_format(item->appointment->end, sizeof(time_buffer), time_buffer)); |
50 | 127 printf("\tALL DAY: %s", (item->appointment->all_day==1 ? "Yes" : "No")); |
43 | 128 printf("\n"); |
16 | 129 |
43 | 130 } else { |
131 ff.skip_count++; | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
132 DEBUG_INFO(("Unknown item type. %i. Ascii1=\"%s\"\n", |
43 | 133 item->type, item->ascii_type)); |
134 } | |
46 | 135 pst_freeItem(item); |
43 | 136 } else { |
137 ff.skip_count++; | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
138 DEBUG_INFO(("A NULL item was seen\n")); |
43 | 139 } |
140 } | |
355 | 141 d_ptr = d_ptr->next; |
43 | 142 } |
143 close_enter_dir(&ff); | |
211
94bde95d7e18
the shared library interface should now be thread safe
Carl Byington <carl@five-ten-sg.com>
parents:
202
diff
changeset
|
144 if (result) free(result); |
52 | 145 DEBUG_RET(); |
43 | 146 } |
16 | 147 |
43 | 148 |
118
0f1492b7fe8b
patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents:
110
diff
changeset
|
/*
 * Print the version banner followed by the command line help.
 *
 * prog_name - name to show in the "Usage:" line (main() passes argv[0]).
 */
void usage(char *prog_name) {
    DEBUG_ENT("usage");
    version();
    printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
    printf("OPTIONS:\n");
    /* The debug log is plain ascii since readpstlog was removed, so the old
     * "binary log / use readlog" hint no longer applies. */
    printf("\t-d <filename> \t- Debug to file.\n");
    printf("\t-h\t- Help. This screen\n");
    printf("\t-V\t- Version. Display program version\n");
    DEBUG_RET();
}
159 | |
160 | |
118
0f1492b7fe8b
patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents:
110
diff
changeset
|
161 void version() { |
50 | 162 DEBUG_ENT("version"); |
163 printf("lspst / LibPST v%s\n", VERSION); | |
164 #if BYTE_ORDER == BIG_ENDIAN | |
165 printf("Big Endian implementation being used.\n"); | |
166 #elif BYTE_ORDER == LITTLE_ENDIAN | |
167 printf("Little Endian implementation being used.\n"); | |
168 #else | |
169 # error "Byte order not supported by this library" | |
170 #endif | |
171 DEBUG_RET(); | |
172 } | |
173 | |
174 | |
118
0f1492b7fe8b
patch from Fridrich Strba for building on mingw and general cleanup of autoconf files
Carl Byington <carl@five-ten-sg.com>
parents:
110
diff
changeset
|
175 int main(int argc, char* const* argv) { |
43 | 176 pst_item *item = NULL; |
186
0a4f7ecd7452
more cleanup of external names in the shared library
Carl Byington <carl@five-ten-sg.com>
parents:
172
diff
changeset
|
177 pst_desc_tree *d_ptr; |
43 | 178 char *temp = NULL; //temporary char pointer |
50 | 179 int c; |
43 | 180 char *d_log = NULL; |
16 | 181 |
50 | 182 while ((c = getopt(argc, argv, "d:hV"))!= -1) { |
183 switch (c) { | |
184 case 'd': | |
185 d_log = optarg; | |
186 break; | |
187 case 'h': | |
188 usage(argv[0]); | |
189 exit(0); | |
190 break; | |
191 case 'V': | |
192 version(); | |
193 exit(0); | |
194 break; | |
195 default: | |
196 usage(argv[0]); | |
197 exit(1); | |
198 break; | |
199 } | |
200 } | |
43 | 201 |
48 | 202 #ifdef DEBUG_ALL |
203 // force a log file | |
204 if (!d_log) d_log = "lspst.log"; | |
205 #endif // defined DEBUG_ALL | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
206 DEBUG_INIT(d_log, NULL); |
43 | 207 DEBUG_ENT("main"); |
208 | |
50 | 209 if (argc <= optind) { |
210 usage(argv[0]); | |
211 exit(2); | |
212 } | |
213 | |
43 | 214 // Open PST file |
298
201464dd356e
add default character set for items where the pst file does not specify a character set
Carl Byington <carl@five-ten-sg.com>
parents:
285
diff
changeset
|
215 if (pst_open(&pstfile, argv[optind], NULL)) DIE(("Error opening File\n")); |
16 | 216 |
43 | 217 // Load PST index |
218 if (pst_load_index(&pstfile)) DIE(("Index Error\n")); | |
219 | |
220 pst_load_extended_attributes(&pstfile); | |
16 | 221 |
43 | 222 d_ptr = pstfile.d_head; // first record is main record |
143
fdc58ad2c758
fix embedded rfc822 messages with attachments
Carl Byington <carl@five-ten-sg.com>
parents:
129
diff
changeset
|
223 item = pst_parse_item(&pstfile, d_ptr, NULL); |
43 | 224 if (!item || !item->message_store) { |
225 DEBUG_RET(); | |
202
2f38c4ce606f
remove readpstlog, switch to plain ascii debug log files
Carl Byington <carl@five-ten-sg.com>
parents:
199
diff
changeset
|
226 DIE(("Could not get root record\n")); |
43 | 227 } |
16 | 228 |
43 | 229 // default the file_as to the same as the main filename if it doesn't exist |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
230 if (!item->file_as.str) { |
43 | 231 if (!(temp = strrchr(argv[1], '/'))) |
232 if (!(temp = strrchr(argv[1], '\\'))) | |
233 temp = argv[1]; | |
234 else | |
235 temp++; // get past the "\\" | |
236 else | |
237 temp++; // get past the "/" | |
172
6954d315aaa8
move version-info into main configure.in, and set it properly.
Carl Byington <carl@five-ten-sg.com>
parents:
167
diff
changeset
|
238 item->file_as.str = strdup(temp); |
151
cda7c812ec01
track character set individually for each mapi element
Carl Byington <carl@five-ten-sg.com>
parents:
150
diff
changeset
|
239 item->file_as.is_utf8 = 1; |
43 | 240 } |
16 | 241 |
43 | 242 d_ptr = pst_getTopOfFolders(&pstfile, item); |
243 if (!d_ptr) DIE(("Top of folders record not found. Cannot continue\n")); | |
16 | 244 |
43 | 245 process(item, d_ptr->child); // do the childred of TOPF |
46 | 246 pst_freeItem(item); |
43 | 247 pst_close(&pstfile); |
16 | 248 |
43 | 249 DEBUG_RET(); |
250 return 0; | |
16 | 251 } |
43 | 252 |
253 | |
// Put a filename into canonical form: every slash, backslash and colon in
// fname is overwritten in place with an underscore. A NULL fname is ignored.
void canonicalize_filename(char *fname) {
    char *hit;

    DEBUG_ENT("canonicalize_filename");
    if (fname != NULL) {
        // the replacement character is not in the search set, so rescanning
        // from the position just patched always moves forward
        for (hit = strpbrk(fname, "/\\:"); hit != NULL; hit = strpbrk(hit, "/\\:")) {
            *hit = '_';
        }
    }
    DEBUG_RET();
}
43 | 266 |
267 |