Mercurial > libpst
view src/lspst.c @ 355:d1f930be4711
From Jeffrey Morlan:
pst_build_id_ptr and pst_build_desc_ptr require that the first child
of a BTree page have the same starting ID as itself. This is not
required by the spec, and is not true in many real-world PSTs
(presumably, the original first child of the page got
deleted). Because of this, many emails are not being extracted from
these PSTs. It also triggers an infinite loop in lspst (a separate
bug, also fixed)
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Wed, 06 Jul 2016 10:12:22 -0700 |
parents | 201464dd356e |
children | ad7b880ad3d1 |
line wrap: on
line source
/*** * lspst.c * Part of the LibPST project * Author: Joe Nahmias <joe@nahmias.net> * Based on readpst.c by by David Smith <dave.s@earthcorp.com> * */ #include "define.h" struct file_ll { char *dname; int32_t stored_count; int32_t item_count; int32_t skip_count; int32_t type; }; void canonicalize_filename(char *fname); void debug_print(char *fmt, ...); void usage(char *prog_name); void version(); // global settings pst_file pstfile; void create_enter_dir(struct file_ll* f, pst_item *item) { pst_convert_utf8(item, &item->file_as); f->item_count = 0; f->skip_count = 0; f->type = item->type; f->stored_count = (item->folder) ? item->folder->item_count : 0; f->dname = strdup(item->file_as.str); } void close_enter_dir(struct file_ll *f) { free(f->dname); } void process(pst_item *outeritem, pst_desc_tree *d_ptr) { struct file_ll ff; pst_item *item = NULL; char *result = NULL; size_t resultlen = 0; DEBUG_ENT("process"); memset(&ff, 0, sizeof(ff)); create_enter_dir(&ff, outeritem); while (d_ptr) { if (!d_ptr->desc) { DEBUG_WARN(("ERROR item's desc record is NULL\n")); ff.skip_count++; } else { DEBUG_INFO(("Desc Email ID %"PRIx64" [d_ptr->d_id = %"PRIx64"]\n", d_ptr->desc->i_id, d_ptr->d_id)); item = pst_parse_item(&pstfile, d_ptr, NULL); DEBUG_INFO(("About to process item @ %p.\n", item)); if (item) { if (item->message_store) { // there should only be one message_store, and we have already done it DIE(("A second message_store has been found. Sorry, this must be an error.\n")); } if (item->folder && d_ptr->child) { // if this is a folder, we want to recurse into it pst_convert_utf8(item, &item->file_as); printf("Folder \"%s\"\n", item->file_as.str); process(item, d_ptr->child); } else if (item->contact && (item->type == PST_TYPE_CONTACT)) { if (!ff.type) ff.type = item->type; // Process Contact item if (ff.type != PST_TYPE_CONTACT) { DEBUG_INFO(("I have a contact, but the folder isn't a contacts folder. Processing anyway\n")); } printf("Contact"); if (item->contact->fullname.str) printf("\t%s", pst_rfc2426_escape(item->contact->fullname.str, &result, &resultlen)); printf("\n"); } else if (item->email && ((item->type == PST_TYPE_NOTE) || (item->type == PST_TYPE_SCHEDULE) || (item->type == PST_TYPE_REPORT))) { if (!ff.type) ff.type = item->type; // Process Email item if ((ff.type != PST_TYPE_NOTE) && (ff.type != PST_TYPE_SCHEDULE) && (ff.type != PST_TYPE_REPORT)) { DEBUG_INFO(("I have an email, but the folder isn't an email folder. Processing anyway\n")); } printf("Email"); if (item->email->outlook_sender_name.str) printf("\tFrom: %s", item->email->outlook_sender_name.str); if (item->subject.str) printf("\tSubject: %s", item->subject.str); printf("\n"); } else if (item->journal && (item->type == PST_TYPE_JOURNAL)) { if (!ff.type) ff.type = item->type; // Process Journal item if (ff.type != PST_TYPE_JOURNAL) { DEBUG_INFO(("I have a journal entry, but folder isn't specified as a journal type. Processing...\n")); } if (item->subject.str) printf("Journal\t%s\n", pst_rfc2426_escape(item->subject.str, &result, &resultlen)); } else if (item->appointment && (item->type == PST_TYPE_APPOINTMENT)) { char time_buffer[30]; if (!ff.type) ff.type = item->type; // Process Calendar Appointment item DEBUG_INFO(("Processing Appointment Entry\n")); if (ff.type != PST_TYPE_APPOINTMENT) { DEBUG_INFO(("I have an appointment, but folder isn't specified as an appointment type. Processing...\n")); } printf("Appointment"); if (item->subject.str) printf("\tSUMMARY: %s", pst_rfc2426_escape(item->subject.str, &result, &resultlen)); if (item->appointment->start) printf("\tSTART: %s", pst_rfc2445_datetime_format(item->appointment->start, sizeof(time_buffer), time_buffer)); if (item->appointment->end) printf("\tEND: %s", pst_rfc2445_datetime_format(item->appointment->end, sizeof(time_buffer), time_buffer)); printf("\tALL DAY: %s", (item->appointment->all_day==1 ? "Yes" : "No")); printf("\n"); } else { ff.skip_count++; DEBUG_INFO(("Unknown item type. %i. Ascii1=\"%s\"\n", item->type, item->ascii_type)); } pst_freeItem(item); } else { ff.skip_count++; DEBUG_INFO(("A NULL item was seen\n")); } } d_ptr = d_ptr->next; } close_enter_dir(&ff); if (result) free(result); DEBUG_RET(); } void usage(char *prog_name) { DEBUG_ENT("usage"); version(); printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name); printf("OPTIONS:\n"); printf("\t-d <filename> \t- Debug to file. This is a binary log. Use readlog to print it\n"); printf("\t-h\t- Help. This screen\n"); printf("\t-V\t- Version. Display program version\n"); DEBUG_RET(); } void version() { DEBUG_ENT("version"); printf("lspst / LibPST v%s\n", VERSION); #if BYTE_ORDER == BIG_ENDIAN printf("Big Endian implementation being used.\n"); #elif BYTE_ORDER == LITTLE_ENDIAN printf("Little Endian implementation being used.\n"); #else # error "Byte order not supported by this library" #endif DEBUG_RET(); } int main(int argc, char* const* argv) { pst_item *item = NULL; pst_desc_tree *d_ptr; char *temp = NULL; //temporary char pointer int c; char *d_log = NULL; while ((c = getopt(argc, argv, "d:hV"))!= -1) { switch (c) { case 'd': d_log = optarg; break; case 'h': usage(argv[0]); exit(0); break; case 'V': version(); exit(0); break; default: usage(argv[0]); exit(1); break; } } #ifdef DEBUG_ALL // force a log file if (!d_log) d_log = "lspst.log"; #endif // defined DEBUG_ALL DEBUG_INIT(d_log, NULL); DEBUG_ENT("main"); if (argc <= optind) { usage(argv[0]); exit(2); } // Open PST file if (pst_open(&pstfile, argv[optind], NULL)) DIE(("Error opening File\n")); // Load PST index if (pst_load_index(&pstfile)) DIE(("Index Error\n")); pst_load_extended_attributes(&pstfile); d_ptr = pstfile.d_head; // first record is main record item = pst_parse_item(&pstfile, d_ptr, NULL); if (!item || !item->message_store) { DEBUG_RET(); DIE(("Could not get root record\n")); } // default the file_as to the same as the main filename if it doesn't exist if (!item->file_as.str) { if (!(temp = strrchr(argv[1], '/'))) if (!(temp = strrchr(argv[1], '\\'))) temp = argv[1]; else temp++; // get past the "\\" else temp++; // get past the "/" item->file_as.str = strdup(temp); item->file_as.is_utf8 = 1; } d_ptr = pst_getTopOfFolders(&pstfile, item); if (!d_ptr) DIE(("Top of folders record not found. Cannot continue\n")); process(item, d_ptr->child); // do the childred of TOPF pst_freeItem(item); pst_close(&pstfile); DEBUG_RET(); return 0; } // This function will make sure that a filename is in cannonical form. That // is, it will replace any slashes, backslashes, or colons with underscores. void canonicalize_filename(char *fname) { DEBUG_ENT("canonicalize_filename"); if (fname == NULL) { DEBUG_RET(); return; } while ((fname = strpbrk(fname, "/\\:"))) *fname = '_'; DEBUG_RET(); }