Mercurial > libpst
diff src/libpst.c @ 101:1fc33da23175
fix for orphan children when building descriptor tree, avoid writing uninitialized data to debug log file
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Sun, 05 Oct 2008 09:56:32 -0700 |
parents | 1e4a7610d525 |
children | 8c4482be0b4c |
line wrap: on
line diff
--- a/src/libpst.c Thu Oct 02 15:29:36 2008 -0700 +++ b/src/libpst.c Sun Oct 05 09:56:32 2008 -0700 @@ -821,108 +821,88 @@ } -/** this list node type is used for a quick cache - of the descriptor tree nodes (rooted at pf->d_head) - and for a "lost and found" list. - If the parent isn't found yet, put it on the lost and found - list and check it each time you read a new item. -*/ -struct cache_list_node { - pst_desc_ll *ptr; - /** only used for lost and found lists */ - uint64_t parent; - struct cache_list_node *next; - struct cache_list_node *prev; -}; -static struct cache_list_node *cache_head; -static struct cache_list_node *cache_tail; -static struct cache_list_node *lostfound_head; -static int cache_count; +/** + * add a pst descriptor node to a linked list of such nodes. + * + * @param node pointer to the node to be added to the list + * @param head pointer to the list head pointer + * @param tail pointer to the list tail pointer + */ +static void add_descriptor_to_list(pst_desc_ll *node, pst_desc_ll **head, pst_desc_ll **tail); +static void add_descriptor_to_list(pst_desc_ll *node, pst_desc_ll **head, pst_desc_ll **tail) +{ + DEBUG_ENT("add_descriptor_to_list"); + //DEBUG_INDEX(("Added node %#"PRIx64" parent %#"PRIx64" real parent %#"PRIx64" prev %#"PRIx64" next %#"PRIx64"\n", + // node->id, node->parent_id, + // (node->parent ? node->parent->id : (uint64_t)0), + // (node->prev ? node->prev->id : (uint64_t)0), + // (node->next ? node->next->id : (uint64_t)0))); + if (*tail) (*tail)->next = node; + if (!(*head)) *head = node; + node->prev = *tail; + *tail = node; + DEBUG_RET(); +} /** - add the d_ptr descriptor into the global tree -*/ -static void record_descriptor(pst_file *pf, pst_desc_ll *d_ptr, uint64_t parent_id); -static void record_descriptor(pst_file *pf, pst_desc_ll *d_ptr, uint64_t parent_id) { - struct cache_list_node *lostfound_ptr = NULL; - struct cache_list_node *cache_ptr = NULL; - pst_desc_ll *parent = NULL; - - if (parent_id == 0 || parent_id == d_ptr->id) { - // add top level node to the descriptor tree - if (parent_id == 0) { - DEBUG_INDEX(("No Parent\n")); - } else { - DEBUG_INDEX(("Record is its own parent. What is this world coming to?\n")); - } - if (pf->d_tail) pf->d_tail->next = d_ptr; - if (!pf->d_head) pf->d_head = d_ptr; - d_ptr->prev = pf->d_tail; - pf->d_tail = d_ptr; - } else { - DEBUG_INDEX(("Searching for parent\n")); - // check in the cache for the parent - cache_ptr = cache_head; - while (cache_ptr && (cache_ptr->ptr->id != parent_id)) { - cache_ptr = cache_ptr->next; + * add a pst descriptor node into the global tree. + * + * @param pf global pst file pointer + * @param node pointer to the node to be added to the tree + */ +static void record_descriptor(pst_file *pf, pst_desc_ll *node); +static void record_descriptor(pst_file *pf, pst_desc_ll *node) +{ + // find any orphan children of this node, and collect them + pst_desc_ll *n = pf->d_head; + while (n) { + if (n->parent_id == node->id) { + // found a child of this node + DEBUG_INDEX(("Found orphan child %#"PRIx64" of parent %#"PRIx64"\n", n->id, node->id)); + pst_desc_ll *nn = n->next; + pst_desc_ll *pp = n->prev; + node->no_child++; + n->parent = node; + n->prev = NULL; + n->next = NULL; + add_descriptor_to_list(n, &node->child, &node->child_tail); + if (pp) pp->next = nn; else pf->d_head = nn; + if (nn) nn->prev = pp; else pf->d_tail = pp; + n = nn; } - if (!cache_ptr && (parent = pst_getDptr(pf, parent_id)) == NULL) { - // check in the lost/found list - lostfound_ptr = lostfound_head; - while (lostfound_ptr && (lostfound_ptr->ptr->id != parent_id)) { - lostfound_ptr = lostfound_ptr->next; - } - if (!lostfound_ptr) { - DEBUG_WARN(("ERROR -- cannot find parent with id %#"PRIx64". Adding id %#"PRIx64" to lost/found\n", parent_id, d_ptr->id)); - lostfound_ptr = (struct cache_list_node*) xmalloc(sizeof(struct cache_list_node)); - lostfound_ptr->prev = NULL; - lostfound_ptr->next = lostfound_head; - lostfound_ptr->parent = parent_id; - lostfound_ptr->ptr = d_ptr; - lostfound_head = lostfound_ptr; - } else { - parent = lostfound_ptr->ptr; - DEBUG_INDEX(("Found parent (%#"PRIx64") in lost/found\n", parent->id)); - } + else { + n = n->next; } - - if (cache_ptr || parent) { - if (cache_ptr) - // parent is already in the cache - parent = cache_ptr->ptr; - else { - //add the parent to the cache - DEBUG_INDEX(("Cache addition\n")); - cache_ptr = (struct cache_list_node*) xmalloc(sizeof(struct cache_list_node)); - cache_ptr->prev = NULL; - cache_ptr->next = cache_head; - cache_ptr->ptr = parent; - if (cache_head) cache_head->prev = cache_ptr; - if (!cache_tail) cache_tail = cache_ptr; - cache_head = cache_ptr; - cache_count++; - if (cache_count > 100) { - DEBUG_INDEX(("trimming quick cache\n")); - //remove one from the end - cache_ptr = cache_tail; - cache_tail = cache_ptr->prev; - if (cache_tail) cache_tail->next = NULL; - free (cache_ptr); - cache_count--; - } - } - DEBUG_INDEX(("Found a parent\n")); + } + + // now hook this node into the global tree + if (node->parent_id == 0) { + // add top level node to the descriptor tree + //DEBUG_INDEX(("Null parent\n")); + add_descriptor_to_list(node, &pf->d_head, &pf->d_tail); + } + else if (node->parent_id == node->id) { + // add top level node to the descriptor tree + DEBUG_INDEX(("%#"PRIx64" is its own parent. What is this world coming to?\n")); + add_descriptor_to_list(node, &pf->d_head, &pf->d_tail); + } else { + //DEBUG_INDEX(("Searching for parent %#"PRIx64" of %#"PRIx64"\n", node->parent_id, node->id)); + pst_desc_ll *parent = pst_getDptr(pf, node->parent_id); + if (parent) { + //DEBUG_INDEX(("Found parent %#"PRIx64"\n", node->parent_id)); parent->no_child++; - d_ptr->parent = parent; - if (parent->child_tail) parent->child_tail->next = d_ptr; - if (!parent->child) parent->child = d_ptr; - d_ptr->prev = parent->child_tail; - parent->child_tail = d_ptr; + node->parent = parent; + add_descriptor_to_list(node, &parent->child, &parent->child_tail); + } + else { + //DEBUG_INDEX(("No parent %#"PRIx64", have an orphan child %#"PRIx64"\n", node->parent_id, node->id)); + add_descriptor_to_list(node, &pf->d_head, &pf->d_tail); } } } + int pst_build_desc_ptr (pst_file *pf, off_t offset, int32_t depth, uint64_t linku1, uint64_t *high_id, uint64_t start_val, uint64_t end_val) { struct pst_table_ptr_structn table, table2; pst_descn desc_rec; @@ -930,18 +910,6 @@ int32_t x, item_count; uint64_t old = start_val; char *buf = NULL, *bptr; - struct cache_list_node *cache_ptr = NULL; - struct cache_list_node *lostfound_ptr = NULL; - struct cache_list_node *lostfound_shd = NULL; - struct cache_list_node *lostfound_tmp = NULL; - - if (depth == 0) { - // initialize the linked list and lost/found list. - cache_head = NULL; - cache_tail = NULL; - lostfound_head = NULL; - cache_count = 0; - } DEBUG_ENT("pst_build_desc_ptr"); DEBUG_INDEX(("offset %#"PRIx64" depth %i linku1 %#"PRIx64" start %#"PRIx64" end %#"PRIx64"\n", offset, depth, linku1, start_val, end_val)); @@ -1000,8 +968,13 @@ } } // When duplicates found, just update the info.... perhaps this is correct functionality - DEBUG_INDEX(("Searching for existing record\n")); + DEBUG_INDEX(("Searching for existing record %#"PRIx64"\n", desc_rec.d_id)); if (desc_rec.d_id <= *high_id && (d_ptr = pst_getDptr(pf, desc_rec.d_id))) { + // This is probably unreachable code, originally written when the + // tree walking code was broken since it did not know about the node + // count. It always processed all the items in the node, even unused + // items, and that probably made it look like there were duplicate + // entries. uint64_t bigzero = 0; DEBUG_INDEX(("Updating Existing Values\n")); d_ptr->list_index = pst_getID(pf, desc_rec.list_id); @@ -1033,19 +1006,21 @@ else pf->d_tail = d_ptr->prev; - d_ptr->prev = NULL; - d_ptr->next = NULL; - d_ptr->parent = NULL; - record_descriptor(pf, d_ptr, desc_rec.parent_id); // add to the global tree + d_ptr->parent_id = desc_rec.parent_id; + d_ptr->prev = NULL; + d_ptr->next = NULL; + d_ptr->parent = NULL; + record_descriptor(pf, d_ptr); // add to the global tree } } else { if (*high_id < desc_rec.d_id) { DEBUG_INDEX(("Updating New High\n")); *high_id = desc_rec.d_id; } - DEBUG_INDEX(("New Record\n")); + DEBUG_INDEX(("New Record %#"PRIx64" with parent %#x\n", desc_rec.d_id, desc_rec.parent_id)); d_ptr = (pst_desc_ll*) xmalloc(sizeof(pst_desc_ll)); d_ptr->id = desc_rec.d_id; + d_ptr->parent_id = desc_rec.parent_id; d_ptr->list_index = pst_getID(pf, desc_rec.list_id); d_ptr->desc = pst_getID(pf, desc_rec.desc_id); d_ptr->prev = NULL; @@ -1054,32 +1029,9 @@ d_ptr->child = NULL; d_ptr->child_tail = NULL; d_ptr->no_child = 0; - record_descriptor(pf, d_ptr, desc_rec.parent_id); // add to the global tree - - } - // check here to see if d_ptr is the parent of any of the items in the lost / found list - lostfound_ptr = lostfound_head; - lostfound_shd = NULL; - while (lostfound_ptr) { - if (lostfound_ptr->parent == d_ptr->id) { - DEBUG_INDEX(("Found a lost/found child (%#"PRIx64") of the current record. Joining to main structure.\n", lostfound_ptr->ptr->id)); - parent = d_ptr; - d_ptr = lostfound_ptr->ptr; - parent->no_child++; - d_ptr->parent = parent; - if (parent->child_tail) parent->child_tail->next = d_ptr; - if (!parent->child) parent->child = d_ptr; - d_ptr->prev = parent->child_tail; - parent->child_tail = d_ptr; - if (!lostfound_shd) lostfound_head = lostfound_ptr->next; - else lostfound_shd->next = lostfound_ptr->next; - lostfound_tmp = lostfound_ptr->next; - free(lostfound_ptr); - lostfound_ptr = lostfound_tmp; - } else { - lostfound_shd = lostfound_ptr; - lostfound_ptr = lostfound_ptr->next; - } + record_descriptor(pf, d_ptr); // add to the global tree + //DEBUG_INDEX(("dump parent descriptor tree\n")); //!! + //d_ptr = pst_getDptr(pf, (uint64_t)-1); //!! } } } else { @@ -1122,21 +1074,6 @@ (void)pst_build_desc_ptr(pf, table.offset, depth+1, table.u1, high_id, table.start, table2.start); } } - if (depth == 0) { - // free the quick cache - while (cache_head) { - cache_ptr = cache_head->next; - free(cache_head); - cache_head = cache_ptr; - } - // free the lost and found - while (lostfound_head) { - lostfound_ptr = lostfound_head->next; - WARN(("unused lost/found item %#"PRIx64" with parent %#"PRIx64, lostfound_head->parent, lostfound_head->ptr->id)); - free(lostfound_head); - lostfound_head = lostfound_ptr; - } - } if (buf) free(buf); DEBUG_RET(); return 0; @@ -4137,6 +4074,7 @@ pst_desc_ll *ptr = pf->d_head; DEBUG_ENT("pst_getDptr"); while (ptr && (ptr->id != id)) { + //DEBUG_INDEX(("Looking for %#"PRIx64" at node %#"PRIx64" with parent %#"PRIx64"\n", id, ptr->id, ptr->parent_id)); if (ptr->child) { ptr = ptr->child; continue;