Mercurial > libpst
comparison src/libpst.c @ 101:1fc33da23175
fix for orphan children when building descriptor tree, avoid writing uninitialized data to debug log file
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Sun, 05 Oct 2008 09:56:32 -0700 |
parents | 1e4a7610d525 |
children | 8c4482be0b4c |
comparison
equal
deleted
inserted
replaced
100:1e4a7610d525 | 101:1fc33da23175 |
---|---|
819 DEBUG_RET(); | 819 DEBUG_RET(); |
820 return 0; | 820 return 0; |
821 } | 821 } |
822 | 822 |
823 | 823 |
824 /** this list node type is used for a quick cache | |
825 of the descriptor tree nodes (rooted at pf->d_head) | |
826 and for a "lost and found" list. | |
827 If the parent isn't found yet, put it on the lost and found | |
828 list and check it each time you read a new item. | |
829 */ | |
830 struct cache_list_node { | |
831 pst_desc_ll *ptr; | |
832 /** only used for lost and found lists */ | |
833 uint64_t parent; | |
834 struct cache_list_node *next; | |
835 struct cache_list_node *prev; | |
836 }; | |
837 static struct cache_list_node *cache_head; | |
838 static struct cache_list_node *cache_tail; | |
839 static struct cache_list_node *lostfound_head; | |
840 static int cache_count; | |
841 | |
842 | |
843 /** | 824 /** |
844 add the d_ptr descriptor into the global tree | 825 * add a pst descriptor node to a linked list of such nodes. |
845 */ | 826 * |
846 static void record_descriptor(pst_file *pf, pst_desc_ll *d_ptr, uint64_t parent_id); | 827 * @param node pointer to the node to be added to the list |
847 static void record_descriptor(pst_file *pf, pst_desc_ll *d_ptr, uint64_t parent_id) { | 828 * @param head pointer to the list head pointer |
848 struct cache_list_node *lostfound_ptr = NULL; | 829 * @param tail pointer to the list tail pointer |
849 struct cache_list_node *cache_ptr = NULL; | 830 */ |
850 pst_desc_ll *parent = NULL; | 831 static void add_descriptor_to_list(pst_desc_ll *node, pst_desc_ll **head, pst_desc_ll **tail); |
851 | 832 static void add_descriptor_to_list(pst_desc_ll *node, pst_desc_ll **head, pst_desc_ll **tail) |
852 if (parent_id == 0 || parent_id == d_ptr->id) { | 833 { |
834 DEBUG_ENT("add_descriptor_to_list"); | |
835 //DEBUG_INDEX(("Added node %#"PRIx64" parent %#"PRIx64" real parent %#"PRIx64" prev %#"PRIx64" next %#"PRIx64"\n", | |
836 // node->id, node->parent_id, | |
837 // (node->parent ? node->parent->id : (uint64_t)0), | |
838 // (node->prev ? node->prev->id : (uint64_t)0), | |
839 // (node->next ? node->next->id : (uint64_t)0))); | |
840 if (*tail) (*tail)->next = node; | |
841 if (!(*head)) *head = node; | |
842 node->prev = *tail; | |
843 *tail = node; | |
844 DEBUG_RET(); | |
845 } | |
846 | |
847 | |
848 /** | |
849 * add a pst descriptor node into the global tree. | |
850 * | |
851 * @param pf global pst file pointer | |
852 * @param node pointer to the node to be added to the tree | |
853 */ | |
854 static void record_descriptor(pst_file *pf, pst_desc_ll *node); | |
855 static void record_descriptor(pst_file *pf, pst_desc_ll *node) | |
856 { | |
857 // find any orphan children of this node, and collect them | |
858 pst_desc_ll *n = pf->d_head; | |
859 while (n) { | |
860 if (n->parent_id == node->id) { | |
861 // found a child of this node | |
862 DEBUG_INDEX(("Found orphan child %#"PRIx64" of parent %#"PRIx64"\n", n->id, node->id)); | |
863 pst_desc_ll *nn = n->next; | |
864 pst_desc_ll *pp = n->prev; | |
865 node->no_child++; | |
866 n->parent = node; | |
867 n->prev = NULL; | |
868 n->next = NULL; | |
869 add_descriptor_to_list(n, &node->child, &node->child_tail); | |
870 if (pp) pp->next = nn; else pf->d_head = nn; | |
871 if (nn) nn->prev = pp; else pf->d_tail = pp; | |
872 n = nn; | |
873 } | |
874 else { | |
875 n = n->next; | |
876 } | |
877 } | |
878 | |
879 // now hook this node into the global tree | |
880 if (node->parent_id == 0) { | |
853 // add top level node to the descriptor tree | 881 // add top level node to the descriptor tree |
854 if (parent_id == 0) { | 882 //DEBUG_INDEX(("Null parent\n")); |
855 DEBUG_INDEX(("No Parent\n")); | 883 add_descriptor_to_list(node, &pf->d_head, &pf->d_tail); |
856 } else { | 884 } |
857 DEBUG_INDEX(("Record is its own parent. What is this world coming to?\n")); | 885 else if (node->parent_id == node->id) { |
858 } | 886 // add top level node to the descriptor tree |
859 if (pf->d_tail) pf->d_tail->next = d_ptr; | 887 DEBUG_INDEX(("%#"PRIx64" is its own parent. What is this world coming to?\n")); |
860 if (!pf->d_head) pf->d_head = d_ptr; | 888 add_descriptor_to_list(node, &pf->d_head, &pf->d_tail); |
861 d_ptr->prev = pf->d_tail; | |
862 pf->d_tail = d_ptr; | |
863 } else { | 889 } else { |
864 DEBUG_INDEX(("Searching for parent\n")); | 890 //DEBUG_INDEX(("Searching for parent %#"PRIx64" of %#"PRIx64"\n", node->parent_id, node->id)); |
865 // check in the cache for the parent | 891 pst_desc_ll *parent = pst_getDptr(pf, node->parent_id); |
866 cache_ptr = cache_head; | 892 if (parent) { |
867 while (cache_ptr && (cache_ptr->ptr->id != parent_id)) { | 893 //DEBUG_INDEX(("Found parent %#"PRIx64"\n", node->parent_id)); |
868 cache_ptr = cache_ptr->next; | |
869 } | |
870 if (!cache_ptr && (parent = pst_getDptr(pf, parent_id)) == NULL) { | |
871 // check in the lost/found list | |
872 lostfound_ptr = lostfound_head; | |
873 while (lostfound_ptr && (lostfound_ptr->ptr->id != parent_id)) { | |
874 lostfound_ptr = lostfound_ptr->next; | |
875 } | |
876 if (!lostfound_ptr) { | |
877 DEBUG_WARN(("ERROR -- cannot find parent with id %#"PRIx64". Adding id %#"PRIx64" to lost/found\n", parent_id, d_ptr->id)); | |
878 lostfound_ptr = (struct cache_list_node*) xmalloc(sizeof(struct cache_list_node)); | |
879 lostfound_ptr->prev = NULL; | |
880 lostfound_ptr->next = lostfound_head; | |
881 lostfound_ptr->parent = parent_id; | |
882 lostfound_ptr->ptr = d_ptr; | |
883 lostfound_head = lostfound_ptr; | |
884 } else { | |
885 parent = lostfound_ptr->ptr; | |
886 DEBUG_INDEX(("Found parent (%#"PRIx64") in lost/found\n", parent->id)); | |
887 } | |
888 } | |
889 | |
890 if (cache_ptr || parent) { | |
891 if (cache_ptr) | |
892 // parent is already in the cache | |
893 parent = cache_ptr->ptr; | |
894 else { | |
895 //add the parent to the cache | |
896 DEBUG_INDEX(("Cache addition\n")); | |
897 cache_ptr = (struct cache_list_node*) xmalloc(sizeof(struct cache_list_node)); | |
898 cache_ptr->prev = NULL; | |
899 cache_ptr->next = cache_head; | |
900 cache_ptr->ptr = parent; | |
901 if (cache_head) cache_head->prev = cache_ptr; | |
902 if (!cache_tail) cache_tail = cache_ptr; | |
903 cache_head = cache_ptr; | |
904 cache_count++; | |
905 if (cache_count > 100) { | |
906 DEBUG_INDEX(("trimming quick cache\n")); | |
907 //remove one from the end | |
908 cache_ptr = cache_tail; | |
909 cache_tail = cache_ptr->prev; | |
910 if (cache_tail) cache_tail->next = NULL; | |
911 free (cache_ptr); | |
912 cache_count--; | |
913 } | |
914 } | |
915 DEBUG_INDEX(("Found a parent\n")); | |
916 parent->no_child++; | 894 parent->no_child++; |
917 d_ptr->parent = parent; | 895 node->parent = parent; |
918 if (parent->child_tail) parent->child_tail->next = d_ptr; | 896 add_descriptor_to_list(node, &parent->child, &parent->child_tail); |
919 if (!parent->child) parent->child = d_ptr; | 897 } |
920 d_ptr->prev = parent->child_tail; | 898 else { |
921 parent->child_tail = d_ptr; | 899 //DEBUG_INDEX(("No parent %#"PRIx64", have an orphan child %#"PRIx64"\n", node->parent_id, node->id)); |
922 } | 900 add_descriptor_to_list(node, &pf->d_head, &pf->d_tail); |
923 } | 901 } |
924 } | 902 } |
903 } | |
904 | |
925 | 905 |
926 int pst_build_desc_ptr (pst_file *pf, off_t offset, int32_t depth, uint64_t linku1, uint64_t *high_id, uint64_t start_val, uint64_t end_val) { | 906 int pst_build_desc_ptr (pst_file *pf, off_t offset, int32_t depth, uint64_t linku1, uint64_t *high_id, uint64_t start_val, uint64_t end_val) { |
927 struct pst_table_ptr_structn table, table2; | 907 struct pst_table_ptr_structn table, table2; |
928 pst_descn desc_rec; | 908 pst_descn desc_rec; |
929 pst_desc_ll *d_ptr=NULL, *parent=NULL; | 909 pst_desc_ll *d_ptr=NULL, *parent=NULL; |
930 int32_t x, item_count; | 910 int32_t x, item_count; |
931 uint64_t old = start_val; | 911 uint64_t old = start_val; |
932 char *buf = NULL, *bptr; | 912 char *buf = NULL, *bptr; |
933 struct cache_list_node *cache_ptr = NULL; | |
934 struct cache_list_node *lostfound_ptr = NULL; | |
935 struct cache_list_node *lostfound_shd = NULL; | |
936 struct cache_list_node *lostfound_tmp = NULL; | |
937 | |
938 if (depth == 0) { | |
939 // initialize the linked list and lost/found list. | |
940 cache_head = NULL; | |
941 cache_tail = NULL; | |
942 lostfound_head = NULL; | |
943 cache_count = 0; | |
944 } | |
945 | 913 |
946 DEBUG_ENT("pst_build_desc_ptr"); | 914 DEBUG_ENT("pst_build_desc_ptr"); |
947 DEBUG_INDEX(("offset %#"PRIx64" depth %i linku1 %#"PRIx64" start %#"PRIx64" end %#"PRIx64"\n", offset, depth, linku1, start_val, end_val)); | 915 DEBUG_INDEX(("offset %#"PRIx64" depth %i linku1 %#"PRIx64" start %#"PRIx64" end %#"PRIx64"\n", offset, depth, linku1, start_val, end_val)); |
948 if (end_val <= start_val) { | 916 if (end_val <= start_val) { |
949 DEBUG_WARN(("The end value is BEFORE the start value. This function will quit. Soz. [start:%#"PRIx64", end:%#"PRIx64"]\n", start_val, end_val)); | 917 DEBUG_WARN(("The end value is BEFORE the start value. This function will quit. Soz. [start:%#"PRIx64", end:%#"PRIx64"]\n", start_val, end_val)); |
998 DEBUG_RET(); | 966 DEBUG_RET(); |
999 return -1; | 967 return -1; |
1000 } | 968 } |
1001 } | 969 } |
1002 // When duplicates found, just update the info.... perhaps this is correct functionality | 970 // When duplicates found, just update the info.... perhaps this is correct functionality |
1003 DEBUG_INDEX(("Searching for existing record\n")); | 971 DEBUG_INDEX(("Searching for existing record %#"PRIx64"\n", desc_rec.d_id)); |
1004 if (desc_rec.d_id <= *high_id && (d_ptr = pst_getDptr(pf, desc_rec.d_id))) { | 972 if (desc_rec.d_id <= *high_id && (d_ptr = pst_getDptr(pf, desc_rec.d_id))) { |
973 // This is probably unreachable code, originally written when the | |
974 // tree walking code was broken since it did not know about the node | |
975 // count. It always processed all the items in the node, even unused | |
976 // items, and that probably made it look like there were duplicate | |
977 // entries. | |
1005 uint64_t bigzero = 0; | 978 uint64_t bigzero = 0; |
1006 DEBUG_INDEX(("Updating Existing Values\n")); | 979 DEBUG_INDEX(("Updating Existing Values\n")); |
1007 d_ptr->list_index = pst_getID(pf, desc_rec.list_id); | 980 d_ptr->list_index = pst_getID(pf, desc_rec.list_id); |
1008 d_ptr->desc = pst_getID(pf, desc_rec.desc_id); | 981 d_ptr->desc = pst_getID(pf, desc_rec.desc_id); |
1009 DEBUG_INDEX(("\tdesc = %#"PRIx64"\tlist_index=%#"PRIx64"\n", | 982 DEBUG_INDEX(("\tdesc = %#"PRIx64"\tlist_index=%#"PRIx64"\n", |
1031 else if (d_ptr->parent) | 1004 else if (d_ptr->parent) |
1032 d_ptr->parent->child_tail = d_ptr->prev; | 1005 d_ptr->parent->child_tail = d_ptr->prev; |
1033 else | 1006 else |
1034 pf->d_tail = d_ptr->prev; | 1007 pf->d_tail = d_ptr->prev; |
1035 | 1008 |
1036 d_ptr->prev = NULL; | 1009 d_ptr->parent_id = desc_rec.parent_id; |
1037 d_ptr->next = NULL; | 1010 d_ptr->prev = NULL; |
1038 d_ptr->parent = NULL; | 1011 d_ptr->next = NULL; |
1039 record_descriptor(pf, d_ptr, desc_rec.parent_id); // add to the global tree | 1012 d_ptr->parent = NULL; |
1013 record_descriptor(pf, d_ptr); // add to the global tree | |
1040 } | 1014 } |
1041 } else { | 1015 } else { |
1042 if (*high_id < desc_rec.d_id) { | 1016 if (*high_id < desc_rec.d_id) { |
1043 DEBUG_INDEX(("Updating New High\n")); | 1017 DEBUG_INDEX(("Updating New High\n")); |
1044 *high_id = desc_rec.d_id; | 1018 *high_id = desc_rec.d_id; |
1045 } | 1019 } |
1046 DEBUG_INDEX(("New Record\n")); | 1020 DEBUG_INDEX(("New Record %#"PRIx64" with parent %#x\n", desc_rec.d_id, desc_rec.parent_id)); |
1047 d_ptr = (pst_desc_ll*) xmalloc(sizeof(pst_desc_ll)); | 1021 d_ptr = (pst_desc_ll*) xmalloc(sizeof(pst_desc_ll)); |
1048 d_ptr->id = desc_rec.d_id; | 1022 d_ptr->id = desc_rec.d_id; |
1023 d_ptr->parent_id = desc_rec.parent_id; | |
1049 d_ptr->list_index = pst_getID(pf, desc_rec.list_id); | 1024 d_ptr->list_index = pst_getID(pf, desc_rec.list_id); |
1050 d_ptr->desc = pst_getID(pf, desc_rec.desc_id); | 1025 d_ptr->desc = pst_getID(pf, desc_rec.desc_id); |
1051 d_ptr->prev = NULL; | 1026 d_ptr->prev = NULL; |
1052 d_ptr->next = NULL; | 1027 d_ptr->next = NULL; |
1053 d_ptr->parent = NULL; | 1028 d_ptr->parent = NULL; |
1054 d_ptr->child = NULL; | 1029 d_ptr->child = NULL; |
1055 d_ptr->child_tail = NULL; | 1030 d_ptr->child_tail = NULL; |
1056 d_ptr->no_child = 0; | 1031 d_ptr->no_child = 0; |
1057 record_descriptor(pf, d_ptr, desc_rec.parent_id); // add to the global tree | 1032 record_descriptor(pf, d_ptr); // add to the global tree |
1058 | 1033 //DEBUG_INDEX(("dump parent descriptor tree\n")); //!! |
1059 } | 1034 //d_ptr = pst_getDptr(pf, (uint64_t)-1); //!! |
1060 // check here to see if d_ptr is the parent of any of the items in the lost / found list | |
1061 lostfound_ptr = lostfound_head; | |
1062 lostfound_shd = NULL; | |
1063 while (lostfound_ptr) { | |
1064 if (lostfound_ptr->parent == d_ptr->id) { | |
1065 DEBUG_INDEX(("Found a lost/found child (%#"PRIx64") of the current record. Joining to main structure.\n", lostfound_ptr->ptr->id)); | |
1066 parent = d_ptr; | |
1067 d_ptr = lostfound_ptr->ptr; | |
1068 parent->no_child++; | |
1069 d_ptr->parent = parent; | |
1070 if (parent->child_tail) parent->child_tail->next = d_ptr; | |
1071 if (!parent->child) parent->child = d_ptr; | |
1072 d_ptr->prev = parent->child_tail; | |
1073 parent->child_tail = d_ptr; | |
1074 if (!lostfound_shd) lostfound_head = lostfound_ptr->next; | |
1075 else lostfound_shd->next = lostfound_ptr->next; | |
1076 lostfound_tmp = lostfound_ptr->next; | |
1077 free(lostfound_ptr); | |
1078 lostfound_ptr = lostfound_tmp; | |
1079 } else { | |
1080 lostfound_shd = lostfound_ptr; | |
1081 lostfound_ptr = lostfound_ptr->next; | |
1082 } | |
1083 } | 1035 } |
1084 } | 1036 } |
1085 } else { | 1037 } else { |
1086 // this node contains node pointers | 1038 // this node contains node pointers |
1087 DEBUG_HEXDUMPC(buf, DESC_BLOCK_SIZE, ITEM_SIZE32); | 1039 DEBUG_HEXDUMPC(buf, DESC_BLOCK_SIZE, ITEM_SIZE32); |
1118 DEBUG_RET(); | 1070 DEBUG_RET(); |
1119 return -1; | 1071 return -1; |
1120 } | 1072 } |
1121 } | 1073 } |
1122 (void)pst_build_desc_ptr(pf, table.offset, depth+1, table.u1, high_id, table.start, table2.start); | 1074 (void)pst_build_desc_ptr(pf, table.offset, depth+1, table.u1, high_id, table.start, table2.start); |
1123 } | |
1124 } | |
1125 if (depth == 0) { | |
1126 // free the quick cache | |
1127 while (cache_head) { | |
1128 cache_ptr = cache_head->next; | |
1129 free(cache_head); | |
1130 cache_head = cache_ptr; | |
1131 } | |
1132 // free the lost and found | |
1133 while (lostfound_head) { | |
1134 lostfound_ptr = lostfound_head->next; | |
1135 WARN(("unused lost/found item %#"PRIx64" with parent %#"PRIx64, lostfound_head->parent, lostfound_head->ptr->id)); | |
1136 free(lostfound_head); | |
1137 lostfound_head = lostfound_ptr; | |
1138 } | 1075 } |
1139 } | 1076 } |
1140 if (buf) free(buf); | 1077 if (buf) free(buf); |
1141 DEBUG_RET(); | 1078 DEBUG_RET(); |
1142 return 0; | 1079 return 0; |
4135 */ | 4072 */ |
4136 pst_desc_ll* pst_getDptr(pst_file *pf, uint64_t id) { | 4073 pst_desc_ll* pst_getDptr(pst_file *pf, uint64_t id) { |
4137 pst_desc_ll *ptr = pf->d_head; | 4074 pst_desc_ll *ptr = pf->d_head; |
4138 DEBUG_ENT("pst_getDptr"); | 4075 DEBUG_ENT("pst_getDptr"); |
4139 while (ptr && (ptr->id != id)) { | 4076 while (ptr && (ptr->id != id)) { |
4077 //DEBUG_INDEX(("Looking for %#"PRIx64" at node %#"PRIx64" with parent %#"PRIx64"\n", id, ptr->id, ptr->parent_id)); | |
4140 if (ptr->child) { | 4078 if (ptr->child) { |
4141 ptr = ptr->child; | 4079 ptr = ptr->child; |
4142 continue; | 4080 continue; |
4143 } | 4081 } |
4144 while (!ptr->next && ptr->parent) { | 4082 while (!ptr->next && ptr->parent) { |