comparison src/libpst.c @ 101:1fc33da23175

fix for orphan children when building descriptor tree, avoid writing uninitialized data to debug log file
author Carl Byington <carl@five-ten-sg.com>
date Sun, 05 Oct 2008 09:56:32 -0700
parents 1e4a7610d525
children 8c4482be0b4c
comparison
equal deleted inserted replaced
100:1e4a7610d525 101:1fc33da23175
819 DEBUG_RET(); 819 DEBUG_RET();
820 return 0; 820 return 0;
821 } 821 }
822 822
823 823
824 /** this list node type is used for a quick cache
825 of the descriptor tree nodes (rooted at pf->d_head)
826 and for a "lost and found" list.
827 If the parent isn't found yet, put it on the lost and found
828 list and check it each time you read a new item.
829 */
830 struct cache_list_node {
831 pst_desc_ll *ptr;
832 /** only used for lost and found lists */
833 uint64_t parent;
834 struct cache_list_node *next;
835 struct cache_list_node *prev;
836 };
837 static struct cache_list_node *cache_head;
838 static struct cache_list_node *cache_tail;
839 static struct cache_list_node *lostfound_head;
840 static int cache_count;
841
842
843 /** 824 /**
844 add the d_ptr descriptor into the global tree 825 * add a pst descriptor node to a linked list of such nodes.
845 */ 826 *
846 static void record_descriptor(pst_file *pf, pst_desc_ll *d_ptr, uint64_t parent_id); 827 * @param node pointer to the node to be added to the list
847 static void record_descriptor(pst_file *pf, pst_desc_ll *d_ptr, uint64_t parent_id) { 828 * @param head pointer to the list head pointer
848 struct cache_list_node *lostfound_ptr = NULL; 829 * @param tail pointer to the list tail pointer
849 struct cache_list_node *cache_ptr = NULL; 830 */
850 pst_desc_ll *parent = NULL; 831 static void add_descriptor_to_list(pst_desc_ll *node, pst_desc_ll **head, pst_desc_ll **tail);
851 832 static void add_descriptor_to_list(pst_desc_ll *node, pst_desc_ll **head, pst_desc_ll **tail)
852 if (parent_id == 0 || parent_id == d_ptr->id) { 833 {
834 DEBUG_ENT("add_descriptor_to_list");
835 //DEBUG_INDEX(("Added node %#"PRIx64" parent %#"PRIx64" real parent %#"PRIx64" prev %#"PRIx64" next %#"PRIx64"\n",
836 // node->id, node->parent_id,
837 // (node->parent ? node->parent->id : (uint64_t)0),
838 // (node->prev ? node->prev->id : (uint64_t)0),
839 // (node->next ? node->next->id : (uint64_t)0)));
840 if (*tail) (*tail)->next = node;
841 if (!(*head)) *head = node;
842 node->prev = *tail;
843 *tail = node;
844 DEBUG_RET();
845 }
846
847
848 /**
849 * add a pst descriptor node into the global tree.
850 *
851 * @param pf global pst file pointer
852 * @param node pointer to the node to be added to the tree
853 */
854 static void record_descriptor(pst_file *pf, pst_desc_ll *node);
855 static void record_descriptor(pst_file *pf, pst_desc_ll *node)
856 {
857 // find any orphan children of this node, and collect them
858 pst_desc_ll *n = pf->d_head;
859 while (n) {
860 if (n->parent_id == node->id) {
861 // found a child of this node
862 DEBUG_INDEX(("Found orphan child %#"PRIx64" of parent %#"PRIx64"\n", n->id, node->id));
863 pst_desc_ll *nn = n->next;
864 pst_desc_ll *pp = n->prev;
865 node->no_child++;
866 n->parent = node;
867 n->prev = NULL;
868 n->next = NULL;
869 add_descriptor_to_list(n, &node->child, &node->child_tail);
870 if (pp) pp->next = nn; else pf->d_head = nn;
871 if (nn) nn->prev = pp; else pf->d_tail = pp;
872 n = nn;
873 }
874 else {
875 n = n->next;
876 }
877 }
878
879 // now hook this node into the global tree
880 if (node->parent_id == 0) {
853 // add top level node to the descriptor tree 881 // add top level node to the descriptor tree
854 if (parent_id == 0) { 882 //DEBUG_INDEX(("Null parent\n"));
855 DEBUG_INDEX(("No Parent\n")); 883 add_descriptor_to_list(node, &pf->d_head, &pf->d_tail);
856 } else { 884 }
857 DEBUG_INDEX(("Record is its own parent. What is this world coming to?\n")); 885 else if (node->parent_id == node->id) {
858 } 886 // add top level node to the descriptor tree
859 if (pf->d_tail) pf->d_tail->next = d_ptr; 887 DEBUG_INDEX(("%#"PRIx64" is its own parent. What is this world coming to?\n"));
860 if (!pf->d_head) pf->d_head = d_ptr; 888 add_descriptor_to_list(node, &pf->d_head, &pf->d_tail);
861 d_ptr->prev = pf->d_tail;
862 pf->d_tail = d_ptr;
863 } else { 889 } else {
864 DEBUG_INDEX(("Searching for parent\n")); 890 //DEBUG_INDEX(("Searching for parent %#"PRIx64" of %#"PRIx64"\n", node->parent_id, node->id));
865 // check in the cache for the parent 891 pst_desc_ll *parent = pst_getDptr(pf, node->parent_id);
866 cache_ptr = cache_head; 892 if (parent) {
867 while (cache_ptr && (cache_ptr->ptr->id != parent_id)) { 893 //DEBUG_INDEX(("Found parent %#"PRIx64"\n", node->parent_id));
868 cache_ptr = cache_ptr->next;
869 }
870 if (!cache_ptr && (parent = pst_getDptr(pf, parent_id)) == NULL) {
871 // check in the lost/found list
872 lostfound_ptr = lostfound_head;
873 while (lostfound_ptr && (lostfound_ptr->ptr->id != parent_id)) {
874 lostfound_ptr = lostfound_ptr->next;
875 }
876 if (!lostfound_ptr) {
877 DEBUG_WARN(("ERROR -- cannot find parent with id %#"PRIx64". Adding id %#"PRIx64" to lost/found\n", parent_id, d_ptr->id));
878 lostfound_ptr = (struct cache_list_node*) xmalloc(sizeof(struct cache_list_node));
879 lostfound_ptr->prev = NULL;
880 lostfound_ptr->next = lostfound_head;
881 lostfound_ptr->parent = parent_id;
882 lostfound_ptr->ptr = d_ptr;
883 lostfound_head = lostfound_ptr;
884 } else {
885 parent = lostfound_ptr->ptr;
886 DEBUG_INDEX(("Found parent (%#"PRIx64") in lost/found\n", parent->id));
887 }
888 }
889
890 if (cache_ptr || parent) {
891 if (cache_ptr)
892 // parent is already in the cache
893 parent = cache_ptr->ptr;
894 else {
895 //add the parent to the cache
896 DEBUG_INDEX(("Cache addition\n"));
897 cache_ptr = (struct cache_list_node*) xmalloc(sizeof(struct cache_list_node));
898 cache_ptr->prev = NULL;
899 cache_ptr->next = cache_head;
900 cache_ptr->ptr = parent;
901 if (cache_head) cache_head->prev = cache_ptr;
902 if (!cache_tail) cache_tail = cache_ptr;
903 cache_head = cache_ptr;
904 cache_count++;
905 if (cache_count > 100) {
906 DEBUG_INDEX(("trimming quick cache\n"));
907 //remove one from the end
908 cache_ptr = cache_tail;
909 cache_tail = cache_ptr->prev;
910 if (cache_tail) cache_tail->next = NULL;
911 free (cache_ptr);
912 cache_count--;
913 }
914 }
915 DEBUG_INDEX(("Found a parent\n"));
916 parent->no_child++; 894 parent->no_child++;
917 d_ptr->parent = parent; 895 node->parent = parent;
918 if (parent->child_tail) parent->child_tail->next = d_ptr; 896 add_descriptor_to_list(node, &parent->child, &parent->child_tail);
919 if (!parent->child) parent->child = d_ptr; 897 }
920 d_ptr->prev = parent->child_tail; 898 else {
921 parent->child_tail = d_ptr; 899 //DEBUG_INDEX(("No parent %#"PRIx64", have an orphan child %#"PRIx64"\n", node->parent_id, node->id));
922 } 900 add_descriptor_to_list(node, &pf->d_head, &pf->d_tail);
923 } 901 }
924 } 902 }
903 }
904
925 905
926 int pst_build_desc_ptr (pst_file *pf, off_t offset, int32_t depth, uint64_t linku1, uint64_t *high_id, uint64_t start_val, uint64_t end_val) { 906 int pst_build_desc_ptr (pst_file *pf, off_t offset, int32_t depth, uint64_t linku1, uint64_t *high_id, uint64_t start_val, uint64_t end_val) {
927 struct pst_table_ptr_structn table, table2; 907 struct pst_table_ptr_structn table, table2;
928 pst_descn desc_rec; 908 pst_descn desc_rec;
929 pst_desc_ll *d_ptr=NULL, *parent=NULL; 909 pst_desc_ll *d_ptr=NULL, *parent=NULL;
930 int32_t x, item_count; 910 int32_t x, item_count;
931 uint64_t old = start_val; 911 uint64_t old = start_val;
932 char *buf = NULL, *bptr; 912 char *buf = NULL, *bptr;
933 struct cache_list_node *cache_ptr = NULL;
934 struct cache_list_node *lostfound_ptr = NULL;
935 struct cache_list_node *lostfound_shd = NULL;
936 struct cache_list_node *lostfound_tmp = NULL;
937
938 if (depth == 0) {
939 // initialize the linked list and lost/found list.
940 cache_head = NULL;
941 cache_tail = NULL;
942 lostfound_head = NULL;
943 cache_count = 0;
944 }
945 913
946 DEBUG_ENT("pst_build_desc_ptr"); 914 DEBUG_ENT("pst_build_desc_ptr");
947 DEBUG_INDEX(("offset %#"PRIx64" depth %i linku1 %#"PRIx64" start %#"PRIx64" end %#"PRIx64"\n", offset, depth, linku1, start_val, end_val)); 915 DEBUG_INDEX(("offset %#"PRIx64" depth %i linku1 %#"PRIx64" start %#"PRIx64" end %#"PRIx64"\n", offset, depth, linku1, start_val, end_val));
948 if (end_val <= start_val) { 916 if (end_val <= start_val) {
949 DEBUG_WARN(("The end value is BEFORE the start value. This function will quit. Soz. [start:%#"PRIx64", end:%#"PRIx64"]\n", start_val, end_val)); 917 DEBUG_WARN(("The end value is BEFORE the start value. This function will quit. Soz. [start:%#"PRIx64", end:%#"PRIx64"]\n", start_val, end_val));
998 DEBUG_RET(); 966 DEBUG_RET();
999 return -1; 967 return -1;
1000 } 968 }
1001 } 969 }
1002 // When duplicates found, just update the info.... perhaps this is correct functionality 970 // When duplicates found, just update the info.... perhaps this is correct functionality
1003 DEBUG_INDEX(("Searching for existing record\n")); 971 DEBUG_INDEX(("Searching for existing record %#"PRIx64"\n", desc_rec.d_id));
1004 if (desc_rec.d_id <= *high_id && (d_ptr = pst_getDptr(pf, desc_rec.d_id))) { 972 if (desc_rec.d_id <= *high_id && (d_ptr = pst_getDptr(pf, desc_rec.d_id))) {
973 // This is probably unreachable code, originally written when the
974 // tree walking code was broken since it did not know about the node
975 // count. It always processed all the items in the node, even unused
976 // items, and that probably made it look like there were duplicate
977 // entries.
1005 uint64_t bigzero = 0; 978 uint64_t bigzero = 0;
1006 DEBUG_INDEX(("Updating Existing Values\n")); 979 DEBUG_INDEX(("Updating Existing Values\n"));
1007 d_ptr->list_index = pst_getID(pf, desc_rec.list_id); 980 d_ptr->list_index = pst_getID(pf, desc_rec.list_id);
1008 d_ptr->desc = pst_getID(pf, desc_rec.desc_id); 981 d_ptr->desc = pst_getID(pf, desc_rec.desc_id);
1009 DEBUG_INDEX(("\tdesc = %#"PRIx64"\tlist_index=%#"PRIx64"\n", 982 DEBUG_INDEX(("\tdesc = %#"PRIx64"\tlist_index=%#"PRIx64"\n",
1031 else if (d_ptr->parent) 1004 else if (d_ptr->parent)
1032 d_ptr->parent->child_tail = d_ptr->prev; 1005 d_ptr->parent->child_tail = d_ptr->prev;
1033 else 1006 else
1034 pf->d_tail = d_ptr->prev; 1007 pf->d_tail = d_ptr->prev;
1035 1008
1036 d_ptr->prev = NULL; 1009 d_ptr->parent_id = desc_rec.parent_id;
1037 d_ptr->next = NULL; 1010 d_ptr->prev = NULL;
1038 d_ptr->parent = NULL; 1011 d_ptr->next = NULL;
1039 record_descriptor(pf, d_ptr, desc_rec.parent_id); // add to the global tree 1012 d_ptr->parent = NULL;
1013 record_descriptor(pf, d_ptr); // add to the global tree
1040 } 1014 }
1041 } else { 1015 } else {
1042 if (*high_id < desc_rec.d_id) { 1016 if (*high_id < desc_rec.d_id) {
1043 DEBUG_INDEX(("Updating New High\n")); 1017 DEBUG_INDEX(("Updating New High\n"));
1044 *high_id = desc_rec.d_id; 1018 *high_id = desc_rec.d_id;
1045 } 1019 }
1046 DEBUG_INDEX(("New Record\n")); 1020 DEBUG_INDEX(("New Record %#"PRIx64" with parent %#x\n", desc_rec.d_id, desc_rec.parent_id));
1047 d_ptr = (pst_desc_ll*) xmalloc(sizeof(pst_desc_ll)); 1021 d_ptr = (pst_desc_ll*) xmalloc(sizeof(pst_desc_ll));
1048 d_ptr->id = desc_rec.d_id; 1022 d_ptr->id = desc_rec.d_id;
1023 d_ptr->parent_id = desc_rec.parent_id;
1049 d_ptr->list_index = pst_getID(pf, desc_rec.list_id); 1024 d_ptr->list_index = pst_getID(pf, desc_rec.list_id);
1050 d_ptr->desc = pst_getID(pf, desc_rec.desc_id); 1025 d_ptr->desc = pst_getID(pf, desc_rec.desc_id);
1051 d_ptr->prev = NULL; 1026 d_ptr->prev = NULL;
1052 d_ptr->next = NULL; 1027 d_ptr->next = NULL;
1053 d_ptr->parent = NULL; 1028 d_ptr->parent = NULL;
1054 d_ptr->child = NULL; 1029 d_ptr->child = NULL;
1055 d_ptr->child_tail = NULL; 1030 d_ptr->child_tail = NULL;
1056 d_ptr->no_child = 0; 1031 d_ptr->no_child = 0;
1057 record_descriptor(pf, d_ptr, desc_rec.parent_id); // add to the global tree 1032 record_descriptor(pf, d_ptr); // add to the global tree
1058 1033 //DEBUG_INDEX(("dump parent descriptor tree\n")); //!!
1059 } 1034 //d_ptr = pst_getDptr(pf, (uint64_t)-1); //!!
1060 // check here to see if d_ptr is the parent of any of the items in the lost / found list
1061 lostfound_ptr = lostfound_head;
1062 lostfound_shd = NULL;
1063 while (lostfound_ptr) {
1064 if (lostfound_ptr->parent == d_ptr->id) {
1065 DEBUG_INDEX(("Found a lost/found child (%#"PRIx64") of the current record. Joining to main structure.\n", lostfound_ptr->ptr->id));
1066 parent = d_ptr;
1067 d_ptr = lostfound_ptr->ptr;
1068 parent->no_child++;
1069 d_ptr->parent = parent;
1070 if (parent->child_tail) parent->child_tail->next = d_ptr;
1071 if (!parent->child) parent->child = d_ptr;
1072 d_ptr->prev = parent->child_tail;
1073 parent->child_tail = d_ptr;
1074 if (!lostfound_shd) lostfound_head = lostfound_ptr->next;
1075 else lostfound_shd->next = lostfound_ptr->next;
1076 lostfound_tmp = lostfound_ptr->next;
1077 free(lostfound_ptr);
1078 lostfound_ptr = lostfound_tmp;
1079 } else {
1080 lostfound_shd = lostfound_ptr;
1081 lostfound_ptr = lostfound_ptr->next;
1082 }
1083 } 1035 }
1084 } 1036 }
1085 } else { 1037 } else {
1086 // this node contains node pointers 1038 // this node contains node pointers
1087 DEBUG_HEXDUMPC(buf, DESC_BLOCK_SIZE, ITEM_SIZE32); 1039 DEBUG_HEXDUMPC(buf, DESC_BLOCK_SIZE, ITEM_SIZE32);
1118 DEBUG_RET(); 1070 DEBUG_RET();
1119 return -1; 1071 return -1;
1120 } 1072 }
1121 } 1073 }
1122 (void)pst_build_desc_ptr(pf, table.offset, depth+1, table.u1, high_id, table.start, table2.start); 1074 (void)pst_build_desc_ptr(pf, table.offset, depth+1, table.u1, high_id, table.start, table2.start);
1123 }
1124 }
1125 if (depth == 0) {
1126 // free the quick cache
1127 while (cache_head) {
1128 cache_ptr = cache_head->next;
1129 free(cache_head);
1130 cache_head = cache_ptr;
1131 }
1132 // free the lost and found
1133 while (lostfound_head) {
1134 lostfound_ptr = lostfound_head->next;
1135 WARN(("unused lost/found item %#"PRIx64" with parent %#"PRIx64, lostfound_head->parent, lostfound_head->ptr->id));
1136 free(lostfound_head);
1137 lostfound_head = lostfound_ptr;
1138 } 1075 }
1139 } 1076 }
1140 if (buf) free(buf); 1077 if (buf) free(buf);
1141 DEBUG_RET(); 1078 DEBUG_RET();
1142 return 0; 1079 return 0;
4135 */ 4072 */
4136 pst_desc_ll* pst_getDptr(pst_file *pf, uint64_t id) { 4073 pst_desc_ll* pst_getDptr(pst_file *pf, uint64_t id) {
4137 pst_desc_ll *ptr = pf->d_head; 4074 pst_desc_ll *ptr = pf->d_head;
4138 DEBUG_ENT("pst_getDptr"); 4075 DEBUG_ENT("pst_getDptr");
4139 while (ptr && (ptr->id != id)) { 4076 while (ptr && (ptr->id != id)) {
4077 //DEBUG_INDEX(("Looking for %#"PRIx64" at node %#"PRIx64" with parent %#"PRIx64"\n", id, ptr->id, ptr->parent_id));
4140 if (ptr->child) { 4078 if (ptr->child) {
4141 ptr = ptr->child; 4079 ptr = ptr->child;
4142 continue; 4080 continue;
4143 } 4081 }
4144 while (!ptr->next && ptr->parent) { 4082 while (!ptr->next && ptr->parent) {