#include "AdaptivePointerPoolBuilder.h" #include #include "../../inc/tbb/parallel_sort.h" std::string AdaptivePointerPoolBuilder::GetFullFileName(const std::string& fileName) const { std::string maskBitSizes; for (unsigned8 maskBitSize : mMaskBits) maskBitSizes += std::to_string(maskBitSize); return fileName + ".a" + (mUseLookupTable ? "l" : "d") + std::to_string(mMaskSize) + maskBitSizes + ".pool"; } size_t GetIndexibleNodes(unsigned8 byteCount, unsigned8 maskSize) { return BitHelper::Exp2(byteCount * 8 - maskSize); } size_t AdaptivePointerPoolBuilder::GetMaxLookupTableSize() const { size_t maxSize = 0; // The max lookup table size is the maximum size we can index using the consecutive pointer sizes (based on pointer bits): for (unsigned8 i = 0; i < BitHelper::Exp2(mMaskSize) - 1; i++) maxSize += GetIndexibleNodes(mMaskBits[i], mMaskSize); return maxSize; } // Given an index in the lookup table, calculates the size of a pointer to that index (taking the mask size into account) unsigned8 AdaptivePointerPoolBuilder::GetMinimumSizeOfPointer(const unsigned32& pointer) const { size_t rangeStart = 0; size_t rangeEnd = 0; for (unsigned8 i = 0; i < BitHelper::Exp2(mMaskSize) - 1; i++) { unsigned8 size = mMaskBits[i]; rangeEnd = rangeStart + GetIndexibleNodes(size, mMaskSize); if (pointer >= rangeStart && pointer < rangeEnd) return size; rangeStart = rangeEnd; } return 4; } unsigned32 AdaptivePointerPoolBuilder::GetShortenedPointerTo(const unsigned32& pointer, unsigned8& mask) const { size_t rangeStart = 0; size_t rangeEnd = 0; for (unsigned8 i = 0; i < BitHelper::Exp2(mMaskSize) - 1; i++) { mask = i; unsigned8 size = mMaskBits[i]; rangeEnd = rangeStart + GetIndexibleNodes(size, mMaskSize); if (pointer >= rangeStart && pointer < rangeEnd) break; rangeStart = rangeEnd; } if (pointer >= rangeEnd) mask = BitHelper::GetLSMask(0, mMaskSize); return pointer - (unsigned32)rangeStart; } void AdaptivePointerPoolBuilder::InitBuild(const BaseTree* tree) { if (!mBuildInitiated) CalculateEverything(tree, mPointerSizes, mPointerSizePerLevel, mNodeLevelOffsets, mParentCounts, mLookupTableNodesPerLevel, mNodeWithinLookupTable); mBuildInitiated = true; } void AdaptivePointerPoolBuilder::CalculateEverything(const BaseTree* tree, std::vector& pointerSizes, std::vector& pointerSizesPerLevel, std::vector& levelOffsets, std::vector& parentCounts, std::vector>& lookupTableNodesPerLevel, BoolArray& nodeWithinLookupTable) const { // Calculate the pointer sizes and which nodes are in the lookup table. unsigned8 depth = tree->GetMaxLevel(); unsigned32 nodeCount = (unsigned32)tree->GetNodeCount(); // Find out how many parents all nodes have parentCounts = tree->GetParentCounts(); // Sort the nodes on parent counts per level. // Do this by sorting a map. This maps indices in the GPU pool to indices in the tree. // e.g. to find the node at position 6 in the GPU pool, use tree->GetNode(nodeMap[6]). std::vector nodeMap(nodeCount); tbb::parallel_for((unsigned32)0, nodeCount, [&](const unsigned32& i){ nodeMap[i] = i; }); OrderNodes(tree, nodeMap, parentCounts); // Now find the indices where the levels start levelOffsets = std::vector(depth + 2); unsigned8 curLevel = 255; for (unsigned32 i = 0; i < nodeCount; i++) { const Node* node = tree->GetNode(nodeMap[i]); if (node->GetLevel() != curLevel) levelOffsets[++curLevel] = i; } levelOffsets[depth + 1] = (unsigned32)tree->GetNodeCount(); // Go bottom-up through the tree. For each level, calculate the size of the level and the size of normal (byte) pointers. 
void AdaptivePointerPoolBuilder::InitBuild(const BaseTree* tree)
{
    if (!mBuildInitiated)
        CalculateEverything(tree, mPointerSizes, mPointerSizePerLevel, mNodeLevelOffsets, mParentCounts, mLookupTableNodesPerLevel, mNodeWithinLookupTable);
    mBuildInitiated = true;
}

void AdaptivePointerPoolBuilder::CalculateEverything(const BaseTree* tree, std::vector<unsigned8>& pointerSizes, std::vector<unsigned8>& pointerSizesPerLevel, std::vector<unsigned32>& levelOffsets, std::vector<unsigned32>& parentCounts, std::vector<std::vector<unsigned32>>& lookupTableNodesPerLevel, BoolArray& nodeWithinLookupTable) const
{
    // Calculate the pointer sizes and which nodes are in the lookup table.
    unsigned8 depth = tree->GetMaxLevel();
    unsigned32 nodeCount = (unsigned32)tree->GetNodeCount();

    // Find out how many parents each node has.
    parentCounts = tree->GetParentCounts();

    // Sort the nodes on parent counts per level.
    // Do this by sorting a map that maps indices in the GPU pool to indices in the tree,
    // e.g. to find the node at position 6 in the GPU pool, use tree->GetNode(nodeMap[6]).
    std::vector<unsigned32> nodeMap(nodeCount);
    tbb::parallel_for((unsigned32)0, nodeCount, [&](const unsigned32& i) { nodeMap[i] = i; });
    OrderNodes(tree, nodeMap, parentCounts);

    // Now find the indices where the levels start.
    levelOffsets = std::vector<unsigned32>(depth + 2);
    unsigned8 curLevel = 255;
    for (unsigned32 i = 0; i < nodeCount; i++)
    {
        const Node* node = tree->GetNode(nodeMap[i]);
        if (node->GetLevel() != curLevel)
            levelOffsets[++curLevel] = i;
    }
    levelOffsets[depth + 1] = (unsigned32)tree->GetNodeCount();

    // Go bottom-up through the tree. For each level, calculate the size of the level and the size of normal (byte) pointers.
    // Also calculate the size of the lookup table and the index until which nodes are put in the lookup table.
    // Store the index of the last lookup table node to decide the sizes of the lookup table pointers.
    if (mUseLookupTable)
    {
        lookupTableNodesPerLevel = std::vector<std::vector<unsigned32>>(depth + 1);
        nodeWithinLookupTable = BoolArray(nodeCount);
    }
    std::vector<unsigned32> nodePointerWithinLevel;
    if (!mUseLookupTable)
        nodePointerWithinLevel.resize(nodeCount);
    pointerSizesPerLevel = std::vector<unsigned8>(depth + 1);
    pointerSizes = std::vector<unsigned8>(nodeCount);
    size_t maxLookupTableSize = GetMaxLookupTableSize();
    for (unsigned8 level = depth + 1; level-- > 0;)
    {
        unsigned32 levelStart = levelOffsets[level];
        unsigned32 levelEnd = levelOffsets[level + 1];
        size_t levelSize = 0;
        std::vector<unsigned32> lookupTable;

        // Calculate the size of this level and which nodes are put in the lookup table.
        for (unsigned32 i = levelStart; i < levelEnd; i++)
        {
            unsigned32 nodeId = nodeMap[i];
            if (!mUseLookupTable)
                nodePointerWithinLevel[nodeId] = (unsigned32)levelSize;
            // If the node has more than 2 parents, it's worth putting in the lookup table.
            if (mUseLookupTable && parentCounts[nodeId] > 2 && i - levelStart < maxLookupTableSize)
            {
                lookupTable.push_back(nodeId);
                nodeWithinLookupTable.Set(nodeId, true);
            }
            const Node* node = tree->GetNode(nodeId);
            unsigned32* children = node->GetChildren();
            size_t nodeSize = GetBaseNodeSize(tree, nodeId);
            for (ChildIndex c = 0; c < node->GetChildCount(); c++)
                nodeSize += pointerSizes[children[c]];
            levelSize += nodeSize;
        }

        // Now that we know the level size and the lookup table layout, we can calculate the size of pointers to nodes in this level.
        unsigned8 levelPointerSize = BitHelper::RoundToBytes(BitHelper::Log2Ceil(levelSize) + mMaskSize) / 8;
        if (levelPointerSize == 0)
            levelPointerSize = 1; // Pointers should be at least 1 byte.
        pointerSizesPerLevel[level] = levelPointerSize;
        if (mUseLookupTable && levelPointerSize <= 1) // We can't save space with a lookup table
        {
            lookupTable.clear();
            for (unsigned32 i = levelStart; i < levelEnd; i++)
                nodeWithinLookupTable.Set(nodeMap[i], false);
        }

        //// Hack: put everything in the lookup table.
        //nodeWithinLookupTable.SetRange(levelStart, levelEnd, true);
        //lookupTable.clear();
        //for (unsigned32 i = levelStart; i < levelEnd; i++) lookupTable.push_back(nodeMap[i]);

        for (unsigned32 i = levelStart; i < levelEnd; i++)
        {
            unsigned32 nodeId = nodeMap[i];
            if (mUseLookupTable)
            {
                if (nodeWithinLookupTable[nodeId])
                    // Since the nodes are inserted in the lookup table in order, we can calculate a node's index
                    // in the lookup table using i - levelStart.
                    pointerSizes[nodeId] = GetMinimumSizeOfPointer(i - levelStart);
                else
                    pointerSizes[nodeId] = levelPointerSize;
            }
            else
            {
                unsigned8 minimumPointerSize = GetMinimumSizeOfPointer(nodePointerWithinLevel[nodeId]);
                // Only use a smaller pointer if the size of the pointer can be indicated by the mask.
                if (minimumPointerSize < BitHelper::Exp2(mMaskSize))
                    pointerSizes[nodeId] = minimumPointerSize;
                else
                    pointerSizes[nodeId] = levelPointerSize;
            }
        }
        if (mUseLookupTable)
            lookupTableNodesPerLevel[level] = lookupTable;
    }
}
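
// Illustrative example of the level pointer size computation above (numbers assumed):
// for a level of levelSize = 300000 bytes with mMaskSize = 2, Log2Ceil(300000) = 19
// offset bits are needed; adding the 2 mask bits gives 21 bits, which RoundToBytes
// (presumably rounding a bit count up to a whole number of bytes) turns into 24, so
// full pointers into this level take 24 / 8 = 3 bytes. Nodes placed early in the level
// or in the lookup table can still get smaller sizes from GetMinimumSizeOfPointer.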
void AdaptivePointerPoolBuilder::OrderNodes(const BaseTree* tree, std::vector<unsigned32>& nodeMap) const
{
    OrderNodes(tree, nodeMap, mParentCounts);
}

void AdaptivePointerPoolBuilder::OrderNodes(const BaseTree* tree, std::vector<unsigned32>& nodeMap, const std::vector<unsigned32>& parentCounts)
{
    // First order on level (asc), then on number of parents (desc), so that the most used nodes have the smallest pointers.
    tbb::parallel_sort(nodeMap.begin(), nodeMap.end(), [&](const unsigned32& i1, const unsigned32& i2)
    {
        bool res = false;
        unsigned8 lvl1 = tree->GetNode(i1)->GetLevel();
        unsigned8 lvl2 = tree->GetNode(i2)->GetLevel();
        if (lvl1 != lvl2)
            res = lvl1 < lvl2;
        else if (parentCounts[i1] != parentCounts[i2])
            res = (parentCounts[i1] > parentCounts[i2]);
        // If the level and number of parents are the same, then, for consistency, order on node ID.
        else
            res = i1 < i2;
        return res;
    });
}

void AdaptivePointerPoolBuilder::FinishBuild(const BaseTree* tree)
{
    ClearVariables();
    mBuildInitiated = false;
}

unsigned8 AdaptivePointerPoolBuilder::GetBytesPerPointer(const BaseTree* tree, const unsigned32& nodeIndex) const
{
    return mPointerSizes[nodeIndex];
}

size_t AdaptivePointerPoolBuilder::GetPoolInfoSize(const BaseTree* tree) const
{
    unsigned8 depth = tree->GetMaxLevel();
    // Start with a list of pointers to the starts of the levels (4 bytes each, one per level).
    return 4 * (depth + 1)  // Size of the level offsets
        + (depth + 1)       // And the sizes of full pointers per level
        + 1                 // 1 byte to indicate the pointer sizes that belong to each mask (e.g. 00 -> 1, 01 -> 2, 10 -> 3)
        + (mUseLookupTable ? (4 * depth) : 0); // Size of the pointers to the starts of the lookup tables.
                                               // Note that the first level doesn't have a lookup table (as there are no pointers to the root).
}
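
// Illustrative pool info layout (depth = 3 and mUseLookupTable = true assumed):
// bytes 0-15 hold the four 4-byte level offsets, bytes 16-19 the full pointer size per
// level, byte 20 the mask descriptor, and bytes 21-32 the three 4-byte lookup table
// start pointers for levels 1-3, for a total of 33 bytes.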
std::vector<unsigned8> AdaptivePointerPoolBuilder::GetPoolInfo(const BaseTree* tree, const std::vector<size_t>& nodePointers, const std::vector<unsigned32>& nodeOrder)
{
    std::vector<unsigned8> res(GetPoolInfoSize(tree));
    unsigned8 depth = tree->GetMaxLevel();

    // Calculate the pointer level offsets:
    mPointerLevelOffsets = std::vector<unsigned32>(depth + 1);
    for (unsigned8 level = 0; level <= depth; level++)
        mPointerLevelOffsets[level] = (unsigned32)nodePointers[nodeOrder[mNodeLevelOffsets[level]]];

    // Write the level offsets.
    for (unsigned8 level = 0; level <= depth; level++)
        BitHelper::SplitInBytesAndMove(mPointerLevelOffsets[level], res, level * 4, 4);

    // Write the pointer sizes per level (for full pointers).
    for (unsigned8 level = 0; level <= depth; level++)
        res[(depth + 1) * 4 + level] = mPointerSizePerLevel[level];

    // Write the byte indicating the pointer sizes per mask value.
    unsigned8 maskBitsDescr = 0;
    for (unsigned8 i = 0; i < BitHelper::Exp2(mMaskSize) - 1; i++)
        maskBitsDescr |= (mMaskBits[i] - 1) << (i * 2);
    res[(depth + 1) * (4 + 1)] = maskBitsDescr;

    // Write the pointers to the lookup table starts per level.
    if (mUseLookupTable)
    {
        size_t curIndex = GetAdditionalPoolInfoStart(tree); // curIndex is the start of the lookup tables
        for (unsigned8 level = 1; level <= depth; level++)
        {
            BitHelper::SplitInBytesAndMove(curIndex, res, (depth + 1) * (4 + 1) + 1 + (level - 1) * 4, 4);
            curIndex += mLookupTableNodesPerLevel[level].size() * mPointerSizePerLevel[level];
        }
    }
    return res;
}

size_t AdaptivePointerPoolBuilder::GetAdditionalPoolInfoSize(const BaseTree* tree) const
{
    // Calculate the size of the lookup tables.
    if (mUseLookupTable)
    {
        size_t res = 0;
        for (unsigned8 level = 1; level <= tree->GetMaxLevel(); level++)
            res += mLookupTableNodesPerLevel[level].size() * mPointerSizePerLevel[level];
        return res;
    }
    else
        return 0;
}

std::vector<unsigned8> AdaptivePointerPoolBuilder::GetAdditionalPoolInfo(const BaseTree* tree, const std::vector<size_t>& nodePointers, const std::vector<unsigned32>& nodeOrder)
{
    std::vector<unsigned8> res(GetAdditionalPoolInfoSize(tree));
    if (mUseLookupTable)
    {
        unsigned8 depth = tree->GetMaxLevel();
        // Write the lookup tables.
        size_t curIndex = 0;
        for (unsigned8 level = 1; level <= depth; level++)
        {
            unsigned8 levelPointerSize = mPointerSizePerLevel[level];
            unsigned32 levelOffset = mPointerLevelOffsets[level];
            std::vector<unsigned32>& levelLookupTableNodes = mLookupTableNodesPerLevel[level];
            for (unsigned32 lookupTableNode : levelLookupTableNodes)
            {
                unsigned32 actualPointer = (unsigned32)nodePointers[lookupTableNode] - levelOffset;
                BitHelper::SplitInBytesAndMove(actualPointer, res, curIndex, levelPointerSize);
                curIndex += levelPointerSize;
            }
        }
    }
    return res;
}

std::vector<unsigned8> AdaptivePointerPoolBuilder::WrapPointer(const BaseTree* tree, const unsigned32& nodeIndex, const unsigned32& indexInPool, const unsigned32& pointer) const
{
    const Node* node = tree->GetNode(nodeIndex);
    unsigned8 level = node->GetLevel();
    unsigned8 pointerSize = mPointerSizes[nodeIndex];
    unsigned8 bitPointerSize = pointerSize * 8;
    unsigned32 mask = (unsigned32)BitHelper::Exp2(mMaskSize) - 1;
    unsigned32 actualPointer = pointer - mPointerLevelOffsets[level];
    if (mUseLookupTable)
    {
        if (mNodeWithinLookupTable[nodeIndex])
        {
            // Find the index of the node within the lookup table.
            size_t lookupTablePointer = indexInPool - mNodeLevelOffsets[level];
            assert(mLookupTableNodesPerLevel[level][lookupTablePointer] == nodeIndex);
            unsigned8 sectionMask;
            actualPointer = GetShortenedPointerTo((unsigned32)lookupTablePointer, sectionMask);
            mask = sectionMask;
        }
    }
    else
    {
        unsigned8 sectionMask;
        actualPointer = GetShortenedPointerTo(actualPointer, sectionMask);
        mask = sectionMask;
    }
    assert(actualPointer < BitHelper::Exp2(bitPointerSize - mMaskSize));
    assert(mask < BitHelper::Exp2(mMaskSize));
    unsigned32 pointerWithMask = actualPointer | (mask << (bitPointerSize - mMaskSize));
    return BitHelper::SplitInBytes(pointerWithMask, pointerSize);
}
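
// Illustrative encoding produced by WrapPointer (values assumed): for a 2-byte pointer
// with mMaskSize = 2, the mask occupies the top 2 bits and the shortened pointer the
// lower 14. With actualPointer = 6 and mask = 1 this yields 6 | (1 << 14) = 0x4006,
// which BitHelper::SplitInBytes then serializes into the two pool bytes.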
void AdaptivePointerPoolBuilder::ClearVariables()
{
    mPointerSizes.clear();
    mPointerSizes.shrink_to_fit();
    mPointerSizePerLevel.clear();
    mPointerSizePerLevel.shrink_to_fit();
    mPointerLevelOffsets.clear();
    mPointerLevelOffsets.shrink_to_fit();
    mNodeLevelOffsets.clear();
    mNodeLevelOffsets.shrink_to_fit();
    mParentCounts.clear();
    mParentCounts.shrink_to_fit();
    mLookupTableNodesPerLevel.clear();
    mLookupTableNodesPerLevel.shrink_to_fit();
    mNodeWithinLookupTable.Resize(0);
}
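
// Sketch of the expected call order (the driver code is not part of this file, and the
// variable names below are hypothetical): WrapPointer reads mPointerLevelOffsets, which
// is only populated by GetPoolInfo, so a build presumably runs roughly as follows:
//
//     builder.InitBuild(tree);                             // compute pointer sizes and node order
//     auto info = builder.GetPoolInfo(tree, ptrs, order);  // also caches mPointerLevelOffsets
//     auto luts = builder.GetAdditionalPoolInfo(tree, ptrs, order);
//     // ... WrapPointer(tree, nodeIndex, indexInPool, ptr) for every child pointer ...
//     builder.FinishBuild(tree);                           // clears all cached state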