// Source file: CDAG/Research/core/BitHelper.h (C++ header, 331 lines, 13 KiB)

#pragma once
#include "Defines.h"
#include "CompileTimeArray.h"
#include <vector>
#include <assert.h>
#include "../inc/tbb/parallel_for.h"
// Compile-time population count: BitCount<x>::value is the number of set bits among
// the 8 least significant bits of x. The count is built up in three folding steps
// (per bit pair, per nibble, per byte) instead of one large expression.
template<size_t x> struct BitCount
{
    enum {
        // Every 2-bit field holds the number of set bits of the matching bit pair of x
        pairSums = (x & 0x55) + ((x >> 1) & 0x55),
        // Every 4-bit field holds the number of set bits of the matching nibble of x
        nibbleSums = (pairSums & 0x33) + ((pairSums >> 2) & 0x33),
        // Adding both nibble counts gives the number of set bits in the low byte of x
        value = (nibbleSums & 0x0F) + ((nibbleSums >> 4) & 0x0F)
    };
};
// Lookup table type built from BitCount with a size argument of 256; BitHelper::GetSet(unsigned8)
// reads it through BitCountLookupTable::data.
// NOTE(review): presumably data[i] equals BitCount<i>::value (generate_array comes from
// CompileTimeArray.h, which is not visible here) - confirm.
typedef generate_array<256, BitCount>::result BitCountLookupTable;
namespace BitHelper
{
// Returns the number of set bits in a 64-bit value.
// Neighbouring bit fields are added in parallel; the field width doubles every round
// (1 -> 2 -> 4 -> 8 -> 16 -> 32 -> 64 bits), so after six rounds x holds the total.
inline static unsigned8 GetSet(unsigned64 x)
{
    // Masks selecting the lower half of every 2-, 4-, 8-, 16-, 32- and 64-bit field
    const unsigned64 lowerHalves[6] = {
        0x5555555555555555, // 0x55 = 01010101
        0x3333333333333333, // 0x33 = 00110011
        0x0F0F0F0F0F0F0F0F, // 0x0F = 00001111
        0x00FF00FF00FF00FF,
        0x0000FFFF0000FFFF,
        0x00000000FFFFFFFF
    };
    for (unsigned round = 0; round < 6; ++round)
    {
        const unsigned halfWidth = 1u << round;
        x = (x & lowerHalves[round]) + ((x >> halfWidth) & lowerHalves[round]);
    }
    return (unsigned8)x;
}
// Returns the number of set bits in a 32-bit value.
// Method from https://graphics.stanford.edu/~seander/bithacks.html (12 operations).
inline static unsigned8 GetSet(unsigned32 v)
{
    // Number of set bits per 2-bit field
    const unsigned32 pairCounts = v - ((v >> 1) & 0x55555555);
    // Number of set bits per 4-bit field
    const unsigned32 nibbleCounts = (pairCounts & 0x33333333) + ((pairCounts >> 2) & 0x33333333);
    // Number of set bits per byte
    const unsigned32 byteCounts = (nibbleCounts + (nibbleCounts >> 4)) & 0xF0F0F0F;
    // Multiplying by 0x01010101 sums the four byte counts into the most significant byte
    return (unsigned8)((byteCounts * 0x1010101) >> 24);
}
// Returns the number of set bits in a 16-bit value by adding neighbouring bit fields in
// parallel (pairs, nibbles, bytes and finally both bytes).
inline static unsigned8 GetSet(unsigned16 x)
{
    // 0x5555 = 0101...: count per 2-bit field
    const unsigned16 pairCounts = (unsigned16)((x & 0x5555) + ((x >> 1) & 0x5555));
    // 0x3333 = 0011...: count per 4-bit field
    const unsigned16 nibbleCounts = (unsigned16)((pairCounts & 0x3333) + ((pairCounts >> 2) & 0x3333));
    // 0x0F0F = 00001111...: count per byte
    const unsigned16 byteCounts = (unsigned16)((nibbleCounts & 0x0F0F) + ((nibbleCounts >> 4) & 0x0F0F));
    // Sum of the low-byte and high-byte counts
    return (unsigned8)((byteCounts & 0x00FF) + ((byteCounts >> 8) & 0x00FF));
}
// Returns the number of set bits in an 8-bit value.
// The answer is read from BitCountLookupTable instead of being computed at run time.
inline static unsigned8 GetSet(unsigned8 x)
{
    const unsigned8 setBits = BitCountLookupTable::data[x];
    return setBits;
}
// Returns a mask in which every bit at index `bit` and above is set and every bit below
// `bit` is cleared, counting from the least significant bit (bit 0 = LSB).
// T is expected to be an unsigned integer type and `bit` smaller than the number of bits in T.
template<typename T>
inline T GetLSMaskUntil(const unsigned8 bit)
{
    // Convert the all-ones pattern back to T before shifting: for types narrower than int,
    // ~((T)0) is promoted to the negative int -1, and left-shifting a negative value is
    // undefined behaviour before C++20. The resulting mask is the same.
    const T allOnes = (T)(~((T)0));
    return (T)(allOnes << bit);
}
// Returns a value of type T in which only bit `bit` is set, counting from the least
// significant bit (bit 0 = LSB).
template<typename T>
inline T GetLSSingleBitMask(const unsigned8 bit)
{
    const T one = (T)1;
    return one << bit;
}
// Returns a value of type T in which only bit `bit` is set, counting from the most
// significant bit (bit 0 = MSB, bit bitsInT - 1 = LSB).
template<typename T, unsigned8 bitsInT = sizeof(T) * 8>
inline T GetHSSingleBitMask(const unsigned8 bit)
{
    const T one = (T)1;
    return one << (bitsInT - 1 - bit);
}
// Specializations for the fixed-width unsigned types: start at the top bit and move it
// down by pos positions.
template<> inline unsigned8 GetHSSingleBitMask(const unsigned8 pos)
{
    const unsigned8 topBit = 0x80;
    return topBit >> pos;
}
template<> inline unsigned16 GetHSSingleBitMask(const unsigned8 pos)
{
    const unsigned16 topBit = 0x8000;
    return topBit >> pos;
}
template<> inline unsigned32 GetHSSingleBitMask(const unsigned8 pos)
{
    const unsigned32 topBit = 0x80000000;
    return topBit >> pos;
}
template<> inline unsigned64 GetHSSingleBitMask(const unsigned8 pos)
{
    const unsigned64 topBit = 0x8000000000000000;
    return topBit >> pos;
}
template<typename T, unsigned8 bitsInT = sizeof(T) * 8>
inline T GetLSMask(const unsigned8 startIndex, const unsigned8 endIndex) {
if (startIndex == endIndex) return (T)0;
// Note that s needs to be defined first. If this happens in the same line, I couldn't get the compiler to make sure the size of the datatype is correct
T s = ~((T)0);
unsigned8 firstShift = bitsInT - endIndex;
s <<= firstShift;
s >>= firstShift + startIndex;
s <<= startIndex;
return s;
}
template<typename T, unsigned8 bitsInT = sizeof(T) * 8>
inline T GetHSMask(const unsigned8 startIndex, const unsigned8 endIndex) {
if (startIndex == endIndex) return (T)0;
// Note that s needs to be defined first. If this happens in the same line, I couldn't get the compiler to make sure the size of the datatype is correct
T s = ~((T)0);
unsigned8 firstShift = bitsInT - endIndex;
s >>= firstShift;
s <<= firstShift + startIndex;
s >>= startIndex;
return s;
}
// Returns the number of set bits among the `pos` least significant bits of value: the
// value is shifted left by (bitsInT - pos) so that only those bits remain, then counted.
// pos is expected to be at most bitsInT; pos == 0 gives 0.
// NOTE(review): despite the "HS" in the name, the shift keeps the low-order bits of value -
// confirm that this is what callers expect.
template<typename T, unsigned8 bitsInT = sizeof(T) * 8>
inline unsigned8 GetHSSetBefore(const T value, const unsigned8 pos)
{
    // pos == 0 would shift by the full width of T, which is undefined for 32- and 64-bit
    // types (x86 typically leaves the value untouched, which counts every bit instead of none).
    if (pos == 0) return 0;
    return GetSet((T)(value << (bitsInT - pos)));
}
// Returns true if bit `pos` of value is set, counting from the most significant bit (0 = MSB).
template<typename T>
inline bool GetHS(const T value, const unsigned8 pos)
{
    const T probe = GetHSSingleBitMask<T>(pos);
    return (value & probe) != 0;
}
// Returns true if bit `pos` of value is set, counting from the least significant bit (0 = LSB).
template<typename T>
inline bool GetLS(const T value, const unsigned8 pos)
{
    const T probe = BitHelper::GetLSSingleBitMask<T>(pos);
    return (value & probe) != 0;
}
// Returns value with the bits selected by setMask replaced by the matching bits of
// setValue; bits outside setMask are taken from value unchanged.
template<typename T>
inline T SetLS(const T value, const T& setMask, const T& setValue)
{
    const T untouched = value & ~setMask;
    const T replaced = setMask & setValue;
    return untouched | replaced;
}
// Sets bit `pos` of value in place, counting from the least significant bit (0 = LSB).
template<typename T>
inline void SetLS(T& value, const unsigned8 pos)
{
    const T bitmask = GetLSSingleBitMask<T>(pos);
    value |= bitmask;
}
// Sets bit `pos` of value in place, counting from the most significant bit (0 = MSB).
template<typename T>
inline void SetHS(T& value, const unsigned8 pos)
{
    const T bitmask = GetHSSingleBitMask<T>(pos);
    value |= bitmask;
}
// Writes bit `pos` of value in place, counting from the least significant bit (0 = LSB):
// the bit becomes 1 when `set` is true and 0 otherwise. All other bits are left alone.
template<typename T>
inline void SetLS(T& value, const unsigned8 pos, const bool& set)
{
    const T bitmask = GetLSSingleBitMask<T>(pos);
    if (set)
        value |= bitmask;   // switch the bit on
    else
        value &= ~bitmask;  // switch the bit off
}
// Writes bit `pos` of value in place, counting from the most significant bit (0 = MSB):
// the bit becomes 1 when `set` is true and 0 otherwise. All other bits are left alone.
template<typename T>
inline void SetHS(T& value, const unsigned8 pos, const bool& set)
{
    const T bitmask = GetHSSingleBitMask<T>(pos);
    if (set)
        value |= bitmask;   // switch the bit on
    else
        value &= ~bitmask;  // switch the bit off
}
// Rounds v up to a power of two; values that already are a power of two are returned
// unchanged. Because of the unsigned wrap-around in the decrement/increment, an input of 0
// gives 0, and so does any input whose next power of two does not fit in unsigned32.
inline unsigned32 CeilToNearestPowerOfTwo(unsigned32 v)
{
    // Step back by one so that exact powers of two are not rounded up to the next one
    --v;
    // Copy the highest set bit into every lower position (shifts of 1, 2, 4, 8 and 16)
    for (unsigned shift = 1; shift <= 16; shift *= 2)
        v |= v >> shift;
    // All bits below the highest one are set now; adding one carries into the next power of two
    ++v;
    return v;
}
// Returns the largest power of two that is smaller than or equal to value (0 for value == 0).
// This is computed directly instead of halving the rounded-up power of two: halving is off
// by a factor of two when value already is a power of two (8 would give 4, 1 would give 0)
// and gives 0 when the rounded-up value no longer fits in 32 bits.
inline unsigned32 FloorToNearestPowerOfTwo(unsigned32 value)
{
    // Copy the highest set bit into every lower position...
    value |= value >> 1;
    value |= value >> 2;
    value |= value >> 4;
    value |= value >> 8;
    value |= value >> 16;
    // ...and clear everything below it again, leaving only the highest set bit
    return value - (value >> 1);
}
// Returns true if exactly one bit of v is set, i.e. v is a power of two. 0 is not a power of two.
template<typename T>
inline bool IsPowerOfTwo(T v)
{
    if (!v) return false;
    // Subtracting one clears the lowest set bit and sets all bits below it; if nothing
    // overlaps with v afterwards, that bit was the only one set.
    return !(v & (v - 1));
}
// Base-2 logarithm rounded down, i.e. the index of the highest set bit of v.
// Both v == 0 and v == 1 give 0.
template<typename T>
inline unsigned8 Log2(T v) {
    unsigned8 highestBit = 0;
    // Every right shift that leaves a non-zero value moves the highest bit one index up
    for (v >>= 1; v; v >>= 1)
        highestBit++;
    return highestBit;
}
// Base-2 logarithm rounded up: Log2(v) when v is a power of two, Log2(v) + 1 otherwise
// (Log2 rounds down, so anything that is not a power of two needs one more).
template<typename T>
inline unsigned8 Log2Ceil(T v) {
    const unsigned8 roundedDown = Log2(v);
    if (IsPowerOfTwo(v)) return roundedDown;
    return roundedDown + 1;
}
// Returns 2^v as a 64-bit value, computed by shifting the number 1 left by v positions.
template<typename T>
inline unsigned64 Exp2(const T& v)
{
    const unsigned64 one = (unsigned64)1;
    return one << v;
}
// Rounds a number of bits up to the next multiple of 8. Values that already are a
// multiple of 8 are returned unchanged.
template<typename T>
inline T RoundToBytes(const T& v)
{
    // The three lowest bits are the remainder of v divided by 8
    const T bitsPastLastByte = v & GetLSMask<T>(0, 3);
    if (bitsPastLastByte == 0) return v;
    // Drop the remainder and add one full byte
    return ((v >> 3) + 1) << 3;
}
// Rotates the bits of v to the left by `shift` positions: bits pushed out at the most
// significant end re-enter at the least significant end. `shift` is reduced modulo bitsInT.
// T is expected to be an unsigned integer type.
template<typename T, unsigned8 bitsInT = sizeof(T) * 8>
inline T CircularShiftLeft(T v, T shift)
{
    shift %= bitsInT;
    // Rotating by zero must not reach the expression below: it would shift right by
    // bitsInT, and shifting by the full width of the type is undefined behaviour.
    if (shift == 0) return v;
    return (T)((v << shift) | (v >> (bitsInT - shift)));
}
// Rotates the bits of v to the right by `shift` positions: bits pushed out at the least
// significant end re-enter at the most significant end. `shift` is reduced modulo bitsInT.
// T is expected to be an unsigned integer type.
template<typename T, unsigned8 bitsInT = sizeof(T) * 8>
inline T CircularShiftRight(T v, T shift)
{
    shift %= bitsInT;
    // Rotating by zero must not reach the expression below: it would shift left by
    // bitsInT, and shifting by the full width of the type is undefined behaviour.
    if (shift == 0) return v;
    return (T)((v >> shift) | (v << (bitsInT - shift)));
}
template<typename T>
inline void SplitInBytesAndMove(T value, std::vector<unsigned8>& destination, size_t offset, size_t size = sizeof(T))
{
SplitInBytesAndMove<T>(value, &destination[0], offset, size);
}
template<typename T>
inline void SplitInBytesAndMove(T value, unsigned8* destination, size_t offset, size_t size = sizeof(T))
{
for (size_t byte = 0; byte < size; byte++)
{
size_t lsByte = size - 1 - byte;
destination[offset + byte] = (unsigned8)((GetLSMask<T>((unsigned8)(lsByte * 8), (unsigned8)((lsByte + 1) * 8)) & value) >> (lsByte << 3));
}
}
template<typename T>
inline std::vector<unsigned8> SplitInBytes(T value, size_t size = sizeof(T))
{
std::vector<unsigned8> res(size);
SplitInBytesAndMove(value, res, 0, size);
return res;
}
template<typename T>
inline void JoinBytes(const std::vector<unsigned8>& source, T& dest, size_t offset = 0, size_t size = sizeof(T)) { JoinBytes(&source[0], dest, offset, size); }
template<typename T>
inline void JoinBytes(const unsigned8* source, T& dest, size_t offset = 0, size_t size = sizeof(T))
{
for (size_t byte = 0; byte < size; byte++)
dest |= source[offset + byte] << ((size - byte - 1) * 8);
}
template<typename T>
inline void JoinBytesLittleEndian(const std::vector<unsigned8>& source, T& dest, size_t offset = 0, size_t size = sizeof(T)) { JoinBytes(&source[0], dest, offset, size); }
template<typename T>
inline void JoinBytesLittleEndian(const unsigned8* source, T& dest, size_t offset = 0, size_t size = sizeof(T))
{
for (size_t byte = 0; byte < size; byte++)
dest |= source[offset + (size - byte - 1)] << ((size - byte - 1) * 8);
}
// Returns the positions of all set bits of mask, counted from the most significant bit
// (0 = MSB), in increasing order. The result has one entry per set bit.
template<typename T, unsigned8 bitsInT = ((unsigned8)sizeof(T) * 8)>
std::vector<unsigned8> GetBitMapHS(T mask)
{
    const unsigned setBitCount = BitHelper::GetSet(mask);
    std::vector<unsigned8> positions(setBitCount);
    unsigned next = 0; // Next free slot in positions
    for (unsigned8 pos = 0; pos < bitsInT; pos++)
    {
        if (!BitHelper::GetHS(mask, pos)) continue;
        positions[next] = pos;
        next++;
    }
    return positions;
}
// Packs the value v tightly, meaning that only the bits that are set in the mask will be kept, and they will be shifted so that they are next to each other
template<typename T, unsigned8 bitsInType = sizeof(T) * 8>
inline T PackTight(const T& v, const T& mask)
{
unsigned8 bitOffset = bitsInType - GetSet(mask);
T res = 0;
for (T i = 0; i < bitsInType; i++)
{
T curBitMask = GetHSSingleBitMask(i);
if ((mask & curBitMask) != 0)
{
if ((v & curBitMask) != 0) SetHS(res, bitOffset);
bitOffset++;
}
}
return res;
}
template<typename T, unsigned8 bitsInType = sizeof(T) * 8>
inline T UnpackTight(const T& v, const T& mask)
{
unsigned8 bitOffset = bitsInType - GetSet(mask);
T res = 0;
for (unsigned8 i = 0; i < bitsInType; i++)
if (GetHS(mask, i))
if (GetHS(v, bitOffset++)) SetHS(res, i);
return res;
}
// Used to tightly pack a whole vector of values at once. Offset should give the offset in bits within the first packed value
template<typename T_packed, typename T_unpacked, unsigned8 bitsInPacked = sizeof(T_packed) * 8, unsigned8 bitsInUnpacked = sizeof(T_unpacked) * 8>
std::vector<T_packed> PackTight(const std::vector<T_unpacked>& input, const std::vector<unsigned8>& bitMap, const unsigned8 offset = 0, const size_t startIndex = 0, size_t endIndex = ~size_t(0))
{
assert(offset < bitsInPacked);
if (endIndex > input.size()) endIndex = input.size();
size_t setBits = bitMap.size();
size_t requiredBits = setBits * (endIndex - startIndex) + offset;
size_t outputSize = requiredBits / bitsInPacked + (((requiredBits % bitsInPacked) == 0) ? 0 : 1);
std::vector<T_packed> packed(outputSize);
// Compress the new data (in parallel per packed byte)
tbb::parallel_for(size_t(0), outputSize, [&](size_t i)
{
size_t startBit = i * bitsInPacked;
T_packed cur = 0;
for (size_t j = 0; j < bitsInPacked; j++)
{
size_t bit = startBit + j;
if (bit < offset) continue;
bit -= offset;
size_t sourceIndex = bit / setBits;
size_t sourceBitIndex = bit % setBits;
size_t inputIndex = startIndex + sourceIndex;
if (inputIndex >= endIndex) break;
auto source = input[inputIndex];
if (GetHS(source, bitMap[sourceBitIndex]))
SetHS(cur, (unsigned8)j);
}
packed[i] = cur;
});
return packed;
}
// Used to tightly pack a whole vector of values at once. Offset should give the offset in bits within the first packed value
template<typename T_packed, typename T_unpacked, unsigned8 bitsInPacked = sizeof(T_packed) * 8, unsigned8 bitsInUnpacked = sizeof(T_unpacked) * 8>
std::vector<T_packed> PackTight(const std::vector<T_unpacked>& input, const T_unpacked& mask, const unsigned8 offset = 0, const size_t startIndex = 0, size_t endIndex = ~size_t(0))
{
return PackTight<T_packed, T_unpacked, bitsInPacked, bitsInUnpacked>(input, GetBitMapHS(mask), offset, startIndex, endIndex);
}
// Unpack a single packed byte at some location.
// packed should contain the vector with packed data
// i gives the index of the value to unpack (unpacked value index)
// bitMap should contain the bitMap for the mask to be used (GetBitMapHS(mask))
// offset gives the offset from which the data starts
// packedArrayOffset gives the index where the packed data with the current mask starts
template<typename T_packed, typename T_unpacked, unsigned8 bitsInPacked = sizeof(T_packed) * 8, unsigned8 bitsInUnpacked = sizeof(T_unpacked) * 8>
T_unpacked UnpackTightAt(const std::vector<T_packed>& packed, const size_t& i, const std::vector<unsigned8>& bitMap, const unsigned8& offset = 0, const size_t& packedArrayOffset = 0)
{
assert(offset < bitsInPacked);
size_t setBits = bitMap.size();
size_t divisionShift = (bitsInPacked >> 3) * 3; // (bitsInPacked / 8) * 3
size_t modulusMask = BitHelper::GetLSMask<size_t>(0, (unsigned8)divisionShift); // Mask that captures the bits that are shifted out by the divisionShift
T_unpacked value = 0;
for (size_t j = 0; j < bitMap.size(); j++)
{
size_t bit = i * setBits + j + offset;
size_t byteIndex = bit >> divisionShift; // Divide by the number of bits in the packed type (gives the index of the packed value to look at)
T_packed bitInByte = (T_packed)(bit & modulusMask); // Modulus by the number of bits in the packed type (gives the bit index within the packed type)
if (BitHelper::GetHS(packed[packedArrayOffset + byteIndex], bitInByte)) // If the bit in the packed type is set, it should also be in the unpacked type
SetHS(value, bitMap[j]);
}
return value;
}
template<typename T_packed, typename T_unpacked, unsigned8 bitsInPacked = sizeof(T_packed) * 8, unsigned8 bitsInUnpacked = sizeof(T_unpacked) * 8>
T_unpacked UnpackTightAt(const std::vector<T_packed>& packed, const size_t& i, const T_unpacked mask, const unsigned8& offset = 0, const size_t& packedArrayOffset = 0)
{
return UnpackTightAt<T_packed, T_unpacked, bitsInPacked, bitsInUnpacked>(packed, i, GetBitMapHS(mask), offset, packedArrayOffset);
}
};