8235551: BitMap::count_one_bits should use population_count
Co-authored-by: Kim Barrett <kim.barrett@oracle.com> Reviewed-by: kbarrett, neliasso
This commit is contained in:
parent
f51595bec2
commit
91de8b6c91
@ -309,7 +309,7 @@ uint RegMask::Size() const {
|
||||
uint sum = 0;
|
||||
assert(valid_watermarks(), "sanity");
|
||||
for (int i = _lwm; i <= _hwm; i++) {
|
||||
sum += population_count(_A[i]);
|
||||
sum += population_count((unsigned)_A[i]);
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "utilities/bitMap.inline.hpp"
|
||||
#include "utilities/copy.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/population_count.hpp"
|
||||
|
||||
STATIC_ASSERT(sizeof(BitMap::bm_word_t) == BytesPerWord); // "Implementation assumption."
|
||||
|
||||
@ -646,50 +647,11 @@ bool BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const BitMap::idx_t* BitMap::_pop_count_table = NULL;
|
||||
|
||||
void BitMap::init_pop_count_table() {
|
||||
if (_pop_count_table == NULL) {
|
||||
BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256, mtInternal);
|
||||
for (uint i = 0; i < 256; i++) {
|
||||
table[i] = num_set_bits(i);
|
||||
}
|
||||
|
||||
if (!Atomic::replace_if_null(&_pop_count_table, table)) {
|
||||
guarantee(_pop_count_table != NULL, "invariant");
|
||||
FREE_C_HEAP_ARRAY(idx_t, table);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BitMap::idx_t BitMap::num_set_bits(bm_word_t w) {
|
||||
idx_t bits = 0;
|
||||
|
||||
while (w != 0) {
|
||||
while ((w & 1) == 0) {
|
||||
w >>= 1;
|
||||
}
|
||||
bits++;
|
||||
w >>= 1;
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
BitMap::idx_t BitMap::num_set_bits_from_table(unsigned char c) {
|
||||
assert(_pop_count_table != NULL, "precondition");
|
||||
return _pop_count_table[c];
|
||||
}
|
||||
|
||||
BitMap::idx_t BitMap::count_one_bits() const {
|
||||
init_pop_count_table(); // If necessary.
|
||||
idx_t sum = 0;
|
||||
typedef unsigned char uchar;
|
||||
for (idx_t i = 0; i < size_in_words(); i++) {
|
||||
bm_word_t w = map()[i];
|
||||
for (size_t j = 0; j < sizeof(bm_word_t); j++) {
|
||||
sum += num_set_bits_from_table(uchar(w & 255));
|
||||
w >>= 8;
|
||||
}
|
||||
sum += population_count(w);
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
@ -169,12 +169,6 @@ class BitMap {
|
||||
// Verify [beg,end) is a valid range, e.g. beg <= end <= size().
|
||||
void verify_range(idx_t beg, idx_t end) const NOT_DEBUG_RETURN;
|
||||
|
||||
// Statistics.
|
||||
static const idx_t* _pop_count_table;
|
||||
static void init_pop_count_table();
|
||||
static idx_t num_set_bits(bm_word_t w);
|
||||
static idx_t num_set_bits_from_table(unsigned char c);
|
||||
|
||||
// Allocation Helpers.
|
||||
|
||||
// Allocates and clears the bitmap memory.
|
||||
|
@ -25,23 +25,47 @@
|
||||
#ifndef SHARE_UTILITIES_POPULATION_COUNT_HPP
|
||||
#define SHARE_UTILITIES_POPULATION_COUNT_HPP
|
||||
|
||||
#include "metaprogramming/conditional.hpp"
|
||||
#include "metaprogramming/enableIf.hpp"
|
||||
#include "metaprogramming/isIntegral.hpp"
|
||||
#include "metaprogramming/isSigned.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
|
||||
// Returns the population count of x, i.e., the number of bits set in x.
|
||||
//
|
||||
// Adapted from Hacker's Delight, 2nd Edition, Figure 5-2.
|
||||
// Adapted from Hacker's Delight, 2nd Edition, Figure 5-2 and the text that
|
||||
// follows.
|
||||
//
|
||||
// Ideally this should be dispatched per platform to use optimized
|
||||
// instructions when available, such as POPCNT on modern x86/AMD. Our builds
|
||||
// still target and support older architectures that might lack support for
|
||||
// these, however. For example, with current build configurations,
|
||||
// __builtin_popcount(x) would generate a call to a similar but slower 64-bit
|
||||
// version of this 32-bit implementation.
|
||||
static uint32_t population_count(uint32_t x) {
|
||||
x -= ((x >> 1) & 0x55555555);
|
||||
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
|
||||
return (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
|
||||
// these. For example, with current build configurations, __builtin_popcount(x)
|
||||
// generate a call to a similar but slower 64-bit version when calling with
|
||||
// a 32-bit integer type.
|
||||
template <typename T>
|
||||
inline unsigned population_count(T x) {
|
||||
STATIC_ASSERT(BitsPerWord <= 128);
|
||||
STATIC_ASSERT(BitsPerByte == 8);
|
||||
STATIC_ASSERT(IsIntegral<T>::value);
|
||||
STATIC_ASSERT(!IsSigned<T>::value);
|
||||
// We need to take care with implicit integer promotion when dealing with
|
||||
// integers < 32-bit. We chose to do this by explicitly widening constants
|
||||
// to unsigned
|
||||
typedef typename Conditional<(sizeof(T) < sizeof(unsigned)), unsigned, T>::type P;
|
||||
const T all = ~T(0); // 0xFF..FF
|
||||
const P fives = all/3; // 0x55..55
|
||||
const P threes = (all/15) * 3; // 0x33..33
|
||||
const P z_ones = all/255; // 0x0101..01
|
||||
const P z_effs = z_ones * 15; // 0x0F0F..0F
|
||||
P r = x;
|
||||
r -= ((r >> 1) & fives);
|
||||
r = (r & threes) + ((r >> 2) & threes);
|
||||
r = ((r + (r >> 4)) & z_effs) * z_ones;
|
||||
// The preceeding multiply by z_ones is the only place where the intermediate
|
||||
// calculations can exceed the range of T. We need to discard any such excess
|
||||
// before the right-shift, hence the conversion back to T.
|
||||
return static_cast<T>(r) >> (((sizeof(T) - 1) * BitsPerByte));
|
||||
}
|
||||
|
||||
#endif // SHARE_UTILITIES_POPULATION_COUNT_HPP
|
||||
|
@ -25,12 +25,13 @@
|
||||
#include "precompiled.hpp"
|
||||
#include "runtime/os.hpp"
|
||||
#include "utilities/population_count.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "unittest.hpp"
|
||||
|
||||
#define BITS_IN_BYTE_ARRAY_SIZE 256
|
||||
|
||||
uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = {
|
||||
const uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = {
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
@ -49,30 +50,49 @@ uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = {
|
||||
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
|
||||
};
|
||||
|
||||
TEST(population_count, sparse) {
|
||||
template <typename T>
|
||||
void sparse() {
|
||||
const T max_val = max_value<T>();
|
||||
|
||||
// Step through the entire input range from a random starting point,
|
||||
// verify population_count return values against the lookup table
|
||||
// approach used historically
|
||||
uint32_t step = 4711;
|
||||
for (uint32_t value = os::random() % step; value < UINT_MAX - step; value += step) {
|
||||
uint32_t lookup = test_popcnt_bitsInByte[(value >> 24) & 0xff] +
|
||||
test_popcnt_bitsInByte[(value >> 16) & 0xff] +
|
||||
test_popcnt_bitsInByte[(value >> 8) & 0xff] +
|
||||
test_popcnt_bitsInByte[ value & 0xff];
|
||||
T step = T(1) << ((sizeof(T) * 8) - 7);
|
||||
|
||||
for (T value = os::random() % step; value < max_val - step; value += step) {
|
||||
uint64_t v = (uint64_t)value;
|
||||
unsigned lookup = 0u;
|
||||
for (unsigned i = 0u; i < sizeof(T); i++) {
|
||||
lookup += test_popcnt_bitsInByte[v & 0xff];
|
||||
v >>= 8u;
|
||||
}
|
||||
EXPECT_EQ(lookup, population_count(value))
|
||||
<< "value = " << value;
|
||||
}
|
||||
|
||||
// Test a few edge cases
|
||||
EXPECT_EQ(0u, population_count(0u))
|
||||
EXPECT_EQ(0u, population_count(T(0u)))
|
||||
<< "value = " << 0;
|
||||
EXPECT_EQ(1u, population_count(1u))
|
||||
EXPECT_EQ(1u, population_count(T(1u)))
|
||||
<< "value = " << 1;
|
||||
EXPECT_EQ(1u, population_count(2u))
|
||||
EXPECT_EQ(1u, population_count(T(2u)))
|
||||
<< "value = " << 2;
|
||||
EXPECT_EQ(32u, population_count(UINT_MAX))
|
||||
<< "value = " << UINT_MAX;
|
||||
EXPECT_EQ(31u, population_count(UINT_MAX - 1))
|
||||
<< "value = " << (UINT_MAX - 1);
|
||||
EXPECT_EQ(T(sizeof(T) * BitsPerByte), population_count(max_val))
|
||||
<< "value = " << max_val;
|
||||
EXPECT_EQ(T(sizeof(T) * BitsPerByte - 1u), population_count(T(max_val - 1u)))
|
||||
<< "value = " << (max_val - 1u);
|
||||
}
|
||||
|
||||
|
||||
TEST(population_count, sparse8) {
|
||||
sparse<uint8_t>();
|
||||
}
|
||||
TEST(population_count, sparse16) {
|
||||
sparse<uint16_t>();
|
||||
}
|
||||
TEST(population_count, sparse32) {
|
||||
sparse<uint32_t>();
|
||||
}
|
||||
TEST(population_count, sparse64) {
|
||||
sparse<uint64_t>();
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user