8234331: Add robust and optimized utility for rounding up to next power of two

Reviewed-by: eosterlund, stuefe, jrose
2019-12-06 18:05:36 +01:00 · 2019-12-06 18:05:36 +01:00 · 06cb195865
commit 06cb195865
parent 60fda4e577
19 changed files with 551 additions and 173 deletions
--- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp
@ -23,9 +23,9 @@

 #include "precompiled.hpp"
 #include "gc/z/zGlobals.hpp"
-#include "gc/z/zUtils.inline.hpp"
 #include "runtime/globals.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/powerOfTwo.hpp"

 //
 // The heap can have three different layouts, depending on the max heap size.
@ -142,7 +142,7 @@ uintptr_t ZPlatformAddressBase() {
 size_t ZPlatformAddressOffsetBits() {
  const size_t min_address_offset_bits = 42; // 4TB
  const size_t max_address_offset_bits = 44; // 16TB
-  const size_t address_offset = ZUtils::round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
+  const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
  const size_t address_offset_bits = log2_intptr(address_offset);
  return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
 }
--- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp
@ -23,9 +23,9 @@

 #include "precompiled.hpp"
 #include "gc/z/zGlobals.hpp"
-#include "gc/z/zUtils.inline.hpp"
 #include "runtime/globals.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/powerOfTwo.hpp"

 //
 // The heap can have three different layouts, depending on the max heap size.
@ -142,7 +142,7 @@ uintptr_t ZPlatformAddressBase() {
 size_t ZPlatformAddressOffsetBits() {
  const size_t min_address_offset_bits = 42; // 4TB
  const size_t max_address_offset_bits = 44; // 16TB
-  const size_t address_offset = ZUtils::round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
+  const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
  const size_t address_offset_bits = log2_intptr(address_offset);
  return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
 }
--- a/src/hotspot/share/gc/z/zForwarding.cpp
+++ b/src/hotspot/share/gc/z/zForwarding.cpp
@ -24,9 +24,9 @@
 #include "precompiled.hpp"
 #include "gc/z/zForwarding.inline.hpp"
 #include "gc/z/zPage.inline.hpp"
-#include "gc/z/zUtils.inline.hpp"
 #include "memory/allocation.hpp"
 #include "utilities/debug.hpp"
+#include "utilities/powerOfTwo.hpp"

 ZForwarding* ZForwarding::create(ZPage* page) {
  // Allocate table for linear probing. The size of the table must be
@ -34,7 +34,7 @@ ZForwarding* ZForwarding::create(ZPage* page) {
  // The table is sized to have a load factor of 50%, i.e. sized to have
  // double the number of entries actually inserted.
  assert(page->live_objects() > 0, "Invalid value");
-  const size_t nentries = ZUtils::round_up_power_of_2(page->live_objects() * 2);
+  const size_t nentries = round_up_power_of_2(page->live_objects() * 2);
  return ::new (AttachedArray::alloc(nentries)) ZForwarding(page, nentries);
 }

--- a/src/hotspot/share/gc/z/zHeuristics.cpp
+++ b/src/hotspot/share/gc/z/zHeuristics.cpp
@ -25,10 +25,10 @@
 #include "gc/z/zCPU.inline.hpp"
 #include "gc/z/zGlobals.hpp"
 #include "gc/z/zHeuristics.hpp"
-#include "gc/z/zUtils.inline.hpp"
 #include "logging/log.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/os.hpp"
+#include "utilities/powerOfTwo.hpp"

 void ZHeuristics::set_medium_page_size() {
  // Set ZPageSizeMedium so that a medium page occupies at most 3.125% of the
@ -39,7 +39,7 @@ void ZHeuristics::set_medium_page_size() {
  const size_t max = ZGranuleSize * 16;
  const size_t unclamped = MaxHeapSize * 0.03125;
  const size_t clamped = MIN2(MAX2(min, unclamped), max);
-  const size_t size = ZUtils::round_down_power_of_2(clamped);
+  const size_t size = round_down_power_of_2(clamped);

  if (size > ZPageSizeSmall) {
    // Enable medium pages
--- a/src/hotspot/share/gc/z/zMark.cpp
+++ b/src/hotspot/share/gc/z/zMark.cpp
@ -48,6 +48,7 @@
 #include "runtime/thread.hpp"
 #include "utilities/align.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/powerOfTwo.hpp"
 #include "utilities/ticks.hpp"

 static const ZStatSubPhase ZSubPhaseConcurrentMark("Concurrent Mark");
@ -79,7 +80,7 @@ size_t ZMark::calculate_nstripes(uint nworkers) const {
  // Calculate the number of stripes from the number of workers we use,
  // where the number of stripes must be a power of two and we want to
  // have at least one worker per stripe.
-  const size_t nstripes = ZUtils::round_down_power_of_2(nworkers);
+  const size_t nstripes = round_down_power_of_2(nworkers);
  return MIN2(nstripes, ZMarkStripesMax);
 }

--- a/src/hotspot/share/gc/z/zUtils.inline.hpp
+++ b/src/hotspot/share/gc/z/zUtils.inline.hpp
@ -32,21 +32,6 @@
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"

-inline size_t ZUtils::round_up_power_of_2(size_t value) {
-  assert(value != 0, "Invalid value");
-
-  if (is_power_of_2(value)) {
-    return value;
-  }
-
-  return (size_t)1 << (log2_intptr(value) + 1);
-}
-
-inline size_t ZUtils::round_down_power_of_2(size_t value) {
-  assert(value != 0, "Invalid value");
-  return (size_t)1 << log2_intptr(value);
-}
-
 inline size_t ZUtils::bytes_to_words(size_t size_in_bytes) {
  assert(is_aligned(size_in_bytes, BytesPerWord), "Size not word aligned");
  return size_in_bytes >> LogBytesPerWord;
--- a/src/hotspot/share/libadt/dict.cpp
+++ b/src/hotspot/share/libadt/dict.cpp
@ -24,6 +24,7 @@

 #include "precompiled.hpp"
 #include "libadt/dict.hpp"
+#include "utilities/powerOfTwo.hpp"

 // Dictionaries - An Abstract Data Type

@ -86,8 +87,7 @@ Dict::Dict(CmpKey initcmp, Hash inithash, Arena *arena, int size)
    initflag = 1;               // Never again
  }

-  i=16;
-  while( i < size ) i <<= 1;
+  i = MAX2(16, round_up_power_of_2(size));
  _size = i;                    // Size is a power of 2
  _cnt = 0;                     // Dictionary is empty
  _bin = (bucket*)_arena->Amalloc_4(sizeof(bucket)*_size);
--- a/src/hotspot/share/libadt/vectset.cpp
+++ b/src/hotspot/share/libadt/vectset.cpp
@ -27,6 +27,7 @@
 #include "memory/allocation.inline.hpp"
 #include "memory/arena.hpp"
 #include "utilities/count_leading_zeros.hpp"
+#include "utilities/powerOfTwo.hpp"

 VectorSet::VectorSet(Arena *arena) : _size(2),
    _data(NEW_ARENA_ARRAY(arena, uint32_t, 2)),
@ -38,8 +39,8 @@ VectorSet::VectorSet(Arena *arena) : _size(2),
 // Expand the existing set to a bigger size
 void VectorSet::grow(uint new_size) {
  new_size = (new_size + bit_mask) >> word_bits;
-  assert(new_size != 0 && new_size < (1U << 31), "");
-  uint x = (1U << 31) >> (count_leading_zeros(new_size) - 1);
+  assert(new_size > 0, "sanity");
+  uint x = next_power_of_2(new_size);
  _data = REALLOC_ARENA_ARRAY(_set_arena, uint32_t, _data, _size, x);
  Copy::zero_to_bytes(_data + _size, (x - _size) * sizeof(uint32_t));
  _size = x;
--- a/src/hotspot/share/opto/block.cpp
+++ b/src/hotspot/share/opto/block.cpp
@ -36,6 +36,7 @@
 #include "opto/opcodes.hpp"
 #include "opto/rootnode.hpp"
 #include "utilities/copy.hpp"
+#include "utilities/powerOfTwo.hpp"

 void Block_Array::grow( uint i ) {
  assert(i >= Max(), "must be an overflow");
@ -47,7 +48,7 @@ void Block_Array::grow( uint i ) {
    _blocks[0] = NULL;
  }
  uint old = _size;
-  while( i >= _size ) _size <<= 1;      // Double to fit
+  _size = next_power_of_2(i);
  _blocks = (Block**)_arena->Arealloc( _blocks, old*sizeof(Block*),_size*sizeof(Block*));
  Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) );
 }
--- a/src/hotspot/share/opto/loopnode.cpp
+++ b/src/hotspot/share/opto/loopnode.cpp
@ -3391,10 +3391,7 @@ void IdealLoopTree::verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent
 void PhaseIdealLoop::set_idom(Node* d, Node* n, uint dom_depth) {
  uint idx = d->_idx;
  if (idx >= _idom_size) {
-    uint newsize = _idom_size<<1;
-    while( idx >= newsize ) {
-      newsize <<= 1;
-    }
+    uint newsize = next_power_of_2(idx);
    _idom      = REALLOC_RESOURCE_ARRAY( Node*,     _idom,_idom_size,newsize);
    _dom_depth = REALLOC_RESOURCE_ARRAY( uint, _dom_depth,_idom_size,newsize);
    memset( _dom_depth + _idom_size, 0, (newsize - _idom_size) * sizeof(uint) );
--- a/src/hotspot/share/opto/node.cpp
+++ b/src/hotspot/share/opto/node.cpp
@ -41,6 +41,7 @@
 #include "opto/type.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/macros.hpp"
+#include "utilities/powerOfTwo.hpp"

 class RegMask;
 // #include "phase.hpp"
@ -653,7 +654,7 @@ void Node::grow( uint len ) {
    to[3] = NULL;
    return;
  }
-  while( new_max <= len ) new_max <<= 1; // Find next power-of-2
+  new_max = next_power_of_2(len);
  // Trimming to limit allows a uint8 to handle up to 255 edges.
  // Previously I was using only powers-of-2 which peaked at 128 edges.
  //if( new_max >= limit ) new_max = limit-1;
@ -676,7 +677,7 @@ void Node::out_grow( uint len ) {
    _out = (Node **)arena->Amalloc(4*sizeof(Node*));
    return;
  }
-  while( new_max <= len ) new_max <<= 1; // Find next power-of-2
+  new_max = next_power_of_2(len);
  // Trimming to limit allows a uint8 to handle up to 255 edges.
  // Previously I was using only powers-of-2 which peaked at 128 edges.
  //if( new_max >= limit ) new_max = limit-1;
@ -2256,7 +2257,7 @@ void Node_Array::grow( uint i ) {
    _nodes[0] = NULL;
  }
  uint old = _max;
-  while( i >= _max ) _max <<= 1;        // Double to fit
+  _max = next_power_of_2(i);
  _nodes = (Node**)_a->Arealloc( _nodes, old*sizeof(Node*),_max*sizeof(Node*));
  Copy::zero_to_bytes( &_nodes[old], (_max-old)*sizeof(Node*) );
 }
--- a/src/hotspot/share/opto/phaseX.cpp
+++ b/src/hotspot/share/opto/phaseX.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -39,6 +39,7 @@
 #include "opto/regalloc.hpp"
 #include "opto/rootnode.hpp"
 #include "utilities/macros.hpp"
+#include "utilities/powerOfTwo.hpp"

 //=============================================================================
 #define NODE_HASH_MINIMUM_SIZE    255
@ -260,12 +261,9 @@ bool NodeHash::hash_delete( const Node *n ) {

 //------------------------------round_up---------------------------------------
 // Round up to nearest power of 2
-uint NodeHash::round_up( uint x ) {
-  x += (x>>2);                  // Add 25% slop
-  if( x <16 ) return 16;        // Small stuff
-  uint i=16;
-  while( i < x ) i <<= 1;       // Double to fit
-  return i;                     // Return hash table size
+uint NodeHash::round_up(uint x) {
+  x += (x >> 2);                  // Add 25% slop
+  return MAX2(16U, round_up_power_of_2(x));
 }

 //------------------------------grow-------------------------------------------
@ -2138,7 +2136,7 @@ void Type_Array::grow( uint i ) {
    _types[0] = NULL;
  }
  uint old = _max;
-  while( i >= _max ) _max <<= 1;        // Double to fit
+  _max = next_power_of_2(i);
  _types = (const Type**)_a->Arealloc( _types, old*sizeof(Type*),_max*sizeof(Type*));
  memset( &_types[old], 0, (_max-old)*sizeof(Type*) );
 }
--- a/src/hotspot/share/runtime/threadSMR.cpp
+++ b/src/hotspot/share/runtime/threadSMR.cpp
@ -37,6 +37,7 @@
 #include "utilities/copy.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ostream.hpp"
+#include "utilities/powerOfTwo.hpp"
 #include "utilities/resourceHash.hpp"
 #include "utilities/vmError.hpp"

@ -809,13 +810,7 @@ void ThreadsSMRSupport::free_list(ThreadsList* threads) {

  // Hash table size should be first power of two higher than twice the length of the ThreadsList
  int hash_table_size = MIN2((int)get_java_thread_list()->length(), 32) << 1;
-  hash_table_size--;
-  hash_table_size |= hash_table_size >> 1;
-  hash_table_size |= hash_table_size >> 2;
-  hash_table_size |= hash_table_size >> 4;
-  hash_table_size |= hash_table_size >> 8;
-  hash_table_size |= hash_table_size >> 16;
-  hash_table_size++;
+  hash_table_size = round_up_power_of_2(hash_table_size);

  // Gather a hash table of the current hazard ptrs:
  ThreadScanHashtable *scan_table = new ThreadScanHashtable(hash_table_size);
@ -872,13 +867,7 @@ bool ThreadsSMRSupport::is_a_protected_JavaThread(JavaThread *thread) {
  // Hash table size should be first power of two higher than twice
  // the length of the Threads list.
  int hash_table_size = MIN2((int)get_java_thread_list()->length(), 32) << 1;
-  hash_table_size--;
-  hash_table_size |= hash_table_size >> 1;
-  hash_table_size |= hash_table_size >> 2;
-  hash_table_size |= hash_table_size >> 4;
-  hash_table_size |= hash_table_size >> 8;
-  hash_table_size |= hash_table_size >> 16;
-  hash_table_size++;
+  hash_table_size = round_up_power_of_2(hash_table_size);

  // Gather a hash table of the JavaThreads indirectly referenced by
  // hazard ptrs.
--- a/src/hotspot/share/utilities/count_leading_zeros.hpp
+++ b/src/hotspot/share/utilities/count_leading_zeros.hpp
@ -27,45 +27,190 @@

 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
-#include "utilities/count_trailing_zeros.hpp"

-#if defined(TARGET_COMPILER_visCPP)
-#include <intrin.h>
-#pragma intrinsic(_BitScanReverse)
-#elif defined(TARGET_COMPILER_xlc)
-#include <builtins.h>
-#endif
+// uint32_t count_leading_zeros(T x)

-// uint32_t count_leading_zeros(uint32_t x)
 // Return the number of leading zeros in x, e.g. the zero-based index
 // of the most significant set bit in x.  Undefined for 0.
-inline uint32_t count_leading_zeros(uint32_t x) {
-  assert(x != 0, "precondition");
+
+// We implement and support variants for 8, 16, 32 and 64 bit integral types.
+template <typename T, size_t n> struct CountLeadingZerosImpl;
+
+template <typename T> unsigned count_leading_zeros(T v) {
+  assert(v != 0, "precondition");
+  return CountLeadingZerosImpl<T, sizeof(T)>::doit(v);
+}
+
+/*****************************************************************************
+ * GCC and compatible (including Clang)
+ *****************************************************************************/
 #if defined(TARGET_COMPILER_gcc)
-  return __builtin_clz(x);
+
+template <typename T> struct CountLeadingZerosImpl<T, 1> {
+  static unsigned doit(T v) {
+    return __builtin_clz((uint32_t)v & 0xFF) - 24u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 2> {
+  static unsigned doit(T v) {
+    return __builtin_clz((uint32_t)v & 0xFFFF) - 16u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 4> {
+  static unsigned doit(T v) {
+    return __builtin_clz(v);
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 8> {
+  static unsigned doit(T v) {
+    return __builtin_clzll(v);
+  }
+};
+
+/*****************************************************************************
+ * Microsoft Visual Studio
+ *****************************************************************************/
 #elif defined(TARGET_COMPILER_visCPP)
-  unsigned long index;
-  _BitScanReverse(&index, x);
-  return index ^ 31u;
-#elif defined(TARGET_COMPILER_xlc)
-  return __cntlz4(x);
+
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+
+#ifdef _LP64
+#pragma intrinsic(_BitScanReverse64)
+#endif
+
+template <typename T> struct CountLeadingZerosImpl<T, 1> {
+  static unsigned doit(T v) {
+    unsigned long index;
+    _BitScanReverse(&index, (uint32_t)v & 0xFF);
+    return 7u - index;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 2> {
+  static unsigned doit(T v) {
+    unsigned long index;
+    _BitScanReverse(&index, (uint32_t)v & 0xFFFF);
+    return 15u - index;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 4> {
+  static unsigned doit(T v) {
+    unsigned long index;
+    _BitScanReverse(&index, v);
+    return 31u - index;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 8> {
+  static unsigned doit(T v) {
+    unsigned long index;
+#ifdef _LP64
+    _BitScanReverse64(&index, v);
+    return 63u - index;
 #else
+    uint64_t high = ((uint64_t)v) >> 32ULL;
+    if (high != 0) {
+      return count_leading_zeros((uint32_t)high);
+    } else {
+      return count_leading_zeros((uint32_t)v) + 32;
+    }
+#endif
+  }
+};
+
+/*****************************************************************************
+ * IBM XL C/C++
+ *****************************************************************************/
+#elif defined(TARGET_COMPILER_xlc)
+
+#include <builtins.h>
+
+template <typename T> struct CountLeadingZerosImpl<T, 1> {
+  static unsigned doit(T v) {
+    return __cntlz4((uint32_t)v & 0xFF) - 24u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 2> {
+  static unsigned doit(T v) {
+    return __cntlz4((uint32_t)v & 0xFFFF) - 16u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 4> {
+  static unsigned doit(T v) {
+    return __cntlz4(v);
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 8> {
+  static unsigned doit(T v) {
+    return __cntlz8(v);
+  }
+};
+
+/*****************************************************************************
+ * Fallback
+ *****************************************************************************/
+#else
+
+inline uint32_t count_leading_zeros_32(uint32_t x) {
+  assert(x != 0, "precondition");
+
  // Efficient and portable fallback implementation:
  // http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
  // - with positions xor'd by 31 to get number of leading zeros
  // rather than position of highest bit.
-  static const int MultiplyDeBruijnBitPosition[32] = {
-      31, 22, 30, 21, 18, 10, 29,  2, 20, 17, 15, 13, 9,  6, 28, 1,
-      23, 19, 11,  3, 16, 14,  7, 24, 12,  4,  8, 25, 5, 26, 27, 0
+  static const uint32_t MultiplyDeBruijnBitPosition[32] = {
+      31, 22, 30, 21, 18, 10, 29,  2, 20, 17, 15, 13, 9,  6, 28,  1,
+      23, 19, 11,  3, 16, 14,  7, 24, 12,  4,  8, 25, 5, 26, 27,  0
  };

-  x |= x >> 1; // first round down to one less than a power of 2
+  // First round down to one less than a power of 2
+  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
-  return MultiplyDeBruijnBitPosition[(uint32_t)( x * 0x07c4acddu ) >> 27];
-#endif
+  // Multiply by a magic constant which ensures the highest 5 bits point to
+  // the right index in the lookup table
+  return MultiplyDeBruijnBitPosition[(x * 0x07c4acddu) >> 27u];
 }

+template <typename T> struct CountLeadingZerosImpl<T, 1> {
+  static unsigned doit(T v) {
+    return count_leading_zeros_32((uint32_t)v & 0xFF) - 24u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 2> {
+  static unsigned doit(T v) {
+    return count_leading_zeros_32((uint32_t)v & 0xFFFF) - 16u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 4> {
+  static unsigned doit(T v) {
+    return count_leading_zeros_32(v);
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 8> {
+  static unsigned doit(T v) {
+    uint64_t high = ((uint64_t)v) >> 32ULL;
+    if (high != 0) {
+      return count_leading_zeros_32((uint32_t)high);
+    } else {
+      return count_leading_zeros_32((uint32_t)v) + 32u;
+    }
+  }
+};
+
+#endif
+
 #endif // SHARE_UTILITIES_COUNT_LEADING_ZEROS_HPP
--- a/src/hotspot/share/utilities/growableArray.hpp
+++ b/src/hotspot/share/utilities/growableArray.hpp
@ -30,6 +30,7 @@
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ostream.hpp"
+#include "utilities/powerOfTwo.hpp"

 // A growable array.

@ -476,10 +477,9 @@ template<class E> class GrowableArray : public GenericGrowableArray {
 // Global GrowableArray methods (one instance in the library per each 'E' type).

 template<class E> void GrowableArray<E>::grow(int j) {
-    // grow the array by doubling its size (amortized growth)
    int old_max = _max;
-    if (_max == 0) _max = 1; // prevent endless loop
-    while (j >= _max) _max = _max*2;
+    // grow the array by increasing _max to the first power of two larger than the size we need
+    _max = next_power_of_2((uint32_t)j);
    // j < _max
    E* newData = (E*)raw_allocate(sizeof(E));
    int i = 0;
--- a/src/hotspot/share/utilities/powerOfTwo.hpp
+++ b/src/hotspot/share/utilities/powerOfTwo.hpp
@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_UTILITIES_POWEROFTWO_HPP
+#define SHARE_UTILITIES_POWEROFTWO_HPP
+
+#include "metaprogramming/enableIf.hpp"
+#include "metaprogramming/isIntegral.hpp"
+#include "metaprogramming/isSigned.hpp"
+#include "utilities/count_leading_zeros.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// Power of two convenience library.
+
+// Round down to the closest power of two less than or equal to the given
+// value.
+
+// Signed version: 0 is an invalid input, negative values are invalid
+template <typename T>
+inline typename EnableIf<IsSigned<T>::value, T>::type round_down_power_of_2(T value) {
+  STATIC_ASSERT(IsIntegral<T>::value);
+  assert(value > 0, "Invalid value");
+  uint32_t lz = count_leading_zeros(value);
+  assert(lz < sizeof(T) * BitsPerByte, "Sanity");
+  return T(1) << (sizeof(T) * BitsPerByte - 1 - lz);
+}
+
+// Unsigned version: 0 is an invalid input
+template <typename T>
+inline typename EnableIf<!IsSigned<T>::value, T>::type round_down_power_of_2(T value) {
+  STATIC_ASSERT(IsIntegral<T>::value);
+  assert(value != 0, "Invalid value");
+  uint32_t lz = count_leading_zeros(value);
+  assert(lz < sizeof(T) * BitsPerByte, "Sanity");
+  return T(1) << (sizeof(T) * BitsPerByte - 1 - lz);
+}
+
+// Round up to the closest power of two greater than or equal to
+// the given value.
+
+// Signed version: 0 is an invalid input, negative values are invalid,
+// overflows with assert if value is larger than 2^30 or 2^62 for 32- and
+// 64-bit integers, respectively
+template <typename T>
+inline typename EnableIf<IsSigned<T>::value, T>::type round_up_power_of_2(T value) {
+  STATIC_ASSERT(IsIntegral<T>::value);
+  STATIC_ASSERT(IsSigned<T>::value);
+  assert(value > 0, "Invalid value");
+  if (is_power_of_2(value)) {
+    return value;
+  }
+  uint32_t lz = count_leading_zeros(value);
+  assert(lz < sizeof(T) * BitsPerByte, "Sanity");
+  assert(lz > 1, "Will overflow");
+  return T(1) << (sizeof(T) * BitsPerByte - lz);
+}
+
+// Unsigned version: 0 is an invalid input, overflows with assert if value
+// is larger than 2^31 or 2^63 for 32- and 64-bit integers, respectively
+template <typename T>
+inline typename EnableIf<!IsSigned<T>::value, T>::type round_up_power_of_2(T value) {
+  STATIC_ASSERT(IsIntegral<T>::value);
+  STATIC_ASSERT(!IsSigned<T>::value);
+  assert(value != 0, "Invalid value");
+  if (is_power_of_2(value)) {
+    return value;
+  }
+  uint32_t lz = count_leading_zeros(value);
+  assert(lz < sizeof(T) * BitsPerByte, "Sanity");
+  assert(lz > 0, "Will overflow");
+  return T(1) << (sizeof(T) * BitsPerByte - lz);
+}
+
+// Helper function to get the maximum positive value. Implemented here
+// since using std::numeric_limits<T>::max() seems problematic on some
+// platforms.
+
+template <typename T> T max_value() {
+  if (IsSigned<T>::value) {
+    // Highest positive power of two expressible in the type
+    uint64_t val = static_cast<T>(1) << (sizeof(T) * BitsPerByte - 2);
+    // Fill lower bits with ones
+    val |= val >> 1;
+    val |= val >> 2;
+    val |= val >> 4;
+    if (sizeof(T) >= 2)  val |= val >> 8;
+    if (sizeof(T) >= 4)  val |= val >> 16;
+    if (sizeof(T) == 8)  val |= val >> 32;
+    return (T)val;
+  } else {
+    return ~(static_cast<T>(0));
+  }
+}
+
+// Calculate the next power of two greater than the given value.
+
+// Accepts 0 (returns 1), overflows with assert if value is larger than
+// or equal to 2^31 (signed: 2^30) or 2^63 (signed: 2^62), for 32-
+// and 64-bit integers, respectively
+template <typename T>
+inline T next_power_of_2(T value)  {
+  assert(value != max_value<T>(), "Overflow");
+  return round_up_power_of_2(value + 1);
+}
+
+#endif // SHARE_UTILITIES_POWEROFTWO_HPP
--- a/test/hotspot/gtest/gc/z/test_zUtils.cpp
+++ b/test/hotspot/gtest/gc/z/test_zUtils.cpp
@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include "precompiled.hpp"
-#include "gc/z/zUtils.inline.hpp"
-#include "unittest.hpp"
-
-#include <limits>
-
-template <typename T>
-static T max_alignment() {
-  T max = std::numeric_limits<T>::max();
-  return max ^ (max >> 1);
-}
-
-TEST(ZUtilsTest, round_up_power_of_2) {
-  EXPECT_EQ(ZUtils::round_up_power_of_2(1u), 1u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(2u), 2u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(3u), 4u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(4u), 4u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(5u), 8u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(6u), 8u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(7u), 8u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(8u), 8u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(9u), 16u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(10u), 16u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(1023u), 1024u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(1024u), 1024u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(1025u), 2048u);
-
-  const size_t max = max_alignment<size_t>();
-  EXPECT_EQ(ZUtils::round_up_power_of_2(max - 1), max);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(max), max);
-}
-
-TEST(ZUtilsTest, round_down_power_of_2) {
-  EXPECT_EQ(ZUtils::round_down_power_of_2(1u), 1u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(2u), 2u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(3u), 2u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(4u), 4u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(5u), 4u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(6u), 4u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(7u), 4u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(8u), 8u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(9u), 8u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(10u), 8u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(1023u), 512u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(1024u), 1024u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(1025u), 1024u);
-
-  const size_t max = max_alignment<size_t>();
-  EXPECT_EQ(ZUtils::round_down_power_of_2(max), max);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(max - 1), max / 2);
-}
--- a/test/hotspot/gtest/utilities/test_count_leading_zeros.cpp
+++ b/test/hotspot/gtest/utilities/test_count_leading_zeros.cpp
@ -23,35 +23,78 @@
 */

 #include "precompiled.hpp"
+#include "metaprogramming/isSigned.hpp"
 #include "utilities/count_leading_zeros.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "unittest.hpp"

-TEST(count_leading_zeros, one_or_two_set_bits) {
-  unsigned i = 0;                  // Position of a set bit.
-  for (uint32_t ix = 1; ix != 0; ix <<= 1, ++i) {
-    unsigned j = 0;                // Position of a set bit.
-    for (uint32_t jx = 1; jx != 0; jx <<= 1, ++j) {
-      uint32_t value = ix | jx;
-      EXPECT_EQ(31u - MAX2(i, j), count_leading_zeros(value))
+#include <limits>
+
+template <typename T> void one_or_two_set_bits() {
+  uint32_t bit1_pos = 0;
+  uint32_t bits = sizeof(T) * BitsPerByte;
+  uint32_t limit = bits - (IsSigned<T>::value ? 1 : 0);
+  for (uint64_t ix = 1; bit1_pos < limit; ix = ix * 2, ++bit1_pos) {
+    uint32_t bit2_pos = 0;
+    for (uint64_t jx = 1; bit2_pos < limit; jx = jx * 2, ++bit2_pos) {
+      T value = T(ix | jx);
+      EXPECT_EQ((uint32_t)(bits - 1u - MAX2(bit1_pos, bit2_pos)), count_leading_zeros(value))
        << "value = " << value;
    }
  }
 }

-TEST(count_leading_zeros, high_zeros_low_ones) {
-  unsigned i = 0;                  // Number of leading zeros
-  uint32_t value = ~(uint32_t)0;
-  for ( ; value != 0; value >>= 1, ++i) {
-    EXPECT_EQ(i, count_leading_zeros(value))
+TEST(count_leading_zeros, one_or_two_set_bits) {
+  one_or_two_set_bits<int8_t>();
+  one_or_two_set_bits<int16_t>();
+  one_or_two_set_bits<int32_t>();
+  one_or_two_set_bits<int64_t>();
+  one_or_two_set_bits<uint8_t>();
+  one_or_two_set_bits<uint16_t>();
+  one_or_two_set_bits<uint32_t>();
+  one_or_two_set_bits<uint64_t>();
+}
+
+template <typename T> void high_zeros_low_ones() {
+  uint32_t number_of_leading_zeros = (IsSigned<T>::value ? 1 : 0);
+  T value = std::numeric_limits<T>::max();
+  for ( ; value != 0; value >>= 1, ++number_of_leading_zeros) {
+    EXPECT_EQ(number_of_leading_zeros, count_leading_zeros(value))
      << "value = " << value;
  }
 }

-TEST(count_leading_zeros, high_ones_low_zeros) {
-  uint32_t value = ~(uint32_t)0;
-  for ( ; value != 0; value <<= 1) {
-    EXPECT_EQ(0u, count_leading_zeros(value))
+TEST(count_leading_zeros, high_zeros_low_ones) {
+  high_zeros_low_ones<int8_t>();
+  high_zeros_low_ones<int16_t>();
+  high_zeros_low_ones<int32_t>();
+  high_zeros_low_ones<int64_t>();
+  high_zeros_low_ones<uint8_t>();
+  high_zeros_low_ones<uint16_t>();
+  high_zeros_low_ones<uint32_t>();
+  high_zeros_low_ones<uint64_t>();
+}
+
+template <typename T> void high_ones_low_zeros() {
+  T value = std::numeric_limits<T>::max();
+
+  uint32_t number_of_leading_zeros = (IsSigned<T>::value ? 1 : 0);
+  for (uint64_t i = 1; value != 0; value -= i, i <<= 1) {
+    EXPECT_EQ(number_of_leading_zeros, count_leading_zeros(value))
      << "value = " << value;
  }
+  value = (T)(~((uint64_t)0)); // all ones
+  EXPECT_EQ(0u, count_leading_zeros(value))
+    << "value = " << value;
 }
+
+TEST(count_leading_zeros, high_ones_low_zeros) {
+  high_ones_low_zeros<int8_t>();
+  high_ones_low_zeros<int16_t>();
+  high_ones_low_zeros<int32_t>();
+  high_ones_low_zeros<int64_t>();
+  high_ones_low_zeros<uint8_t>();
+  high_ones_low_zeros<uint16_t>();
+  high_ones_low_zeros<uint32_t>();
+  high_ones_low_zeros<uint64_t>();
+}
--- a/test/hotspot/gtest/utilities/test_powerOfTwo.cpp
+++ b/test/hotspot/gtest/utilities/test_powerOfTwo.cpp
@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/powerOfTwo.hpp"
+#include "unittest.hpp"
+
+// Returns max_value<T>() minus half of it (rounded down). Assuming max_value<T>()
+// is T's maximum with every value bit set, that is the largest power of two
+// representable in T.
+template <typename T> T max_pow2() {
+  const T limit = max_value<T>();
+  return limit - (limit >> 1);
+}
+
+// Exercises round_up_power_of_2(value) for the integer type T:
+//  - a fixed table of small inputs (1..10),
+//  - every power of two up to max_pow2<T>(), which must map to itself,
+//  - every (power of two - 1) from 3 upwards, which must round up to that power.
+// Failure messages report the input that was passed to the function.
+// NOTE(review): when T is narrower than int, `pow2 - 1` is promoted to int, so
+// those calls deduce int rather than T -- confirm that is intended.
+template <typename T> void round_up_power_of_2() {
+  EXPECT_EQ(round_up_power_of_2(T(1)), T(1)) << "value = " << T(1);
+  EXPECT_EQ(round_up_power_of_2(T(2)), T(2)) << "value = " << T(2);
+  EXPECT_EQ(round_up_power_of_2(T(3)), T(4)) << "value = " << T(3);
+  EXPECT_EQ(round_up_power_of_2(T(4)), T(4)) << "value = " << T(4);
+  EXPECT_EQ(round_up_power_of_2(T(5)), T(8)) << "value = " << T(5);
+  EXPECT_EQ(round_up_power_of_2(T(6)), T(8)) << "value = " << T(6);
+  EXPECT_EQ(round_up_power_of_2(T(7)), T(8)) << "value = " << T(7);
+  EXPECT_EQ(round_up_power_of_2(T(8)), T(8)) << "value = " << T(8);
+  EXPECT_EQ(round_up_power_of_2(T(9)), T(16)) << "value = " << T(9);
+  EXPECT_EQ(round_up_power_of_2(T(10)), T(16)) << "value = " << T(10);
+
+  T t_max_pow2 = max_pow2<T>();
+
+  // round_up(any power of two) should return input
+  for (T pow2 = T(1); pow2 < t_max_pow2; pow2 *= 2) {
+    EXPECT_EQ(pow2, round_up_power_of_2(pow2))
+      << "value = " << pow2;
+  }
+  // The largest power of two is checked outside the loop so that pow2 is never
+  // doubled past it.
+  EXPECT_EQ(round_up_power_of_2(t_max_pow2), t_max_pow2)
+    << "value = " << (t_max_pow2);
+
+  // For each pow2 gt 2, round_up(pow2 - 1) should return pow2
+  for (T pow2 = T(4); pow2 < t_max_pow2; pow2 *= 2) {
+    EXPECT_EQ(pow2, round_up_power_of_2(pow2 - 1))
+      << "value = " << (pow2 - 1);
+  }
+  EXPECT_EQ(round_up_power_of_2(t_max_pow2 - 1), t_max_pow2)
+    << "value = " << (t_max_pow2 - 1);
+}
+
+TEST(power_of_2, round_up_power_of_2) {  // Runs the round-up checks for each fixed-width signed and unsigned type.
+  round_up_power_of_2<int8_t>();
+  round_up_power_of_2<int16_t>();
+  round_up_power_of_2<int32_t>();
+  round_up_power_of_2<int64_t>();
+  round_up_power_of_2<uint8_t>();
+  round_up_power_of_2<uint16_t>();
+  round_up_power_of_2<uint32_t>();
+  round_up_power_of_2<uint64_t>();
+}
+
+// Exercises round_down_power_of_2(value) for the integer type T:
+//  - a fixed table of small inputs (1..10),
+//  - for every power of two from 2 up to max_pow2<T>(): the power itself,
+//    the power plus one, and the power minus one.
+// Failure messages report the input that was passed to the function.
+// NOTE(review): when T is narrower than int, `pow2 + 1` and `pow2 - 1` are
+// promoted to int, so those calls deduce int rather than T -- confirm that is
+// intended.
+template <typename T> void round_down_power_of_2() {
+  EXPECT_EQ(round_down_power_of_2(T(1)), T(1)) << "value = " << T(1);
+  EXPECT_EQ(round_down_power_of_2(T(2)), T(2)) << "value = " << T(2);
+  EXPECT_EQ(round_down_power_of_2(T(3)), T(2)) << "value = " << T(3);
+  EXPECT_EQ(round_down_power_of_2(T(4)), T(4)) << "value = " << T(4);
+  EXPECT_EQ(round_down_power_of_2(T(5)), T(4)) << "value = " << T(5);
+  EXPECT_EQ(round_down_power_of_2(T(6)), T(4)) << "value = " << T(6);
+  EXPECT_EQ(round_down_power_of_2(T(7)), T(4)) << "value = " << T(7);
+  EXPECT_EQ(round_down_power_of_2(T(8)), T(8)) << "value = " << T(8);
+  EXPECT_EQ(round_down_power_of_2(T(9)), T(8)) << "value = " << T(9);
+  EXPECT_EQ(round_down_power_of_2(T(10)), T(8)) << "value = " << T(10);
+
+  T t_max_pow2 = max_pow2<T>();
+
+  // For each pow2 >= 2:
+  // - round_down(pow2) should return pow2
+  // - round_down(pow2 + 1) should return pow2
+  // - round_down(pow2 - 1) should return pow2 / 2
+  for (T pow2 = T(2); pow2 < t_max_pow2; pow2 = pow2 * 2) {
+    EXPECT_EQ(pow2, round_down_power_of_2(pow2))
+      << "value = " << pow2;
+    EXPECT_EQ(pow2, round_down_power_of_2(pow2 + 1))
+      << "value = " << (pow2 + 1);
+    EXPECT_EQ(pow2 / 2, round_down_power_of_2(pow2 - 1))
+      << "value = " << (pow2 - 1);
+  }
+  // The largest power of two is checked outside the loop so that pow2 is never
+  // doubled past it.
+  EXPECT_EQ(round_down_power_of_2(t_max_pow2), t_max_pow2)
+    << "value = " << (t_max_pow2);
+  EXPECT_EQ(round_down_power_of_2(t_max_pow2 + 1), t_max_pow2)
+    << "value = " << (t_max_pow2 + 1);
+  EXPECT_EQ(round_down_power_of_2(t_max_pow2 - 1), t_max_pow2 / 2)
+    << "value = " << (t_max_pow2 - 1);
+}
+
+TEST(power_of_2, round_down_power_of_2) {  // Runs the round-down checks for each fixed-width signed and unsigned type.
+  round_down_power_of_2<int8_t>();
+  round_down_power_of_2<int16_t>();
+  round_down_power_of_2<int32_t>();
+  round_down_power_of_2<int64_t>();
+  round_down_power_of_2<uint8_t>();
+  round_down_power_of_2<uint16_t>();
+  round_down_power_of_2<uint32_t>();
+  round_down_power_of_2<uint64_t>();
+}
+
+// Exercises next_power_of_2(value) for the integer type T:
+//  - a fixed table of small inputs (0..10),
+//  - every (power of two - 1) up to max_pow2<T>() - 1, which must yield that
+//    power,
+//  - every power of two up to and including max_pow2<T>() / 2, which must
+//    yield its double.
+// NOTE(review): when T is narrower than int, `pow2 - 1` is promoted to int, so
+// those calls deduce int rather than T -- confirm that is intended.
+template <typename T> void next_power_of_2() {
+  EXPECT_EQ(next_power_of_2(T(0)), T(1)) << "value = " << T(0);
+  EXPECT_EQ(next_power_of_2(T(1)), T(2)) << "value = " << T(1);
+  EXPECT_EQ(next_power_of_2(T(2)), T(4)) << "value = " << T(2);
+  EXPECT_EQ(next_power_of_2(T(3)), T(4)) << "value = " << T(3);
+  EXPECT_EQ(next_power_of_2(T(4)), T(8)) << "value = " << T(4);
+  EXPECT_EQ(next_power_of_2(T(5)), T(8)) << "value = " << T(5);
+  EXPECT_EQ(next_power_of_2(T(6)), T(8)) << "value = " << T(6);
+  EXPECT_EQ(next_power_of_2(T(7)), T(8)) << "value = " << T(7);
+  EXPECT_EQ(next_power_of_2(T(8)), T(16)) << "value = " << T(8);
+  EXPECT_EQ(next_power_of_2(T(9)), T(16)) << "value = " << T(9);
+  EXPECT_EQ(next_power_of_2(T(10)), T(16)) << "value = " << T(10);
+
+  T t_max_pow2 = max_pow2<T>();
+
+  // next(pow2 - 1) should return pow2
+  for (T pow2 = T(1); pow2 < t_max_pow2; pow2 = pow2 * 2) {
+    EXPECT_EQ(pow2, next_power_of_2(pow2 - 1))
+      << "value = " << (pow2 - 1);
+  }
+  EXPECT_EQ(next_power_of_2(t_max_pow2 - 1), t_max_pow2)
+    << "value = " << (t_max_pow2 - 1);
+
+  // next(pow2) should return pow2 * 2
+  // The bound is inclusive so that pow2 == t_max_pow2 / 2 (whose successor is
+  // t_max_pow2 itself) is covered; the loop exits once pow2 reaches
+  // t_max_pow2, so pow2 is never doubled past it.
+  for (T pow2 = T(1); pow2 <= t_max_pow2 / 2; pow2 = pow2 * 2) {
+    EXPECT_EQ(pow2 * 2, next_power_of_2(pow2))
+      << "value = " << pow2;
+  }
+}
+
+TEST(power_of_2, next_power_of_2) {  // Runs the next-power-of-two checks for each fixed-width signed and unsigned type.
+  next_power_of_2<int8_t>();
+  next_power_of_2<int16_t>();
+  next_power_of_2<int32_t>();
+  next_power_of_2<int64_t>();
+  next_power_of_2<uint8_t>();
+  next_power_of_2<uint16_t>();
+  next_power_of_2<uint32_t>();
+  next_power_of_2<uint64_t>();
+}