8153340: Disallow misconfiguration and improve the consistency of allocation prefetching

Improve allocation prefetching.

Reviewed-by: kvn
This commit is contained in:
Zoltan Majo 2016-04-29 08:32:42 +02:00
parent 3a358f4cff
commit 413417522f
11 changed files with 73 additions and 104 deletions

View File

@ -49,9 +49,11 @@ void VM_Version::initialize() {
AllocatePrefetchDistance = allocate_prefetch_distance();
AllocatePrefetchStyle = allocate_prefetch_style();
if (AllocatePrefetchStyle == 3 && !has_blk_init()) {
warning("BIS instructions are not available on this CPU");
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1);
if (!has_blk_init()) {
if (AllocatePrefetchInstr == 1) {
warning("BIS instructions required for AllocatePrefetchInstr 1 unavailable");
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
}
}
UseSSE = 0; // Only on x86 and x64
@ -88,11 +90,13 @@ void VM_Version::initialize() {
if (has_blk_init() && UseTLAB &&
FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
// Use BIS instruction for TLAB allocation prefetch.
FLAG_SET_ERGO(intx, AllocatePrefetchInstr, 1);
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
FLAG_SET_ERGO(intx, AllocatePrefetchStyle, 3);
}
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
}
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
if (AllocatePrefetchInstr == 0) {
// Use different prefetch distance without BIS
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
} else {
// Use smaller prefetch distance with BIS
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
}
@ -107,25 +111,14 @@ void VM_Version::initialize() {
FLAG_SET_ERGO(intx, AllocateInstancePrefetchLines, AllocateInstancePrefetchLines*2);
}
}
if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
// Use different prefetch distance without BIS
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
}
if (AllocatePrefetchInstr == 1) {
// Need extra space at the end of TLAB for BIS, otherwise prefetching
// instructions will fault (due to accessing memory outside of heap).
// The amount of space is the max of the number of lines to
// prefetch for array and for instance allocations. (Extra space must be
// reserved to accommodate both types of allocations.)
// +1 for rounding up to next cache line, +1 to be safe
int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
int step_size = AllocatePrefetchStepSize;
int distance = AllocatePrefetchDistance;
_reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize;
}
}
#endif
if (AllocatePrefetchInstr == 1) {
// Use allocation prefetch style 3 because BIS instructions
// require aligned memory addresses.
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
}
#endif /* COMPILER2 */
}
// Use hardware population count instruction if available.

View File

@ -324,8 +324,9 @@ public class VM {
Address vmInternalInfoAddr = vmVersion.getAddressField("_s_internal_vm_info_string").getValue();
vmInternalInfo = CStringUtilities.getString(vmInternalInfoAddr);
Type threadLocalAllocBuffer = db.lookupType("ThreadLocalAllocBuffer");
CIntegerType intType = (CIntegerType) db.lookupType("int");
CIntegerField reserveForAllocationPrefetchField = vmVersion.getCIntegerField("_reserve_for_allocation_prefetch");
CIntegerField reserveForAllocationPrefetchField = threadLocalAllocBuffer.getCIntegerField("_reserve_for_allocation_prefetch");
reserveForAllocationPrefetch = (int)reserveForAllocationPrefetchField.getCInteger(intType);
} catch (Exception exp) {
throw new RuntimeException("can't determine target's VM version : " + exp.getMessage());

View File

@ -36,6 +36,7 @@
// static member initialization
size_t ThreadLocalAllocBuffer::_max_size = 0;
int ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0;
unsigned ThreadLocalAllocBuffer::_target_refills = 0;
GlobalTLABStats* ThreadLocalAllocBuffer::_global_stats = NULL;
@ -215,6 +216,23 @@ void ThreadLocalAllocBuffer::startup_initialization() {
_global_stats = new GlobalTLABStats();
// Need extra space at the end of TLAB, otherwise prefetching
// instructions will fault (due to accessing memory outside of heap).
// The amount of space is the max of the number of lines to
// prefetch for array and for instance allocations. (Extra space must be
// reserved to accommodate both types of allocations.)
//
// Only SPARC-specific BIS instructions are known to fault. (Those
// instructions are generated if AllocatePrefetchStyle==3 and
// AllocatePrefetchInstr==1). To be on the safe side, however,
// extra space is reserved for all combinations of
// AllocatePrefetchStyle and AllocatePrefetchInstr.
// +1 for rounding up to next cache line, +1 to be safe
int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
_reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) /
(int)HeapWordSize;
// During jvm startup, the main (primordial) thread is initialized
// before the heap is initialized. So reinitialize it now.
guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");

View File

@ -49,8 +49,9 @@ private:
size_t _refill_waste_limit; // hold onto tlab if free() is larger than this
size_t _allocated_before_last_gc; // total bytes allocated up until the last gc
static size_t _max_size; // maximum size of any TLAB
static unsigned _target_refills; // expected number of refills between GCs
static size_t _max_size; // maximum size of any TLAB
static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB
static unsigned _target_refills; // expected number of refills between GCs
unsigned _number_of_refills;
unsigned _fast_refill_waste;
@ -129,7 +130,7 @@ public:
// Reserve space at the end of TLAB.
// Returns the larger of the header size of an int array
// (typeArrayOopDesc::header_size(T_INT)) and the space reserved for
// allocation prefetching (_reserve_for_allocation_prefetch).
static size_t end_reserve() {
  const int int_array_header_size = typeArrayOopDesc::header_size(T_INT);
  // Single return: the prefetch reserve is read from this class's own static.
  return MAX2(int_array_header_size, _reserve_for_allocation_prefetch);
}
static size_t alignment_reserve() { return align_object_size(end_reserve()); }
static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; }

View File

@ -1897,7 +1897,7 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
Node *prefetch_adr;
Node *prefetch;
uint lines = AllocatePrefetchDistance / AllocatePrefetchStepSize;
uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
uint step_size = AllocatePrefetchStepSize;
uint distance = 0;
@ -1926,12 +1926,8 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
contended_phi_rawmem = pf_phi_rawmem;
i_o = pf_phi_abio;
} else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
// Insert a prefetch for each allocation.
// This code is used for Sparc with BIS.
Node *pf_region = new RegionNode(3);
Node *pf_phi_rawmem = new PhiNode( pf_region, Type::MEMORY,
TypeRawPtr::BOTTOM );
transform_later(pf_region);
// Insert a prefetch instruction for each allocation.
// This code is used for SPARC with BIS.
// Generate several prefetch instructions.
uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
@ -1940,10 +1936,15 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
// Next cache address.
Node *cache_adr = new AddPNode(old_eden_top, old_eden_top,
_igvn.MakeConX(distance));
_igvn.MakeConX(step_size + distance));
transform_later(cache_adr);
cache_adr = new CastP2XNode(needgc_false, cache_adr);
transform_later(cache_adr);
// For BIS instructions to be emitted, the address must be aligned at cache line size.
// (The VM sets AllocatePrefetchStepSize to the cache line size, unless a value is
// specified at the command line.) If the address is not aligned at cache line size
// boundary, a standard store instruction is triggered (instead of the BIS). For the
// latter, 8-byte alignment is necessary.
Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
cache_adr = new AndXNode(cache_adr, mask);
transform_later(cache_adr);

View File

@ -90,16 +90,29 @@ Flag::Error CICompilerCountConstraintFunc(intx value, bool verbose) {
}
// Constraint check for the AllocatePrefetchDistance flag.
//
// value:   the flag value being validated.
// verbose: forwarded unchanged to CommandLineError::print.
//
// Returns Flag::SUCCESS when value lies in [0, 512]; otherwise reports the
// violation through CommandLineError::print and returns
// Flag::VIOLATES_CONSTRAINT.
Flag::Error AllocatePrefetchDistanceConstraintFunc(intx value, bool verbose) {
  // Keep the bound as intx so it matches the INTX_FORMAT specifier below
  // (a bare int literal would not match the specifier's expected type).
  const intx max_value = 512;
  if (value < 0 || value > max_value) {
    // Report the value that was actually checked.
    CommandLineError::print(verbose,
                            "AllocatePrefetchDistance (" INTX_FORMAT ") must be "
                            "between 0 and " INTX_FORMAT "\n",
                            value, max_value);
    return Flag::VIOLATES_CONSTRAINT;
  }

  return Flag::SUCCESS;
}
// Constraint check for the AllocatePrefetchStepSize flag.
//
// value:   the flag value being validated.
// verbose: forwarded unchanged to CommandLineError::print.
//
// The step size is only constrained when AllocatePrefetchStyle is 3, in
// which case it must be a multiple of wordSize. Returns Flag::SUCCESS when
// the constraint holds (or does not apply), Flag::VIOLATES_CONSTRAINT
// otherwise.
Flag::Error AllocatePrefetchStepSizeConstraintFunc(intx value, bool verbose) {
  // Nothing to enforce for other prefetch styles or for word-multiple values.
  if (AllocatePrefetchStyle != 3 || value % wordSize == 0) {
    return Flag::SUCCESS;
  }

  CommandLineError::print(verbose,
                          "AllocatePrefetchStepSize (" INTX_FORMAT ") must be multiple of %d\n",
                          value, wordSize);
  return Flag::VIOLATES_CONSTRAINT;
}
Flag::Error AllocatePrefetchInstrConstraintFunc(intx value, bool verbose) {
intx max_value = max_intx;
#if defined(SPARC)
@ -117,49 +130,6 @@ Flag::Error AllocatePrefetchInstrConstraintFunc(intx value, bool verbose) {
return Flag::SUCCESS;
}
// Constraint check for the AllocatePrefetchStepSize flag.
//
// value:   the flag value being validated.
// verbose: forwarded unchanged to CommandLineError::print.
//
// Three conditions are checked in order; the first one that fails is
// reported and Flag::VIOLATES_CONSTRAINT is returned:
//   1. value is in [1, 512];
//   2. AllocatePrefetchDistance is a multiple of value;
//   3. AllocatePrefetchDistance / value does not exceed 64.
// Returns Flag::SUCCESS when all three hold.
Flag::Error AllocatePrefetchStepSizeConstraintFunc(intx value, bool verbose) {
  const intx max_value = 512;
  if (value < 1 || value > max_value) {
    // max_value is an intx, so it is printed with INTX_FORMAT.
    CommandLineError::print(verbose,
                            "AllocatePrefetchStepSize (" INTX_FORMAT ") "
                            "must be between 1 and " INTX_FORMAT "\n",
                            value,
                            max_value);
    return Flag::VIOLATES_CONSTRAINT;
  }

  // From here on value >= 1, so using it as a divisor is safe. The checks
  // below use the validated value rather than re-reading the global flag.
  if (AllocatePrefetchDistance % value != 0) {
    CommandLineError::print(verbose,
                            "AllocatePrefetchDistance (" INTX_FORMAT ") "
                            "%% AllocatePrefetchStepSize (" INTX_FORMAT ") "
                            "= " INTX_FORMAT " "
                            "must be 0\n",
                            AllocatePrefetchDistance, value,
                            AllocatePrefetchDistance % value);
    return Flag::VIOLATES_CONSTRAINT;
  }

  /* The limit of 64 for the quotient of AllocatePrefetchDistance and AllocatePrefetchStepSize
   * originates from the limit of 64 for AllocatePrefetchLines/AllocateInstancePrefetchLines.
   * If AllocatePrefetchStyle == 2, the quotient from above is used in PhaseMacroExpand::prefetch_allocation()
   * to determine the number of lines to prefetch. For other values of AllocatePrefetchStyle,
   * AllocatePrefetchDistance and AllocatePrefetchStepSize are used. For consistency, all these
   * quantities must have the same limit (64 in this case).
   */
  if (AllocatePrefetchDistance / value > 64) {
    // Exactly two arguments for the two INTX_FORMAT specifiers.
    CommandLineError::print(verbose,
                            "AllocatePrefetchDistance (" INTX_FORMAT ") too large or "
                            "AllocatePrefetchStepSize (" INTX_FORMAT ") too small; "
                            "try decreasing/increasing values so that "
                            "AllocatePrefetchDistance / AllocatePrefetchStepSize <= 64\n",
                            AllocatePrefetchDistance, value);
    return Flag::VIOLATES_CONSTRAINT;
  }

  return Flag::SUCCESS;
}
Flag::Error CompileThresholdConstraintFunc(intx value, bool verbose) {
if (value < 0 || value > INT_MAX >> InvocationCounter::count_shift) {
CommandLineError::print(verbose,

View File

@ -2901,9 +2901,9 @@ public:
\
product(intx, AllocatePrefetchStyle, 1, \
"0 = no prefetch, " \
"1 = prefetch instructions for each allocation, " \
"1 = generate prefetch instructions for each allocation, " \
"2 = use TLAB watermark to gate allocation prefetch, " \
"3 = use BIS instruction on Sparc for allocation prefetch") \
"3 = generate one prefetch instruction per cache line") \
range(0, 3) \
\
product(intx, AllocatePrefetchDistance, -1, \
@ -2926,8 +2926,8 @@ public:
constraint(AllocatePrefetchStepSizeConstraintFunc,AfterMemoryInit)\
\
product(intx, AllocatePrefetchInstr, 0, \
"Prefetch instruction to prefetch ahead of allocation pointer") \
constraint(AllocatePrefetchInstrConstraintFunc, AfterErgo) \
"Select instruction to prefetch ahead of allocation pointer") \
constraint(AllocatePrefetchInstrConstraintFunc, AfterMemoryInit) \
\
/* deoptimization */ \
develop(bool, TraceDeoptimization, false, \

View File

@ -600,6 +600,7 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
nonstatic_field(ThreadLocalAllocBuffer, _pf_top, HeapWord*) \
nonstatic_field(ThreadLocalAllocBuffer, _desired_size, size_t) \
nonstatic_field(ThreadLocalAllocBuffer, _refill_waste_limit, size_t) \
static_field(ThreadLocalAllocBuffer, _reserve_for_allocation_prefetch, int) \
static_field(ThreadLocalAllocBuffer, _target_refills, unsigned) \
nonstatic_field(ThreadLocalAllocBuffer, _number_of_refills, unsigned) \
nonstatic_field(ThreadLocalAllocBuffer, _fast_refill_waste, unsigned) \
@ -1318,7 +1319,6 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
static_field(Abstract_VM_Version, _vm_minor_version, int) \
static_field(Abstract_VM_Version, _vm_security_version, int) \
static_field(Abstract_VM_Version, _vm_build_number, int) \
static_field(Abstract_VM_Version, _reserve_for_allocation_prefetch, int) \
\
static_field(JDK_Version, _current, JDK_Version) \
nonstatic_field(JDK_Version, _major, unsigned char) \

View File

@ -43,7 +43,6 @@ bool Abstract_VM_Version::_supports_atomic_getadd4 = false;
bool Abstract_VM_Version::_supports_atomic_getadd8 = false;
unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U;
unsigned int Abstract_VM_Version::_L1_data_cache_line_size = 0;
int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0;
#ifndef HOTSPOT_VERSION_STRING
#error HOTSPOT_VERSION_STRING must be defined

View File

@ -57,7 +57,6 @@ class Abstract_VM_Version: AllStatic {
static int _vm_build_number;
static unsigned int _parallel_worker_threads;
static bool _parallel_worker_threads_initialized;
static int _reserve_for_allocation_prefetch;
static unsigned int nof_parallel_worker_threads(unsigned int num,
unsigned int dem,
@ -139,12 +138,6 @@ class Abstract_VM_Version: AllStatic {
return _L1_data_cache_line_size;
}
// Need a space at the end of TLAB for prefetch instructions
// which may fault when accessing memory outside of heap.
static int reserve_for_allocation_prefetch() {
return _reserve_for_allocation_prefetch;
}
// ARCH specific policy for the BiasedLocking
static bool use_biased_locking() { return true; }

View File

@ -89,13 +89,6 @@ public class TestOptionsWithRanges {
*/
excludeTestMaxRange("CICompilerCount");
/*
* JDK-8153340
* Temporarily exclude the AllocatePrefetchDistance option from testing
*/
excludeTestRange("AllocatePrefetchDistance");
/*
* JDK-8136766
* Temporarily remove ThreadStackSize from testing because Windows can set it to 0