8153340: Disallow misconfiguration and improve the consistency of allocation prefetching
Improve allocation prefetching. Reviewed-by: kvn
This commit is contained in:
parent
3a358f4cff
commit
413417522f
@ -49,9 +49,11 @@ void VM_Version::initialize() {
|
||||
AllocatePrefetchDistance = allocate_prefetch_distance();
|
||||
AllocatePrefetchStyle = allocate_prefetch_style();
|
||||
|
||||
if (AllocatePrefetchStyle == 3 && !has_blk_init()) {
|
||||
warning("BIS instructions are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1);
|
||||
if (!has_blk_init()) {
|
||||
if (AllocatePrefetchInstr == 1) {
|
||||
warning("BIS instructions required for AllocatePrefetchInstr 1 unavailable");
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
UseSSE = 0; // Only on x86 and x64
|
||||
@ -88,11 +90,13 @@ void VM_Version::initialize() {
|
||||
if (has_blk_init() && UseTLAB &&
|
||||
FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
|
||||
// Use BIS instruction for TLAB allocation prefetch.
|
||||
FLAG_SET_ERGO(intx, AllocatePrefetchInstr, 1);
|
||||
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
|
||||
FLAG_SET_ERGO(intx, AllocatePrefetchStyle, 3);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
|
||||
if (AllocatePrefetchInstr == 0) {
|
||||
// Use different prefetch distance without BIS
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
|
||||
} else {
|
||||
// Use smaller prefetch distance with BIS
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
|
||||
}
|
||||
@ -107,25 +111,14 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_ERGO(intx, AllocateInstancePrefetchLines, AllocateInstancePrefetchLines*2);
|
||||
}
|
||||
}
|
||||
if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
|
||||
// Use different prefetch distance without BIS
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
|
||||
}
|
||||
if (AllocatePrefetchInstr == 1) {
|
||||
// Need extra space at the end of TLAB for BIS, otherwise prefetching
|
||||
// instructions will fault (due to accessing memory outside of heap).
|
||||
// The amount of space is the max of the number of lines to
|
||||
// prefetch for array and for instance allocations. (Extra space must be
|
||||
// reserved to accommodate both types of allocations.)
|
||||
|
||||
// +1 for rounding up to next cache line, +1 to be safe
|
||||
int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
|
||||
int step_size = AllocatePrefetchStepSize;
|
||||
int distance = AllocatePrefetchDistance;
|
||||
_reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (AllocatePrefetchInstr == 1) {
|
||||
// Use allocation prefetch style 3 because BIS instructions
|
||||
// require aligned memory addresses.
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
|
||||
}
|
||||
#endif /* COMPILER2 */
|
||||
}
|
||||
|
||||
// Use hardware population count instruction if available.
|
||||
|
@ -324,8 +324,9 @@ public class VM {
|
||||
Address vmInternalInfoAddr = vmVersion.getAddressField("_s_internal_vm_info_string").getValue();
|
||||
vmInternalInfo = CStringUtilities.getString(vmInternalInfoAddr);
|
||||
|
||||
Type threadLocalAllocBuffer = db.lookupType("ThreadLocalAllocBuffer");
|
||||
CIntegerType intType = (CIntegerType) db.lookupType("int");
|
||||
CIntegerField reserveForAllocationPrefetchField = vmVersion.getCIntegerField("_reserve_for_allocation_prefetch");
|
||||
CIntegerField reserveForAllocationPrefetchField = threadLocalAllocBuffer.getCIntegerField("_reserve_for_allocation_prefetch");
|
||||
reserveForAllocationPrefetch = (int)reserveForAllocationPrefetchField.getCInteger(intType);
|
||||
} catch (Exception exp) {
|
||||
throw new RuntimeException("can't determine target's VM version : " + exp.getMessage());
|
||||
|
@ -36,6 +36,7 @@
|
||||
|
||||
// static member initialization
|
||||
size_t ThreadLocalAllocBuffer::_max_size = 0;
|
||||
int ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0;
|
||||
unsigned ThreadLocalAllocBuffer::_target_refills = 0;
|
||||
GlobalTLABStats* ThreadLocalAllocBuffer::_global_stats = NULL;
|
||||
|
||||
@ -215,6 +216,23 @@ void ThreadLocalAllocBuffer::startup_initialization() {
|
||||
|
||||
_global_stats = new GlobalTLABStats();
|
||||
|
||||
// Need extra space at the end of TLAB, otherwise prefetching
|
||||
// instructions will fault (due to accessing memory outside of heap).
|
||||
// The amount of space is the max of the number of lines to
|
||||
// prefetch for array and for instance allocations. (Extra space must be
|
||||
// reserved to accommodate both types of allocations.)
|
||||
//
|
||||
// Only SPARC-specific BIS instructions are known to fault. (Those
|
||||
// instructions are generated if AllocatePrefetchStyle==3 and
|
||||
// AllocatePrefetchInstr==1). To be on the safe side, however,
|
||||
// extra space is reserved for all combinations of
|
||||
// AllocatePrefetchStyle and AllocatePrefetchInstr.
|
||||
|
||||
// +1 for rounding up to next cache line, +1 to be safe
|
||||
int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
|
||||
_reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) /
|
||||
(int)HeapWordSize;
|
||||
|
||||
// During jvm startup, the main (primordial) thread is initialized
|
||||
// before the heap is initialized. So reinitialize it now.
|
||||
guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
|
||||
|
@ -49,8 +49,9 @@ private:
|
||||
size_t _refill_waste_limit; // hold onto tlab if free() is larger than this
|
||||
size_t _allocated_before_last_gc; // total bytes allocated up until the last gc
|
||||
|
||||
static size_t _max_size; // maximum size of any TLAB
|
||||
static unsigned _target_refills; // expected number of refills between GCs
|
||||
static size_t _max_size; // maximum size of any TLAB
|
||||
static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB
|
||||
static unsigned _target_refills; // expected number of refills between GCs
|
||||
|
||||
unsigned _number_of_refills;
|
||||
unsigned _fast_refill_waste;
|
||||
@ -129,7 +130,7 @@ public:
|
||||
// Reserve space at the end of TLAB
|
||||
static size_t end_reserve() {
|
||||
int reserve_size = typeArrayOopDesc::header_size(T_INT);
|
||||
return MAX2(reserve_size, VM_Version::reserve_for_allocation_prefetch());
|
||||
return MAX2(reserve_size, _reserve_for_allocation_prefetch);
|
||||
}
|
||||
static size_t alignment_reserve() { return align_object_size(end_reserve()); }
|
||||
static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; }
|
||||
|
@ -1897,7 +1897,7 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
|
||||
|
||||
Node *prefetch_adr;
|
||||
Node *prefetch;
|
||||
uint lines = AllocatePrefetchDistance / AllocatePrefetchStepSize;
|
||||
uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
|
||||
uint step_size = AllocatePrefetchStepSize;
|
||||
uint distance = 0;
|
||||
|
||||
@ -1926,12 +1926,8 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
|
||||
contended_phi_rawmem = pf_phi_rawmem;
|
||||
i_o = pf_phi_abio;
|
||||
} else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
|
||||
// Insert a prefetch for each allocation.
|
||||
// This code is used for Sparc with BIS.
|
||||
Node *pf_region = new RegionNode(3);
|
||||
Node *pf_phi_rawmem = new PhiNode( pf_region, Type::MEMORY,
|
||||
TypeRawPtr::BOTTOM );
|
||||
transform_later(pf_region);
|
||||
// Insert a prefetch instruction for each allocation.
|
||||
// This code is used for SPARC with BIS.
|
||||
|
||||
// Generate several prefetch instructions.
|
||||
uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
|
||||
@ -1940,10 +1936,15 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
|
||||
|
||||
// Next cache address.
|
||||
Node *cache_adr = new AddPNode(old_eden_top, old_eden_top,
|
||||
_igvn.MakeConX(distance));
|
||||
_igvn.MakeConX(step_size + distance));
|
||||
transform_later(cache_adr);
|
||||
cache_adr = new CastP2XNode(needgc_false, cache_adr);
|
||||
transform_later(cache_adr);
|
||||
// For BIS instructions to be emitted, the address must be aligned at cache line size.
|
||||
// (The VM sets AllocatePrefetchStepSize to the cache line size, unless a value is
|
||||
// specified at the command line.) If the address is not aligned at cache line size
|
||||
// boundary, a standard store instruction is triggered (instead of the BIS). For the
|
||||
// latter, 8-byte alignment is necessary.
|
||||
Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
|
||||
cache_adr = new AndXNode(cache_adr, mask);
|
||||
transform_later(cache_adr);
|
||||
|
@ -90,16 +90,29 @@ Flag::Error CICompilerCountConstraintFunc(intx value, bool verbose) {
|
||||
}
|
||||
|
||||
Flag::Error AllocatePrefetchDistanceConstraintFunc(intx value, bool verbose) {
|
||||
if (value < 0) {
|
||||
if (value < 0 || value > 512) {
|
||||
CommandLineError::print(verbose,
|
||||
"Unable to determine system-specific value for AllocatePrefetchDistance. "
|
||||
"Please provide appropriate value, if unsure, use 0 to disable prefetching\n");
|
||||
"AllocatePrefetchDistance (" INTX_FORMAT ") must be "
|
||||
"between 0 and " INTX_FORMAT "\n",
|
||||
AllocatePrefetchDistance, 512);
|
||||
return Flag::VIOLATES_CONSTRAINT;
|
||||
}
|
||||
|
||||
return Flag::SUCCESS;
|
||||
}
|
||||
|
||||
Flag::Error AllocatePrefetchStepSizeConstraintFunc(intx value, bool verbose) {
|
||||
if (AllocatePrefetchStyle == 3) {
|
||||
if (value % wordSize != 0) {
|
||||
CommandLineError::print(verbose,
|
||||
"AllocatePrefetchStepSize (" INTX_FORMAT ") must be multiple of %d\n",
|
||||
value, wordSize);
|
||||
return Flag::VIOLATES_CONSTRAINT;
|
||||
}
|
||||
}
|
||||
return Flag::SUCCESS;
|
||||
}
|
||||
|
||||
Flag::Error AllocatePrefetchInstrConstraintFunc(intx value, bool verbose) {
|
||||
intx max_value = max_intx;
|
||||
#if defined(SPARC)
|
||||
@ -117,49 +130,6 @@ Flag::Error AllocatePrefetchInstrConstraintFunc(intx value, bool verbose) {
|
||||
return Flag::SUCCESS;
|
||||
}
|
||||
|
||||
Flag::Error AllocatePrefetchStepSizeConstraintFunc(intx value, bool verbose) {
|
||||
intx max_value = 512;
|
||||
if (value < 1 || value > max_value) {
|
||||
CommandLineError::print(verbose,
|
||||
"AllocatePrefetchStepSize (" INTX_FORMAT ") "
|
||||
"must be between 1 and %d\n",
|
||||
AllocatePrefetchStepSize,
|
||||
max_value);
|
||||
return Flag::VIOLATES_CONSTRAINT;
|
||||
}
|
||||
|
||||
if (AllocatePrefetchDistance % AllocatePrefetchStepSize != 0) {
|
||||
CommandLineError::print(verbose,
|
||||
"AllocatePrefetchDistance (" INTX_FORMAT ") "
|
||||
"%% AllocatePrefetchStepSize (" INTX_FORMAT ") "
|
||||
"= " INTX_FORMAT " "
|
||||
"must be 0\n",
|
||||
AllocatePrefetchDistance, AllocatePrefetchStepSize,
|
||||
AllocatePrefetchDistance % AllocatePrefetchStepSize);
|
||||
return Flag::VIOLATES_CONSTRAINT;
|
||||
}
|
||||
|
||||
/* The limit of 64 for the quotient of AllocatePrefetchDistance and AllocatePrefetchSize
|
||||
* originates from the limit of 64 for AllocatePrefetchLines/AllocateInstancePrefetchLines.
|
||||
* If AllocatePrefetchStyle == 2, the quotient from above is used in PhaseMacroExpand::prefetch_allocation()
|
||||
* to determine the number of lines to prefetch. For other values of AllocatePrefetchStyle,
|
||||
* AllocatePrefetchDistance and AllocatePrefetchSize is used. For consistency, all these
|
||||
* quantities must have the same limit (64 in this case).
|
||||
*/
|
||||
if (AllocatePrefetchDistance / AllocatePrefetchStepSize > 64) {
|
||||
CommandLineError::print(verbose,
|
||||
"AllocatePrefetchDistance (" INTX_FORMAT ") too large or "
|
||||
"AllocatePrefetchStepSize (" INTX_FORMAT ") too small; "
|
||||
"try decreasing/increasing values so that "
|
||||
"AllocatePrefetchDistance / AllocatePrefetchStepSize <= 64\n",
|
||||
AllocatePrefetchDistance, AllocatePrefetchStepSize,
|
||||
AllocatePrefetchDistance % AllocatePrefetchStepSize);
|
||||
return Flag::VIOLATES_CONSTRAINT;
|
||||
}
|
||||
|
||||
return Flag::SUCCESS;
|
||||
}
|
||||
|
||||
Flag::Error CompileThresholdConstraintFunc(intx value, bool verbose) {
|
||||
if (value < 0 || value > INT_MAX >> InvocationCounter::count_shift) {
|
||||
CommandLineError::print(verbose,
|
||||
|
@ -2901,9 +2901,9 @@ public:
|
||||
\
|
||||
product(intx, AllocatePrefetchStyle, 1, \
|
||||
"0 = no prefetch, " \
|
||||
"1 = prefetch instructions for each allocation, " \
|
||||
"1 = generate prefetch instructions for each allocation, " \
|
||||
"2 = use TLAB watermark to gate allocation prefetch, " \
|
||||
"3 = use BIS instruction on Sparc for allocation prefetch") \
|
||||
"3 = generate one prefetch instruction per cache line") \
|
||||
range(0, 3) \
|
||||
\
|
||||
product(intx, AllocatePrefetchDistance, -1, \
|
||||
@ -2926,8 +2926,8 @@ public:
|
||||
constraint(AllocatePrefetchStepSizeConstraintFunc,AfterMemoryInit)\
|
||||
\
|
||||
product(intx, AllocatePrefetchInstr, 0, \
|
||||
"Prefetch instruction to prefetch ahead of allocation pointer") \
|
||||
constraint(AllocatePrefetchInstrConstraintFunc, AfterErgo) \
|
||||
"Select instruction to prefetch ahead of allocation pointer") \
|
||||
constraint(AllocatePrefetchInstrConstraintFunc, AfterMemoryInit) \
|
||||
\
|
||||
/* deoptimization */ \
|
||||
develop(bool, TraceDeoptimization, false, \
|
||||
|
@ -600,6 +600,7 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
||||
nonstatic_field(ThreadLocalAllocBuffer, _pf_top, HeapWord*) \
|
||||
nonstatic_field(ThreadLocalAllocBuffer, _desired_size, size_t) \
|
||||
nonstatic_field(ThreadLocalAllocBuffer, _refill_waste_limit, size_t) \
|
||||
static_field(ThreadLocalAllocBuffer, _reserve_for_allocation_prefetch, int) \
|
||||
static_field(ThreadLocalAllocBuffer, _target_refills, unsigned) \
|
||||
nonstatic_field(ThreadLocalAllocBuffer, _number_of_refills, unsigned) \
|
||||
nonstatic_field(ThreadLocalAllocBuffer, _fast_refill_waste, unsigned) \
|
||||
@ -1318,7 +1319,6 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
||||
static_field(Abstract_VM_Version, _vm_minor_version, int) \
|
||||
static_field(Abstract_VM_Version, _vm_security_version, int) \
|
||||
static_field(Abstract_VM_Version, _vm_build_number, int) \
|
||||
static_field(Abstract_VM_Version, _reserve_for_allocation_prefetch, int) \
|
||||
\
|
||||
static_field(JDK_Version, _current, JDK_Version) \
|
||||
nonstatic_field(JDK_Version, _major, unsigned char) \
|
||||
|
@ -43,7 +43,6 @@ bool Abstract_VM_Version::_supports_atomic_getadd4 = false;
|
||||
bool Abstract_VM_Version::_supports_atomic_getadd8 = false;
|
||||
unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U;
|
||||
unsigned int Abstract_VM_Version::_L1_data_cache_line_size = 0;
|
||||
int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0;
|
||||
|
||||
#ifndef HOTSPOT_VERSION_STRING
|
||||
#error HOTSPOT_VERSION_STRING must be defined
|
||||
|
@ -57,7 +57,6 @@ class Abstract_VM_Version: AllStatic {
|
||||
static int _vm_build_number;
|
||||
static unsigned int _parallel_worker_threads;
|
||||
static bool _parallel_worker_threads_initialized;
|
||||
static int _reserve_for_allocation_prefetch;
|
||||
|
||||
static unsigned int nof_parallel_worker_threads(unsigned int num,
|
||||
unsigned int dem,
|
||||
@ -139,12 +138,6 @@ class Abstract_VM_Version: AllStatic {
|
||||
return _L1_data_cache_line_size;
|
||||
}
|
||||
|
||||
// Need a space at the end of TLAB for prefetch instructions
|
||||
// which may fault when accessing memory outside of heap.
|
||||
static int reserve_for_allocation_prefetch() {
|
||||
return _reserve_for_allocation_prefetch;
|
||||
}
|
||||
|
||||
// ARCH specific policy for the BiasedLocking
|
||||
static bool use_biased_locking() { return true; }
|
||||
|
||||
|
@ -89,13 +89,6 @@ public class TestOptionsWithRanges {
|
||||
*/
|
||||
excludeTestMaxRange("CICompilerCount");
|
||||
|
||||
/*
|
||||
* JDK-8153340
|
||||
* Temporary exclude AllocatePrefetchDistance option from testing
|
||||
*/
|
||||
excludeTestRange("AllocatePrefetchDistance");
|
||||
|
||||
|
||||
/*
|
||||
* JDK-8136766
|
||||
* Temporarily remove ThreadStackSize from testing because Windows can set it to 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user