8050147: StoreLoad barrier interferes with stack usages

Reviewed-by: jrose, kvn, drchase
This commit is contained in:
Aleksey Shipilev 2014-09-04 13:11:25 +04:00
parent 0bfeda937a
commit 13329b5040
14 changed files with 79 additions and 5 deletions

View File

@ -26,6 +26,7 @@
#define CPU_X86_VM_ASSEMBLER_X86_HPP
#include "asm/register.hpp"
#include "vm_version_x86.hpp"
class BiasedLockingCounters;
@ -1292,14 +1293,34 @@ private:
if (order_constraint & StoreLoad) {
// All usable chips support "locked" instructions which suffice
// as barriers, and are much faster than the alternative of
// using cpuid instruction. We use here a locked add [esp],0.
// using cpuid instruction. We use here a locked add [esp-C],0.
// This is conveniently otherwise a no-op except for blowing
// flags.
// flags, and introducing a false dependency on target memory
// location. We can't do anything with flags, but we can avoid
// memory dependencies in the current method by locked-adding
// somewhere else on the stack. Doing [esp+C] will collide with
// something on stack in current method, hence we go for [esp-C].
// It is convenient since it is almost always in data cache, for
// any small C. We need to step back from SP to avoid data
// dependencies with other things below SP (callee-saves, for
// example). Without a clear way to figure out the minimal safe
// distance from SP, it makes sense to step back the complete
// cache line, as this will also avoid possible second-order effects
// with locked ops against the cache line. Our choice of offset
// is bounded by x86 operand encoding, which should stay within
// [-128; +127] to have the 8-bit displacement encoding.
//
// Any change to this code may need to revisit other places in
// the code where this idiom is used, in particular the
// orderAccess code.
int offset = -VM_Version::L1_line_size();
if (offset < -128) {
offset = -128;
}
lock();
addl(Address(rsp, 0), 0);// Assert the lock# signal here
addl(Address(rsp, offset), 0);// Assert the lock# signal here
}
}
}

View File

@ -563,3 +563,8 @@ void os::verify_stack_alignment() {
assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
}
#endif
// Returns the number of bytes to bang beyond the frame size on this platform.
// The return type must be int to match the declaration in class os (and the
// value returned below).
int os::extra_bang_size_in_bytes() {
  // PPC does not require the additional stack bang.
  return 0;
}

View File

@ -1030,3 +1030,8 @@ void os::setup_fpu() {
void os::verify_stack_alignment() {
}
#endif
int os::extra_bang_size_in_bytes() {
  // On x86, JDK-8050147 needs one full L1 cache line banged in addition
  // to the frame itself.
  const int cache_line_bytes = VM_Version::L1_line_size();
  return cache_line_bytes;
}

View File

@ -465,3 +465,8 @@ extern "C" {
void os::verify_stack_alignment() {
}
#endif
int os::extra_bang_size_in_bytes() {
  // Zero needs no stack bang beyond the frame itself.
  const int extra_bytes = 0;
  return extra_bytes;
}

View File

@ -612,3 +612,8 @@ void os::verify_stack_alignment() {
assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
}
#endif
// Returns the number of bytes to bang beyond the frame size on this platform.
int os::extra_bang_size_in_bytes() {
  // PPC does not require the additional stack bang.
  return 0;
}

View File

@ -752,3 +752,8 @@ size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
void os::verify_stack_alignment() {
}
#endif
int os::extra_bang_size_in_bytes() {
  // SPARC needs no stack bang beyond the frame itself.
  const int extra_bytes = 0;
  return extra_bytes;
}

View File

@ -930,3 +930,8 @@ void os::workaround_expand_exec_shield_cs_limit() {
// keep the page mapped so CS limit isn't reduced.
#endif
}
int os::extra_bang_size_in_bytes() {
  // On x86, JDK-8050147 needs one full L1 cache line banged in addition
  // to the frame itself.
  const int cache_line_bytes = VM_Version::L1_line_size();
  return cache_line_bytes;
}

View File

@ -495,3 +495,8 @@ extern "C" {
void os::verify_stack_alignment() {
}
#endif
int os::extra_bang_size_in_bytes() {
  // Zero needs no stack banging beyond the frame itself.
  const int extra_bytes = 0;
  return extra_bytes;
}

View File

@ -774,3 +774,8 @@ add_func_t* os::atomic_add_func = os::atomic_add_bootstrap;
void os::verify_stack_alignment() {
}
#endif
int os::extra_bang_size_in_bytes() {
  // SPARC needs no stack bang beyond the frame itself.
  const int extra_bytes = 0;
  return extra_bytes;
}

View File

@ -918,3 +918,8 @@ void os::verify_stack_alignment() {
#endif
}
#endif
int os::extra_bang_size_in_bytes() {
  // On x86, JDK-8050147 needs one full L1 cache line banged in addition
  // to the frame itself.
  const int cache_line_bytes = VM_Version::L1_line_size();
  return cache_line_bytes;
}

View File

@ -639,3 +639,8 @@ void os::verify_stack_alignment() {
#endif
}
#endif
int os::extra_bang_size_in_bytes() {
  // On x86, JDK-8050147 needs one full L1 cache line banged in addition
  // to the frame itself.
  const int cache_line_bytes = VM_Version::L1_line_size();
  return cache_line_bytes;
}

View File

@ -170,7 +170,7 @@ address LIR_Assembler::pc() const {
// removes the need to bang the stack in the deoptimization blob which
// in turn simplifies stack overflow handling.
int LIR_Assembler::bang_size_in_bytes() const {
  // Bang the larger of (a) this method's initial frame plus the
  // platform-specific extra amount from os::extra_bang_size_in_bytes(), and
  // (b) the interpreter frame size recorded on the compilation.
  return MAX2(initial_frame_size_in_bytes() + os::extra_bang_size_in_bytes(),
              _compilation->interpreter_frame_size());
}
void LIR_Assembler::emit_exception_entries(ExceptionInfoList* info_list) {

View File

@ -430,7 +430,7 @@ int Compile::frame_size_in_words() const {
// removes the need to bang the stack in the deoptimization blob which
// in turn simplifies stack overflow handling.
int Compile::bang_size_in_bytes() const {
  // Bang the larger of (a) this method's frame plus the platform-specific
  // extra amount from os::extra_bang_size_in_bytes(), and (b) the recorded
  // interpreter frame size.
  return MAX2(frame_size_in_bytes() + os::extra_bang_size_in_bytes(),
              _interpreter_frame_size);
}
// ============================================================================

View File

@ -761,6 +761,9 @@ class os: AllStatic {
// Hook for os specific jvm options that we don't want to abort on seeing
static bool obsolete_option(const JavaVMOption *option);
// Amount beyond the callee frame size that we bang the stack.
static int extra_bang_size_in_bytes();
// Extensions
#include "runtime/os_ext.hpp"