8086053: Address inconsistencies regarding ZeroTLAB

Add zero-initialization to C1 for fast TLAB refills; strengthen C2 conditions for skipping zero-initialization.

Reviewed-by: kvn, thartmann
Zoltan Majo 2016-01-12 09:19:09 +01:00
parent 5be1924e89
commit dfa6539a6a
18 changed files with 215 additions and 128 deletions
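
Note (not part of the changeset): the condition this change applies consistently across C1, C2, and the TLAB support code is the conjunction of UseTLAB and ZeroTLAB, plus, for C1, whether the object was actually allocated in a TLAB. A minimal standalone C++ sketch of that decision; the helper name and the plain bools standing in for the -XX flags are illustrative only, not HotSpot identifiers:

#include <cstdio>

// Stand-ins for the -XX:+UseTLAB and -XX:+ZeroTLAB flags (illustrative only).
static bool UseTLAB  = true;
static bool ZeroTLAB = true;

// Hypothetical helper mirroring the guard used throughout this change:
// explicit zeroing of a newly allocated object may be skipped only if TLABs
// are enabled, TLABs are pre-zeroed, and the object really came out of a TLAB.
static bool can_skip_zero_init(bool is_tlab_allocated) {
  return UseTLAB && ZeroTLAB && is_tlab_allocated;
}

int main() {
  std::printf("TLAB allocation: skip zeroing = %d\n", can_skip_zero_init(true));   // prints 1
  std::printf("eden allocation: skip zeroing = %d\n", can_skip_zero_init(false));  // prints 0
  return 0;
}

Conversely, when ZeroTLAB is set, a fast TLAB refill performed by compiled code must zero the new TLAB itself, because the GC is not notified of the refill; that is what the new zero_memory helpers introduced below are for.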

View File

@@ -205,12 +205,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
 void C1_MacroAssembler::initialize_body(Register base, Register index) {
-  assert_different_registers(base, index);
-  Label loop;
-  bind(loop);
-  subcc(index, HeapWordSize, index);
-  brx(Assembler::greaterEqual, true, Assembler::pt, loop);
-  delayed()->st_ptr(G0, base, index);
+  zero_memory(base, index);
 }
@@ -237,7 +232,7 @@ void C1_MacroAssembler::allocate_object(
   }
   try_allocate(obj, noreg, obj_size * wordSize, t2, t3, slow_case);
-  initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2);
+  initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2, /* is_tlab_allocated */ UseTLAB);
 }

 void C1_MacroAssembler::initialize_object(
@@ -246,7 +241,8 @@ void C1_MacroAssembler::initialize_object(
   Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
   int      con_size_in_bytes, // object size in bytes if known at compile time
   Register t1,                // temp register
-  Register t2                 // temp register
+  Register t2,                // temp register
+  bool     is_tlab_allocated  // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
   ) {
   const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
@@ -269,31 +265,33 @@ void C1_MacroAssembler::initialize_object(
 #endif

-  // initialize body
-  const int threshold = 5 * HeapWordSize;        // approximate break even point for code size
-  if (var_size_in_bytes != noreg) {
-    // use a loop
-    add(obj, hdr_size_in_bytes, t1);               // compute address of first element
-    sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body
-    initialize_body(t1, t2);
+  if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
+    // initialize body
+    const int threshold = 5 * HeapWordSize;        // approximate break even point for code size
+    if (var_size_in_bytes != noreg) {
+      // use a loop
+      add(obj, hdr_size_in_bytes, t1);               // compute address of first element
+      sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body
+      initialize_body(t1, t2);
 #ifndef _LP64
-  } else if (con_size_in_bytes < threshold * 2) {
-    // on v9 we can do double word stores to fill twice as much space.
-    assert(hdr_size_in_bytes % 8 == 0, "double word aligned");
-    assert(con_size_in_bytes % 8 == 0, "double word aligned");
-    for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += 2 * HeapWordSize) stx(G0, obj, i);
+    } else if (con_size_in_bytes < threshold * 2) {
+      // on v9 we can do double word stores to fill twice as much space.
+      assert(hdr_size_in_bytes % 8 == 0, "double word aligned");
+      assert(con_size_in_bytes % 8 == 0, "double word aligned");
+      for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += 2 * HeapWordSize) stx(G0, obj, i);
 #endif
-  } else if (con_size_in_bytes <= threshold) {
-    // use explicit NULL stores
-    for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i);
-  } else if (con_size_in_bytes > hdr_size_in_bytes) {
-    // use a loop
-    const Register base  = t1;
-    const Register index = t2;
-    add(obj, hdr_size_in_bytes, base);             // compute address of first element
-    // compute index = number of words to clear
-    set(con_size_in_bytes - hdr_size_in_bytes, index);
-    initialize_body(base, index);
+    } else if (con_size_in_bytes <= threshold) {
+      // use explicit NULL stores
+      for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i);
+    } else if (con_size_in_bytes > hdr_size_in_bytes) {
+      // use a loop
+      const Register base  = t1;
+      const Register index = t2;
+      add(obj, hdr_size_in_bytes, base);             // compute address of first element
+      // compute index = number of words to clear
+      set(con_size_in_bytes - hdr_size_in_bytes, index);
+      initialize_body(base, index);
+    }
   }

   if (CURRENT_ENV->dtrace_alloc_probes()) {

View File

@@ -50,7 +50,8 @@
     Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
     int      con_size_in_bytes, // object size in bytes if known at compile time
     Register t1,                // temp register
-    Register t2                 // temp register
+    Register t2,                // temp register
+    bool     is_tlab_allocated  // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
   );

   // allocation of fixed-size objects

View File

@@ -435,7 +435,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
-        __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
+        __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ true);
         __ verify_oop(O0_obj);
         __ mov(O0, I0);
         __ ret();
@@ -447,7 +447,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
         __ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2);
-        __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
+        __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ false);
         __ verify_oop(O0_obj);
         __ mov(O0, I0);
         __ ret();
@@ -542,7 +542,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
         __ sub(G1_arr_size, G3_t1, O1_t2);  // body length
         __ add(O0_obj, G3_t1, G3_t1);       // body start
-        __ initialize_body(G3_t1, O1_t2);
+        if (!ZeroTLAB) {
+          __ initialize_body(G3_t1, O1_t2);
+        }
         __ verify_oop(O0_obj);
         __ retl();
         __ delayed()->nop();

View File

@@ -3459,11 +3459,27 @@ void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case
   add(top, t1, top); // t1 is tlab_size
   sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
   st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
+
+  if (ZeroTLAB) {
+    // This is a fast TLAB refill, therefore the GC is not notified of it.
+    // So compiled code must fill the new TLAB with zeroes.
+    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
+    zero_memory(t2, t1);
+  }
   verify_tlab();
   ba(retry);
   delayed()->nop();
 }

+void MacroAssembler::zero_memory(Register base, Register index) {
+  assert_different_registers(base, index);
+  Label loop;
+  bind(loop);
+  subcc(index, HeapWordSize, index);
+  brx(Assembler::greaterEqual, true, Assembler::pt, loop);
+  delayed()->st_ptr(G0, base, index);
+}
+
 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
                                           Register t1, Register t2) {
   // Bump total bytes allocated by this thread

View File

@@ -1278,6 +1278,7 @@ public:
     Label&   slow_case // continuation point if fast allocation fails
   );
   void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
+  void zero_memory(Register base, Register index);
   void incr_allocated_bytes(RegisterOrConstant size_in_bytes,
                             Register t1, Register t2);

View File

@@ -182,54 +182,13 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
 // preserves obj, destroys len_in_bytes
 void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
+  assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
   Label done;
-  assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
-  assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
-  Register index = len_in_bytes;
-  // index is positive and ptr sized
-  subptr(index, hdr_size_in_bytes);
-  jcc(Assembler::zero, done);
-  // initialize topmost word, divide index by 2, check if odd and test if zero
-  // note: for the remaining code to work, index must be a multiple of BytesPerWord
-#ifdef ASSERT
-  { Label L;
-    testptr(index, BytesPerWord - 1);
-    jcc(Assembler::zero, L);
-    stop("index is not a multiple of BytesPerWord");
-    bind(L);
-  }
-#endif
-  xorptr(t1, t1);      // use _zero reg to clear memory (shorter code)
-  if (UseIncDec) {
-    shrptr(index, 3);  // divide by 8/16 and set carry flag if bit 2 was set
-  } else {
-    shrptr(index, 2);  // use 2 instructions to avoid partial flag stall
-    shrptr(index, 1);
-  }
-#ifndef _LP64
-  // index could have been not a multiple of 8 (i.e., bit 2 was set)
-  { Label even;
-    // note: if index was a multiple of 8, than it cannot
-    //       be 0 now otherwise it must have been 0 before
-    //       => if it is even, we don't need to check for 0 again
-    jcc(Assembler::carryClear, even);
-    // clear topmost word (no jump needed if conditional assignment would work here)
-    movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 0*BytesPerWord), t1);
-    // index could be 0 now, need to check again
-    jcc(Assembler::zero, done);
-    bind(even);
-  }
-#endif // !_LP64
-  // initialize remaining object fields: rdx is a multiple of 2 now
-  { Label loop;
-    bind(loop);
-    movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 1*BytesPerWord), t1);
-    NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 2*BytesPerWord), t1);)
-    decrement(index);
-    jcc(Assembler::notZero, loop);
-  }
-  // done
+
+  // len_in_bytes is positive and ptr sized
+  subptr(len_in_bytes, hdr_size_in_bytes);
+  jcc(Assembler::zero, done);
+
+  zero_memory(obj, len_in_bytes, hdr_size_in_bytes, t1);
   bind(done);
 }
@@ -241,47 +200,49 @@ void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2,
   try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
-  initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
+  initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
 }

-void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
          "con_size_in_bytes is not multiple of alignment");
   const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;

   initialize_header(obj, klass, noreg, t1, t2);

-  // clear rest of allocated space
-  const Register t1_zero = t1;
-  const Register index = t2;
-  const int threshold = 6 * BytesPerWord;   // approximate break even point for code size (see comments below)
-  if (var_size_in_bytes != noreg) {
-    mov(index, var_size_in_bytes);
-    initialize_body(obj, index, hdr_size_in_bytes, t1_zero);
-  } else if (con_size_in_bytes <= threshold) {
-    // use explicit null stores
-    // code size = 2 + 3*n bytes (n = number of fields to clear)
-    xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
-    for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
-      movptr(Address(obj, i), t1_zero);
-  } else if (con_size_in_bytes > hdr_size_in_bytes) {
-    // use loop to null out the fields
-    // code size = 16 bytes for even n (n = number of fields to clear)
-    // initialize last object field first if odd number of fields
-    xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
-    movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3);
-    // initialize last object field if constant size is odd
-    if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0)
-      movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero);
-    // initialize remaining object fields: rdx is a multiple of 2
-    { Label loop;
-      bind(loop);
-      movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)),
-             t1_zero);
-      NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)),
-                      t1_zero);)
-      decrement(index);
-      jcc(Assembler::notZero, loop);
+  if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
+    // clear rest of allocated space
+    const Register t1_zero = t1;
+    const Register index = t2;
+    const int threshold = 6 * BytesPerWord;   // approximate break even point for code size (see comments below)
+    if (var_size_in_bytes != noreg) {
+      mov(index, var_size_in_bytes);
+      initialize_body(obj, index, hdr_size_in_bytes, t1_zero);
+    } else if (con_size_in_bytes <= threshold) {
+      // use explicit null stores
+      // code size = 2 + 3*n bytes (n = number of fields to clear)
+      xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
+      for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
+        movptr(Address(obj, i), t1_zero);
+    } else if (con_size_in_bytes > hdr_size_in_bytes) {
+      // use loop to null out the fields
+      // code size = 16 bytes for even n (n = number of fields to clear)
+      // initialize last object field first if odd number of fields
+      xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
+      movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3);
+      // initialize last object field if constant size is odd
+      if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0)
+        movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero);
+      // initialize remaining object fields: rdx is a multiple of 2
+      { Label loop;
+        bind(loop);
+        movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)),
+               t1_zero);
+        NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)),
+                        t1_zero);)
+        decrement(index);
+        jcc(Assembler::notZero, loop);
+      }
     }
   }

View File

@@ -65,7 +65,8 @@
     Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
     int      con_size_in_bytes, // object size in bytes if known at compile time
     Register t1,                // temp register
-    Register t2                 // temp register
+    Register t2,                // temp register
+    bool     is_tlab_allocated  // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
   );

   // allocation of fixed-size objects

View File

@@ -1040,7 +1040,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
-        __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+        __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
         __ verify_oop(obj);
         __ pop(rbx);
         __ pop(rdi);
@@ -1053,7 +1053,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         __ eden_allocate(obj, obj_size, 0, t1, slow_path);
         __ incr_allocated_bytes(thread, obj_size, 0);
-        __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+        __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
         __ verify_oop(obj);
         __ pop(rbx);
         __ pop(rdi);
@@ -1169,7 +1169,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         __ andptr(t1, Klass::_lh_header_size_mask);
         __ subptr(arr_size, t1);  // body length
         __ addptr(t1, obj);       // body start
-        __ initialize_body(t1, arr_size, 0, t2);
+        if (!ZeroTLAB) {
+          __ initialize_body(t1, arr_size, 0, t2);
+        }
         __ verify_oop(obj);
         __ ret(0);

View File

@@ -5426,7 +5426,7 @@ Register MacroAssembler::tlab_refill(Label& retry,
                                      Label& try_eden,
                                      Label& slow_case) {
   Register top = rax;
-  Register t1 = rcx;
+  Register t1 = rcx; // object size
   Register t2 = rsi;
   Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
   assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
@@ -5522,12 +5522,76 @@ Register MacroAssembler::tlab_refill(Label& retry,
   addptr(top, t1);
   subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
+
+  if (ZeroTLAB) {
+    // This is a fast TLAB refill, therefore the GC is not notified of it.
+    // So compiled code must fill the new TLAB with zeroes.
+    movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
+    zero_memory(top, t1, 0, t2);
+  }
+
   verify_tlab();
   jmp(retry);

   return thread_reg; // for use by caller
 }

+// Preserves the contents of address, destroys the contents length_in_bytes and temp.
+void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) {
+  assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different");
+  assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord");
+  Label done;
+
+  testptr(length_in_bytes, length_in_bytes);
+  jcc(Assembler::zero, done);
+
+  // initialize topmost word, divide index by 2, check if odd and test if zero
+  // note: for the remaining code to work, index must be a multiple of BytesPerWord
+#ifdef ASSERT
+  {
+    Label L;
+    testptr(length_in_bytes, BytesPerWord - 1);
+    jcc(Assembler::zero, L);
+    stop("length must be a multiple of BytesPerWord");
+    bind(L);
+  }
+#endif
+  Register index = length_in_bytes;
+  xorptr(temp, temp);    // use _zero reg to clear memory (shorter code)
+  if (UseIncDec) {
+    shrptr(index, 3);  // divide by 8/16 and set carry flag if bit 2 was set
+  } else {
+    shrptr(index, 2);  // use 2 instructions to avoid partial flag stall
+    shrptr(index, 1);
+  }
+#ifndef _LP64
+  // index could have not been a multiple of 8 (i.e., bit 2 was set)
+  {
+    Label even;
+    // note: if index was a multiple of 8, then it cannot
+    //       be 0 now otherwise it must have been 0 before
+    //       => if it is even, we don't need to check for 0 again
+    jcc(Assembler::carryClear, even);
+    // clear topmost word (no jump would be needed if conditional assignment worked here)
+    movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
+    // index could be 0 now, must check again
+    jcc(Assembler::zero, done);
+    bind(even);
+  }
+#endif // !_LP64
+
+  // initialize remaining object fields: index is a multiple of 2 now
+  {
+    Label loop;
+    bind(loop);
+    movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
+    NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
+    decrement(index);
+    jcc(Assembler::notZero, loop);
+  }
+
+  bind(done);
+}
+
 void MacroAssembler::incr_allocated_bytes(Register thread,
                                           Register var_size_in_bytes,
                                           int con_size_in_bytes,
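
Aside (not part of the changeset): a standalone model of the zeroing loop above, restricted to the LP64 case and assuming BytesPerWord == 8; all names below are illustrative. The byte count is shifted right by 3 to obtain the number of pointer-sized words (shrptr(index, 3)), and the loop counts index down to zero, storing the zero register at offset_in_bytes + (index - 1) * 8 on each iteration, mirroring the decrement/jcc(notZero) loop:

#include <cassert>
#include <cstdint>
#include <cstring>

// Illustrative model only; parameter names mirror the assembler helper above.
static void zero_memory_model(uint8_t* address, size_t length_in_bytes, size_t offset_in_bytes) {
  assert(length_in_bytes % sizeof(uint64_t) == 0);  // mirrors the BytesPerWord assert
  size_t index = length_in_bytes >> 3;              // number of 8-byte words, like shrptr(index, 3)
  const uint64_t zero = 0;
  while (index != 0) {                              // bind(loop) ... jcc(Assembler::notZero, loop)
    std::memcpy(address + offset_in_bytes + (index - 1) * sizeof(uint64_t), &zero, sizeof(zero));
    index--;                                        // decrement(index)
  }
}

int main() {
  uint8_t buf[64];
  std::memset(buf, 0xAB, sizeof(buf));
  zero_memory_model(buf, 48, 16);                   // clear 48 bytes starting at byte offset 16
  for (int i = 16; i < 64; i++) assert(buf[i] == 0);
  for (int i = 0;  i < 16; i++) assert(buf[i] == 0xAB);
  return 0;
}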

View File

@@ -522,6 +522,8 @@ class MacroAssembler: public Assembler {
     Label&   slow_case // continuation point if fast allocation fails
   );
   Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
+  void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp);
+
   void incr_allocated_bytes(Register thread,
                             Register var_size_in_bytes, int con_size_in_bytes,
                             Register t1 = noreg);

View File

@@ -105,7 +105,7 @@ void ThreadLocalAllocBuffer::accumulate_statistics() {
 // an illusion of a contiguous Eden and optionally retires the tlab.
 // Waste accounting should be done in caller as appropriate; see,
 // for example, clear_before_allocation().
-void ThreadLocalAllocBuffer::make_parsable(bool retire) {
+void ThreadLocalAllocBuffer::make_parsable(bool retire, bool zap) {
   if (end() != NULL) {
     invariants();
@@ -113,7 +113,7 @@ void ThreadLocalAllocBuffer::make_parsable(bool retire) {
       myThread()->incr_allocated_bytes(used_bytes());
     }

-    CollectedHeap::fill_with_object(top(), hard_end(), retire);
+    CollectedHeap::fill_with_object(top(), hard_end(), retire && zap);

     if (retire || ZeroTLAB) {  // "Reset" the TLAB
       set_start(NULL);

View File

@@ -145,8 +145,8 @@ public:
   // Initialization at startup
   static void startup_initialization();

-  // Make an in-use tlab parsable, optionally also retiring it.
-  void make_parsable(bool retire);
+  // Make an in-use tlab parsable, optionally retiring and/or zapping it.
+  void make_parsable(bool retire, bool zap = true);

   // Retire in-use tlab before allocation of a new tlab
   void clear_before_allocation();
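
For illustration only (not HotSpot code): with the new zap parameter defaulting to true, existing callers of make_parsable keep their behavior, while a caller that knows the TLAB contents are already zeroed can retire it without zapping. A tiny model of the condition that decides whether the unused TLAB tail is overwritten with a filler object (fill_with_object(top(), hard_end(), retire && zap) in the hunk above):

#include <cstdio>

// Illustrative model of make_parsable(bool retire, bool zap = true):
// the dummy filler is written over the unused TLAB tail only when the
// caller both retires the TLAB and asks for zapping.
static void make_parsable_model(bool retire, bool zap = true) {
  bool fill_with_dummy = retire && zap;
  std::printf("retire=%d zap=%d -> overwrite unused tail: %d\n", retire, zap, fill_with_dummy);
}

int main() {
  make_parsable_model(true);         // previous behavior: retire and zap
  make_parsable_model(true, false);  // new option: retire, keep the already-zeroed contents
  return 0;
}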

View File

@@ -3077,7 +3077,7 @@ bool LibraryCallKit::inline_native_isInterrupted() {
     set_control( _gvn.transform(new IfTrueNode(iff_arg)));
 #else
     // To return true on Windows you must read the _interrupted field
-    // and check the the event state i.e. take the slow path.
+    // and check the event state i.e. take the slow path.
 #endif // TARGET_OS_FAMILY_windows

     // (d) Otherwise, go to the slow path.

View File

@@ -1813,10 +1813,11 @@ PhaseMacroExpand::initialize_object(AllocateNode* alloc,
     // there can be two Allocates to one Initialize. The answer in all these
     // edge cases is safety first. It is always safe to clear immediately
     // within an Allocate, and then (maybe or maybe not) clear some more later.
-    if (!ZeroTLAB)
+    if (!(UseTLAB && ZeroTLAB)) {
       rawmem = ClearArrayNode::clear_memory(control, rawmem, object,
                                             header_size, size_in_bytes,
                                             &_igvn);
+    }
   } else {
     if (!init->is_complete()) {
       // Try to win by zeroing only what the init does not store.

View File

@@ -295,7 +295,7 @@ Node* PhaseMacroExpand::generate_arraycopy(ArrayCopyNode *ac, AllocateArrayNode*
   // out-edges of the dest, we need to avoid making derived pointers
   // from it until we have checked its uses.)
   if (ReduceBulkZeroing
-      && !ZeroTLAB              // pointless if already zeroed
+      && !(UseTLAB && ZeroTLAB) // pointless if already zeroed
       && basic_elem_type != T_CONFLICT // avoid corner case
       && !src->eqv_uncast(dest)
       && alloc != NULL

View File

@@ -3850,7 +3850,7 @@ Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
   bool do_zeroing = true;   // we might give up if inits are very sparse
   int  big_init_gaps = 0;   // how many large gaps have we seen?

-  if (ZeroTLAB)  do_zeroing = false;
+  if (UseTLAB && ZeroTLAB)  do_zeroing = false;
   if (!ReduceFieldZeroing && !ReduceBulkZeroing)  do_zeroing = false;

   for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
@@ -3951,7 +3951,7 @@ Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
   remove_extra_zeroes();  // clear out all the zmems left over
   add_req(inits);

-  if (!ZeroTLAB) {
+  if (!(UseTLAB && ZeroTLAB)) {
     // If anything remains to be zeroed, zero it all now.
     zeroes_done = align_size_down(zeroes_done, BytesPerInt);
     // if it is the last unused 4 bytes of an instance, forget about it

View File

@@ -288,6 +288,7 @@ hotspot_compiler_3 = \
   compiler/jsr292/ \
   compiler/loopopts/ \
   compiler/macronodes/ \
+  compiler/memoryinitialization/ \
   compiler/osr/ \
   compiler/regalloc/ \
   compiler/runtime/ \

View File

@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 8086053
+ * @run main/othervm -Xcomp -XX:+UseTLAB -XX:+ZeroTLAB ZeroTLABTest
+ * @run main/othervm -Xcomp -XX:+UseTLAB -XX:-ZeroTLAB ZeroTLABTest
+ * @run main/othervm -Xcomp -XX:-UseTLAB -XX:+ZeroTLAB ZeroTLABTest
+ * @run main/othervm -Xcomp -XX:-UseTLAB -XX:-ZeroTLAB ZeroTLABTest
+ */
+public class ZeroTLABTest {
+    public static void main(String args[]) {
+        System.out.println("Test PASSED");
+    }
+}