8086053: Address inconsistencies regarding ZeroTLAB
Add zero-initialization to C1 for fast TLAB refills; strengthen C2 conditions for skipping zero-initialization. Reviewed-by: kvn, thartmann
This commit is contained in:
parent
5be1924e89
commit
dfa6539a6a
@ -205,12 +205,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
|
||||
|
||||
|
||||
void C1_MacroAssembler::initialize_body(Register base, Register index) {
|
||||
assert_different_registers(base, index);
|
||||
Label loop;
|
||||
bind(loop);
|
||||
subcc(index, HeapWordSize, index);
|
||||
brx(Assembler::greaterEqual, true, Assembler::pt, loop);
|
||||
delayed()->st_ptr(G0, base, index);
|
||||
zero_memory(base, index);
|
||||
}
|
||||
|
||||
|
||||
@ -237,7 +232,7 @@ void C1_MacroAssembler::allocate_object(
|
||||
}
|
||||
try_allocate(obj, noreg, obj_size * wordSize, t2, t3, slow_case);
|
||||
|
||||
initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2);
|
||||
initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2, /* is_tlab_allocated */ UseTLAB);
|
||||
}
|
||||
|
||||
void C1_MacroAssembler::initialize_object(
|
||||
@ -246,7 +241,8 @@ void C1_MacroAssembler::initialize_object(
|
||||
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
||||
int con_size_in_bytes, // object size in bytes if known at compile time
|
||||
Register t1, // temp register
|
||||
Register t2 // temp register
|
||||
Register t2, // temp register
|
||||
bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
|
||||
) {
|
||||
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
|
||||
|
||||
@ -269,31 +265,33 @@ void C1_MacroAssembler::initialize_object(
|
||||
|
||||
#endif
|
||||
|
||||
// initialize body
|
||||
const int threshold = 5 * HeapWordSize; // approximate break even point for code size
|
||||
if (var_size_in_bytes != noreg) {
|
||||
// use a loop
|
||||
add(obj, hdr_size_in_bytes, t1); // compute address of first element
|
||||
sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body
|
||||
initialize_body(t1, t2);
|
||||
if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
|
||||
// initialize body
|
||||
const int threshold = 5 * HeapWordSize; // approximate break even point for code size
|
||||
if (var_size_in_bytes != noreg) {
|
||||
// use a loop
|
||||
add(obj, hdr_size_in_bytes, t1); // compute address of first element
|
||||
sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body
|
||||
initialize_body(t1, t2);
|
||||
#ifndef _LP64
|
||||
} else if (con_size_in_bytes < threshold * 2) {
|
||||
// on v9 we can do double word stores to fill twice as much space.
|
||||
assert(hdr_size_in_bytes % 8 == 0, "double word aligned");
|
||||
assert(con_size_in_bytes % 8 == 0, "double word aligned");
|
||||
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += 2 * HeapWordSize) stx(G0, obj, i);
|
||||
} else if (con_size_in_bytes < threshold * 2) {
|
||||
// on v9 we can do double word stores to fill twice as much space.
|
||||
assert(hdr_size_in_bytes % 8 == 0, "double word aligned");
|
||||
assert(con_size_in_bytes % 8 == 0, "double word aligned");
|
||||
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += 2 * HeapWordSize) stx(G0, obj, i);
|
||||
#endif
|
||||
} else if (con_size_in_bytes <= threshold) {
|
||||
// use explicit NULL stores
|
||||
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i);
|
||||
} else if (con_size_in_bytes > hdr_size_in_bytes) {
|
||||
// use a loop
|
||||
const Register base = t1;
|
||||
const Register index = t2;
|
||||
add(obj, hdr_size_in_bytes, base); // compute address of first element
|
||||
// compute index = number of words to clear
|
||||
set(con_size_in_bytes - hdr_size_in_bytes, index);
|
||||
initialize_body(base, index);
|
||||
} else if (con_size_in_bytes <= threshold) {
|
||||
// use explicit NULL stores
|
||||
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i);
|
||||
} else if (con_size_in_bytes > hdr_size_in_bytes) {
|
||||
// use a loop
|
||||
const Register base = t1;
|
||||
const Register index = t2;
|
||||
add(obj, hdr_size_in_bytes, base); // compute address of first element
|
||||
// compute index = number of words to clear
|
||||
set(con_size_in_bytes - hdr_size_in_bytes, index);
|
||||
initialize_body(base, index);
|
||||
}
|
||||
}
|
||||
|
||||
if (CURRENT_ENV->dtrace_alloc_probes()) {
|
||||
|
@ -50,7 +50,8 @@
|
||||
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
||||
int con_size_in_bytes, // object size in bytes if known at compile time
|
||||
Register t1, // temp register
|
||||
Register t2 // temp register
|
||||
Register t2, // temp register
|
||||
bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
|
||||
);
|
||||
|
||||
// allocation of fixed-size objects
|
||||
|
@ -435,7 +435,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
||||
|
||||
__ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
|
||||
|
||||
__ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
|
||||
__ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ true);
|
||||
__ verify_oop(O0_obj);
|
||||
__ mov(O0, I0);
|
||||
__ ret();
|
||||
@ -447,7 +447,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
||||
__ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
|
||||
__ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2);
|
||||
|
||||
__ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
|
||||
__ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ false);
|
||||
__ verify_oop(O0_obj);
|
||||
__ mov(O0, I0);
|
||||
__ ret();
|
||||
@ -542,7 +542,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
||||
__ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
|
||||
__ sub(G1_arr_size, G3_t1, O1_t2); // body length
|
||||
__ add(O0_obj, G3_t1, G3_t1); // body start
|
||||
__ initialize_body(G3_t1, O1_t2);
|
||||
if (!ZeroTLAB) {
|
||||
__ initialize_body(G3_t1, O1_t2);
|
||||
}
|
||||
__ verify_oop(O0_obj);
|
||||
__ retl();
|
||||
__ delayed()->nop();
|
||||
|
@ -3459,11 +3459,27 @@ void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case
|
||||
add(top, t1, top); // t1 is tlab_size
|
||||
sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
|
||||
st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
|
||||
|
||||
if (ZeroTLAB) {
|
||||
// This is a fast TLAB refill, therefore the GC is not notified of it.
|
||||
// So compiled code must fill the new TLAB with zeroes.
|
||||
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
|
||||
zero_memory(t2, t1);
|
||||
}
|
||||
verify_tlab();
|
||||
ba(retry);
|
||||
delayed()->nop();
|
||||
}
|
||||
|
||||
// Clears memory by repeatedly storing G0 (the register this file also uses for its
// explicit NULL stores) at address base + index.
//   base  - start address of the area to clear; only read here.
//   index - on entry the length of the area in bytes; it is decremented by
//           HeapWordSize on every iteration and is therefore destroyed.
// base and index must be different registers (asserted below).
void MacroAssembler::zero_memory(Register base, Register index) {
|
||||
assert_different_registers(base, index);
|
||||
Label loop;
|
||||
bind(loop);
|
||||
// step index down by one heap word and set the condition codes for the branch
subcc(index, HeapWordSize, index);
|
||||
// loop while index is still >= 0
// NOTE(review): the 'true' argument appears to be the annul bit, in which case the
// delay-slot store below only executes when the branch is taken, so nothing is
// written once index has gone negative -- confirm against the Assembler API.
brx(Assembler::greaterEqual, true, Assembler::pt, loop);
|
||||
// delay slot: clear the word at base + index
delayed()->st_ptr(G0, base, index);
|
||||
}
|
||||
|
||||
void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
|
||||
Register t1, Register t2) {
|
||||
// Bump total bytes allocated by this thread
|
||||
|
@ -1278,6 +1278,7 @@ public:
|
||||
Label& slow_case // continuation point if fast allocation fails
|
||||
);
|
||||
void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
|
||||
void zero_memory(Register base, Register index);
|
||||
void incr_allocated_bytes(RegisterOrConstant size_in_bytes,
|
||||
Register t1, Register t2);
|
||||
|
||||
|
@ -182,54 +182,13 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
|
||||
|
||||
// preserves obj, destroys len_in_bytes
|
||||
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
|
||||
assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
|
||||
Label done;
|
||||
assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
|
||||
assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
|
||||
Register index = len_in_bytes;
|
||||
// index is positive and ptr sized
|
||||
subptr(index, hdr_size_in_bytes);
|
||||
jcc(Assembler::zero, done);
|
||||
// initialize topmost word, divide index by 2, check if odd and test if zero
|
||||
// note: for the remaining code to work, index must be a multiple of BytesPerWord
|
||||
#ifdef ASSERT
|
||||
{ Label L;
|
||||
testptr(index, BytesPerWord - 1);
|
||||
jcc(Assembler::zero, L);
|
||||
stop("index is not a multiple of BytesPerWord");
|
||||
bind(L);
|
||||
}
|
||||
#endif
|
||||
xorptr(t1, t1); // use _zero reg to clear memory (shorter code)
|
||||
if (UseIncDec) {
|
||||
shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
|
||||
} else {
|
||||
shrptr(index, 2); // use 2 instructions to avoid partial flag stall
|
||||
shrptr(index, 1);
|
||||
}
|
||||
#ifndef _LP64
|
||||
// index could have been not a multiple of 8 (i.e., bit 2 was set)
|
||||
{ Label even;
|
||||
// note: if index was a multiple of 8, than it cannot
|
||||
// be 0 now otherwise it must have been 0 before
|
||||
// => if it is even, we don't need to check for 0 again
|
||||
jcc(Assembler::carryClear, even);
|
||||
// clear topmost word (no jump needed if conditional assignment would work here)
|
||||
movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 0*BytesPerWord), t1);
|
||||
// index could be 0 now, need to check again
|
||||
jcc(Assembler::zero, done);
|
||||
bind(even);
|
||||
}
|
||||
#endif // !_LP64
|
||||
// initialize remaining object fields: rdx is a multiple of 2 now
|
||||
{ Label loop;
|
||||
bind(loop);
|
||||
movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 1*BytesPerWord), t1);
|
||||
NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 2*BytesPerWord), t1);)
|
||||
decrement(index);
|
||||
jcc(Assembler::notZero, loop);
|
||||
}
|
||||
|
||||
// done
|
||||
// len_in_bytes is positive and ptr sized
|
||||
subptr(len_in_bytes, hdr_size_in_bytes);
|
||||
jcc(Assembler::zero, done);
|
||||
zero_memory(obj, len_in_bytes, hdr_size_in_bytes, t1);
|
||||
bind(done);
|
||||
}
|
||||
|
||||
@ -241,47 +200,49 @@ void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2,
|
||||
|
||||
try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
|
||||
|
||||
initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
|
||||
initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
|
||||
}
|
||||
|
||||
void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
|
||||
void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
|
||||
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
|
||||
"con_size_in_bytes is not multiple of alignment");
|
||||
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
|
||||
|
||||
initialize_header(obj, klass, noreg, t1, t2);
|
||||
|
||||
// clear rest of allocated space
|
||||
const Register t1_zero = t1;
|
||||
const Register index = t2;
|
||||
const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below)
|
||||
if (var_size_in_bytes != noreg) {
|
||||
mov(index, var_size_in_bytes);
|
||||
initialize_body(obj, index, hdr_size_in_bytes, t1_zero);
|
||||
} else if (con_size_in_bytes <= threshold) {
|
||||
// use explicit null stores
|
||||
// code size = 2 + 3*n bytes (n = number of fields to clear)
|
||||
xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
|
||||
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
|
||||
movptr(Address(obj, i), t1_zero);
|
||||
} else if (con_size_in_bytes > hdr_size_in_bytes) {
|
||||
// use loop to null out the fields
|
||||
// code size = 16 bytes for even n (n = number of fields to clear)
|
||||
// initialize last object field first if odd number of fields
|
||||
xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
|
||||
movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3);
|
||||
// initialize last object field if constant size is odd
|
||||
if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0)
|
||||
movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero);
|
||||
// initialize remaining object fields: rdx is a multiple of 2
|
||||
{ Label loop;
|
||||
bind(loop);
|
||||
movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)),
|
||||
t1_zero);
|
||||
NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)),
|
||||
t1_zero);)
|
||||
decrement(index);
|
||||
jcc(Assembler::notZero, loop);
|
||||
if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
|
||||
// clear rest of allocated space
|
||||
const Register t1_zero = t1;
|
||||
const Register index = t2;
|
||||
const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below)
|
||||
if (var_size_in_bytes != noreg) {
|
||||
mov(index, var_size_in_bytes);
|
||||
initialize_body(obj, index, hdr_size_in_bytes, t1_zero);
|
||||
} else if (con_size_in_bytes <= threshold) {
|
||||
// use explicit null stores
|
||||
// code size = 2 + 3*n bytes (n = number of fields to clear)
|
||||
xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
|
||||
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
|
||||
movptr(Address(obj, i), t1_zero);
|
||||
} else if (con_size_in_bytes > hdr_size_in_bytes) {
|
||||
// use loop to null out the fields
|
||||
// code size = 16 bytes for even n (n = number of fields to clear)
|
||||
// initialize last object field first if odd number of fields
|
||||
xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
|
||||
movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3);
|
||||
// initialize last object field if constant size is odd
|
||||
if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0)
|
||||
movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero);
|
||||
// initialize remaining object fields: rdx is a multiple of 2
|
||||
{ Label loop;
|
||||
bind(loop);
|
||||
movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)),
|
||||
t1_zero);
|
||||
NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)),
|
||||
t1_zero);)
|
||||
decrement(index);
|
||||
jcc(Assembler::notZero, loop);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,8 @@
|
||||
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
||||
int con_size_in_bytes, // object size in bytes if known at compile time
|
||||
Register t1, // temp register
|
||||
Register t2 // temp register
|
||||
Register t2, // temp register
|
||||
bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
|
||||
);
|
||||
|
||||
// allocation of fixed-size objects
|
||||
|
@ -1040,7 +1040,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
||||
|
||||
__ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
|
||||
|
||||
__ initialize_object(obj, klass, obj_size, 0, t1, t2);
|
||||
__ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
|
||||
__ verify_oop(obj);
|
||||
__ pop(rbx);
|
||||
__ pop(rdi);
|
||||
@ -1053,7 +1053,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
||||
__ eden_allocate(obj, obj_size, 0, t1, slow_path);
|
||||
__ incr_allocated_bytes(thread, obj_size, 0);
|
||||
|
||||
__ initialize_object(obj, klass, obj_size, 0, t1, t2);
|
||||
__ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
|
||||
__ verify_oop(obj);
|
||||
__ pop(rbx);
|
||||
__ pop(rdi);
|
||||
@ -1169,7 +1169,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
||||
__ andptr(t1, Klass::_lh_header_size_mask);
|
||||
__ subptr(arr_size, t1); // body length
|
||||
__ addptr(t1, obj); // body start
|
||||
__ initialize_body(t1, arr_size, 0, t2);
|
||||
if (!ZeroTLAB) {
|
||||
__ initialize_body(t1, arr_size, 0, t2);
|
||||
}
|
||||
__ verify_oop(obj);
|
||||
__ ret(0);
|
||||
|
||||
|
@ -5426,7 +5426,7 @@ Register MacroAssembler::tlab_refill(Label& retry,
|
||||
Label& try_eden,
|
||||
Label& slow_case) {
|
||||
Register top = rax;
|
||||
Register t1 = rcx;
|
||||
Register t1 = rcx; // object size
|
||||
Register t2 = rsi;
|
||||
Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
|
||||
assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
|
||||
@ -5522,12 +5522,76 @@ Register MacroAssembler::tlab_refill(Label& retry,
|
||||
addptr(top, t1);
|
||||
subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
|
||||
movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
|
||||
|
||||
if (ZeroTLAB) {
|
||||
// This is a fast TLAB refill, therefore the GC is not notified of it.
|
||||
// So compiled code must fill the new TLAB with zeroes.
|
||||
movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
|
||||
zero_memory(top, t1, 0, t2);
|
||||
}
|
||||
|
||||
verify_tlab();
|
||||
jmp(retry);
|
||||
|
||||
return thread_reg; // for use by caller
|
||||
}
|
||||
|
||||
// Emits code that clears length_in_bytes bytes of memory addressed relative to
// address + offset_in_bytes.
//   address         - base register; its contents are preserved.
//   length_in_bytes - number of bytes to clear; must be a multiple of BytesPerWord
//                     (checked at run time in ASSERT builds). Destroyed: it is reused
//                     as the loop index.
//   offset_in_bytes - constant displacement added to every store address; must be a
//                     multiple of BytesPerWord (asserted).
//   temp            - scratch register; zeroed and used as the store source. Destroyed.
// A length of zero is handled by branching straight to 'done'.
|
||||
void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) {
|
||||
assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different");
|
||||
assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord");
|
||||
Label done;
|
||||
|
||||
// nothing to do for an empty area
testptr(length_in_bytes, length_in_bytes);
|
||||
jcc(Assembler::zero, done);
|
||||
|
||||
// initialize topmost word, divide index by 2, check if odd and test if zero
|
||||
// note: for the remaining code to work, index must be a multiple of BytesPerWord
|
||||
#ifdef ASSERT
|
||||
{
|
||||
Label L;
|
||||
testptr(length_in_bytes, BytesPerWord - 1);
|
||||
jcc(Assembler::zero, L);
|
||||
stop("length must be a multiple of BytesPerWord");
|
||||
bind(L);
|
||||
}
|
||||
#endif
|
||||
// from here on the length register serves as the loop index
Register index = length_in_bytes;
|
||||
xorptr(temp, temp); // use _zero reg to clear memory (shorter code)
|
||||
// both branches shift index right by 3 bits in total; the flags left by the final
// shift are consumed by the !_LP64 code below
if (UseIncDec) {
|
||||
shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
|
||||
} else {
|
||||
shrptr(index, 2); // use 2 instructions to avoid partial flag stall
|
||||
shrptr(index, 1);
|
||||
}
|
||||
#ifndef _LP64
|
||||
// index might not have been a multiple of 8 (i.e., bit 2 was set)
|
||||
{
|
||||
Label even;
|
||||
// note: if index was a multiple of 8, then it cannot
|
||||
// be 0 now otherwise it must have been 0 before
|
||||
// => if it is even, we don't need to check for 0 again
|
||||
jcc(Assembler::carryClear, even);
|
||||
// clear topmost word (no jump would be needed if conditional assignment worked here)
|
||||
movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
|
||||
// index could be 0 now, must check again
|
||||
jcc(Assembler::zero, done);
|
||||
bind(even);
|
||||
}
|
||||
#endif // !_LP64
|
||||
// initialize remaining object fields: index is a multiple of 2 now
|
||||
// each iteration clears the 8 bytes just below address + index*8 + offset_in_bytes
// (one store, plus a second store on !_LP64) and then decrements index until it is 0
{
|
||||
Label loop;
|
||||
bind(loop);
|
||||
movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
|
||||
NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
|
||||
decrement(index);
|
||||
jcc(Assembler::notZero, loop);
|
||||
}
|
||||
|
||||
bind(done);
|
||||
}
|
||||
|
||||
void MacroAssembler::incr_allocated_bytes(Register thread,
|
||||
Register var_size_in_bytes,
|
||||
int con_size_in_bytes,
|
||||
|
@ -522,6 +522,8 @@ class MacroAssembler: public Assembler {
|
||||
Label& slow_case // continuation point if fast allocation fails
|
||||
);
|
||||
Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
|
||||
void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp);
|
||||
|
||||
void incr_allocated_bytes(Register thread,
|
||||
Register var_size_in_bytes, int con_size_in_bytes,
|
||||
Register t1 = noreg);
|
||||
|
@ -105,7 +105,7 @@ void ThreadLocalAllocBuffer::accumulate_statistics() {
|
||||
// an illusion of a contiguous Eden and optionally retires the tlab.
|
||||
// Waste accounting should be done in caller as appropriate; see,
|
||||
// for example, clear_before_allocation().
|
||||
void ThreadLocalAllocBuffer::make_parsable(bool retire) {
|
||||
void ThreadLocalAllocBuffer::make_parsable(bool retire, bool zap) {
|
||||
if (end() != NULL) {
|
||||
invariants();
|
||||
|
||||
@ -113,7 +113,7 @@ void ThreadLocalAllocBuffer::make_parsable(bool retire) {
|
||||
myThread()->incr_allocated_bytes(used_bytes());
|
||||
}
|
||||
|
||||
CollectedHeap::fill_with_object(top(), hard_end(), retire);
|
||||
CollectedHeap::fill_with_object(top(), hard_end(), retire && zap);
|
||||
|
||||
if (retire || ZeroTLAB) { // "Reset" the TLAB
|
||||
set_start(NULL);
|
||||
|
@ -145,8 +145,8 @@ public:
|
||||
// Initialization at startup
|
||||
static void startup_initialization();
|
||||
|
||||
// Make an in-use tlab parsable, optionally also retiring it.
|
||||
void make_parsable(bool retire);
|
||||
// Make an in-use tlab parsable, optionally retiring and/or zapping it.
|
||||
void make_parsable(bool retire, bool zap = true);
|
||||
|
||||
// Retire in-use tlab before allocation of a new tlab
|
||||
void clear_before_allocation();
|
||||
|
@ -3077,7 +3077,7 @@ bool LibraryCallKit::inline_native_isInterrupted() {
|
||||
set_control( _gvn.transform(new IfTrueNode(iff_arg)));
|
||||
#else
|
||||
// To return true on Windows you must read the _interrupted field
|
||||
// and check the the event state i.e. take the slow path.
|
||||
// and check the event state i.e. take the slow path.
|
||||
#endif // TARGET_OS_FAMILY_windows
|
||||
|
||||
// (d) Otherwise, go to the slow path.
|
||||
|
@ -1813,10 +1813,11 @@ PhaseMacroExpand::initialize_object(AllocateNode* alloc,
|
||||
// there can be two Allocates to one Initialize. The answer in all these
|
||||
// edge cases is safety first. It is always safe to clear immediately
|
||||
// within an Allocate, and then (maybe or maybe not) clear some more later.
|
||||
if (!ZeroTLAB)
|
||||
if (!(UseTLAB && ZeroTLAB)) {
|
||||
rawmem = ClearArrayNode::clear_memory(control, rawmem, object,
|
||||
header_size, size_in_bytes,
|
||||
&_igvn);
|
||||
}
|
||||
} else {
|
||||
if (!init->is_complete()) {
|
||||
// Try to win by zeroing only what the init does not store.
|
||||
|
@ -295,7 +295,7 @@ Node* PhaseMacroExpand::generate_arraycopy(ArrayCopyNode *ac, AllocateArrayNode*
|
||||
// out-edges of the dest, we need to avoid making derived pointers
|
||||
// from it until we have checked its uses.)
|
||||
if (ReduceBulkZeroing
|
||||
&& !ZeroTLAB // pointless if already zeroed
|
||||
&& !(UseTLAB && ZeroTLAB) // pointless if already zeroed
|
||||
&& basic_elem_type != T_CONFLICT // avoid corner case
|
||||
&& !src->eqv_uncast(dest)
|
||||
&& alloc != NULL
|
||||
|
@ -3850,7 +3850,7 @@ Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
|
||||
bool do_zeroing = true; // we might give up if inits are very sparse
|
||||
int big_init_gaps = 0; // how many large gaps have we seen?
|
||||
|
||||
if (ZeroTLAB) do_zeroing = false;
|
||||
if (UseTLAB && ZeroTLAB) do_zeroing = false;
|
||||
if (!ReduceFieldZeroing && !ReduceBulkZeroing) do_zeroing = false;
|
||||
|
||||
for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
|
||||
@ -3951,7 +3951,7 @@ Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
|
||||
remove_extra_zeroes(); // clear out all the zmems left over
|
||||
add_req(inits);
|
||||
|
||||
if (!ZeroTLAB) {
|
||||
if (!(UseTLAB && ZeroTLAB)) {
|
||||
// If anything remains to be zeroed, zero it all now.
|
||||
zeroes_done = align_size_down(zeroes_done, BytesPerInt);
|
||||
// if it is the last unused 4 bytes of an instance, forget about it
|
||||
|
@ -288,6 +288,7 @@ hotspot_compiler_3 = \
|
||||
compiler/jsr292/ \
|
||||
compiler/loopopts/ \
|
||||
compiler/macronodes/ \
|
||||
compiler/memoryinitialization/ \
|
||||
compiler/osr/ \
|
||||
compiler/regalloc/ \
|
||||
compiler/runtime/ \
|
||||
|
37
hotspot/test/compiler/memoryinitialization/ZeroTLABTest.java
Normal file
37
hotspot/test/compiler/memoryinitialization/ZeroTLABTest.java
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8086053
|
||||
* @run main/othervm -Xcomp -XX:+UseTLAB -XX:+ZeroTLAB ZeroTLABTest
|
||||
* @run main/othervm -Xcomp -XX:+UseTLAB -XX:-ZeroTLAB ZeroTLABTest
|
||||
* @run main/othervm -Xcomp -XX:-UseTLAB -XX:+ZeroTLAB ZeroTLABTest
|
||||
* @run main/othervm -Xcomp -XX:-UseTLAB -XX:-ZeroTLAB ZeroTLABTest
|
||||
*/
|
||||
// Test driver for bug 8086053. The body is deliberately trivial: the jtreg @run lines
// preceding this class start the VM with -Xcomp under all four combinations of the
// UseTLAB and ZeroTLAB flags, and main only prints a success message.
// NOTE(review): presumably the test is considered passed whenever the VM starts up and
// reaches the println without crashing under each flag combination -- confirm.
public class ZeroTLABTest {
|
||||
public static void main(String args[]) {
|
||||
System.out.println("Test PASSED");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user