8086053: Address inconsistencies regarding ZeroTLAB
Add zero-initialization to C1 for fast TLAB refills; strengthen C2 conditions for skipping zero-initialization. Reviewed-by: kvn, thartmann
This commit is contained in:
parent
5be1924e89
commit
dfa6539a6a
@ -205,12 +205,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
|
|||||||
|
|
||||||
|
|
||||||
void C1_MacroAssembler::initialize_body(Register base, Register index) {
|
void C1_MacroAssembler::initialize_body(Register base, Register index) {
|
||||||
assert_different_registers(base, index);
|
zero_memory(base, index);
|
||||||
Label loop;
|
|
||||||
bind(loop);
|
|
||||||
subcc(index, HeapWordSize, index);
|
|
||||||
brx(Assembler::greaterEqual, true, Assembler::pt, loop);
|
|
||||||
delayed()->st_ptr(G0, base, index);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -237,7 +232,7 @@ void C1_MacroAssembler::allocate_object(
|
|||||||
}
|
}
|
||||||
try_allocate(obj, noreg, obj_size * wordSize, t2, t3, slow_case);
|
try_allocate(obj, noreg, obj_size * wordSize, t2, t3, slow_case);
|
||||||
|
|
||||||
initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2);
|
initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2, /* is_tlab_allocated */ UseTLAB);
|
||||||
}
|
}
|
||||||
|
|
||||||
void C1_MacroAssembler::initialize_object(
|
void C1_MacroAssembler::initialize_object(
|
||||||
@ -246,7 +241,8 @@ void C1_MacroAssembler::initialize_object(
|
|||||||
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
||||||
int con_size_in_bytes, // object size in bytes if known at compile time
|
int con_size_in_bytes, // object size in bytes if known at compile time
|
||||||
Register t1, // temp register
|
Register t1, // temp register
|
||||||
Register t2 // temp register
|
Register t2, // temp register
|
||||||
|
bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
|
||||||
) {
|
) {
|
||||||
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
|
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
|
||||||
|
|
||||||
@ -269,31 +265,33 @@ void C1_MacroAssembler::initialize_object(
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// initialize body
|
if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
|
||||||
const int threshold = 5 * HeapWordSize; // approximate break even point for code size
|
// initialize body
|
||||||
if (var_size_in_bytes != noreg) {
|
const int threshold = 5 * HeapWordSize; // approximate break even point for code size
|
||||||
// use a loop
|
if (var_size_in_bytes != noreg) {
|
||||||
add(obj, hdr_size_in_bytes, t1); // compute address of first element
|
// use a loop
|
||||||
sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body
|
add(obj, hdr_size_in_bytes, t1); // compute address of first element
|
||||||
initialize_body(t1, t2);
|
sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body
|
||||||
|
initialize_body(t1, t2);
|
||||||
#ifndef _LP64
|
#ifndef _LP64
|
||||||
} else if (con_size_in_bytes < threshold * 2) {
|
} else if (con_size_in_bytes < threshold * 2) {
|
||||||
// on v9 we can do double word stores to fill twice as much space.
|
// on v9 we can do double word stores to fill twice as much space.
|
||||||
assert(hdr_size_in_bytes % 8 == 0, "double word aligned");
|
assert(hdr_size_in_bytes % 8 == 0, "double word aligned");
|
||||||
assert(con_size_in_bytes % 8 == 0, "double word aligned");
|
assert(con_size_in_bytes % 8 == 0, "double word aligned");
|
||||||
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += 2 * HeapWordSize) stx(G0, obj, i);
|
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += 2 * HeapWordSize) stx(G0, obj, i);
|
||||||
#endif
|
#endif
|
||||||
} else if (con_size_in_bytes <= threshold) {
|
} else if (con_size_in_bytes <= threshold) {
|
||||||
// use explicit NULL stores
|
// use explicit NULL stores
|
||||||
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i);
|
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i);
|
||||||
} else if (con_size_in_bytes > hdr_size_in_bytes) {
|
} else if (con_size_in_bytes > hdr_size_in_bytes) {
|
||||||
// use a loop
|
// use a loop
|
||||||
const Register base = t1;
|
const Register base = t1;
|
||||||
const Register index = t2;
|
const Register index = t2;
|
||||||
add(obj, hdr_size_in_bytes, base); // compute address of first element
|
add(obj, hdr_size_in_bytes, base); // compute address of first element
|
||||||
// compute index = number of words to clear
|
// compute index = number of words to clear
|
||||||
set(con_size_in_bytes - hdr_size_in_bytes, index);
|
set(con_size_in_bytes - hdr_size_in_bytes, index);
|
||||||
initialize_body(base, index);
|
initialize_body(base, index);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CURRENT_ENV->dtrace_alloc_probes()) {
|
if (CURRENT_ENV->dtrace_alloc_probes()) {
|
||||||
|
@ -50,7 +50,8 @@
|
|||||||
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
||||||
int con_size_in_bytes, // object size in bytes if known at compile time
|
int con_size_in_bytes, // object size in bytes if known at compile time
|
||||||
Register t1, // temp register
|
Register t1, // temp register
|
||||||
Register t2 // temp register
|
Register t2, // temp register
|
||||||
|
bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
|
||||||
);
|
);
|
||||||
|
|
||||||
// allocation of fixed-size objects
|
// allocation of fixed-size objects
|
||||||
|
@ -435,7 +435,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
|||||||
|
|
||||||
__ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
|
__ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
|
||||||
|
|
||||||
__ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
|
__ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ true);
|
||||||
__ verify_oop(O0_obj);
|
__ verify_oop(O0_obj);
|
||||||
__ mov(O0, I0);
|
__ mov(O0, I0);
|
||||||
__ ret();
|
__ ret();
|
||||||
@ -447,7 +447,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
|||||||
__ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
|
__ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
|
||||||
__ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2);
|
__ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2);
|
||||||
|
|
||||||
__ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
|
__ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ false);
|
||||||
__ verify_oop(O0_obj);
|
__ verify_oop(O0_obj);
|
||||||
__ mov(O0, I0);
|
__ mov(O0, I0);
|
||||||
__ ret();
|
__ ret();
|
||||||
@ -542,7 +542,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
|||||||
__ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
|
__ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
|
||||||
__ sub(G1_arr_size, G3_t1, O1_t2); // body length
|
__ sub(G1_arr_size, G3_t1, O1_t2); // body length
|
||||||
__ add(O0_obj, G3_t1, G3_t1); // body start
|
__ add(O0_obj, G3_t1, G3_t1); // body start
|
||||||
__ initialize_body(G3_t1, O1_t2);
|
if (!ZeroTLAB) {
|
||||||
|
__ initialize_body(G3_t1, O1_t2);
|
||||||
|
}
|
||||||
__ verify_oop(O0_obj);
|
__ verify_oop(O0_obj);
|
||||||
__ retl();
|
__ retl();
|
||||||
__ delayed()->nop();
|
__ delayed()->nop();
|
||||||
|
@ -3459,11 +3459,27 @@ void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case
|
|||||||
add(top, t1, top); // t1 is tlab_size
|
add(top, t1, top); // t1 is tlab_size
|
||||||
sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
|
sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
|
||||||
st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
|
st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
|
||||||
|
|
||||||
|
if (ZeroTLAB) {
|
||||||
|
// This is a fast TLAB refill, therefore the GC is not notified of it.
|
||||||
|
// So compiled code must fill the new TLAB with zeroes.
|
||||||
|
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
|
||||||
|
zero_memory(t2, t1);
|
||||||
|
}
|
||||||
verify_tlab();
|
verify_tlab();
|
||||||
ba(retry);
|
ba(retry);
|
||||||
delayed()->nop();
|
delayed()->nop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MacroAssembler::zero_memory(Register base, Register index) {
|
||||||
|
assert_different_registers(base, index);
|
||||||
|
Label loop;
|
||||||
|
bind(loop);
|
||||||
|
subcc(index, HeapWordSize, index);
|
||||||
|
brx(Assembler::greaterEqual, true, Assembler::pt, loop);
|
||||||
|
delayed()->st_ptr(G0, base, index);
|
||||||
|
}
|
||||||
|
|
||||||
void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
|
void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
|
||||||
Register t1, Register t2) {
|
Register t1, Register t2) {
|
||||||
// Bump total bytes allocated by this thread
|
// Bump total bytes allocated by this thread
|
||||||
|
@ -1278,6 +1278,7 @@ public:
|
|||||||
Label& slow_case // continuation point if fast allocation fails
|
Label& slow_case // continuation point if fast allocation fails
|
||||||
);
|
);
|
||||||
void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
|
void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
|
||||||
|
void zero_memory(Register base, Register index);
|
||||||
void incr_allocated_bytes(RegisterOrConstant size_in_bytes,
|
void incr_allocated_bytes(RegisterOrConstant size_in_bytes,
|
||||||
Register t1, Register t2);
|
Register t1, Register t2);
|
||||||
|
|
||||||
|
@ -182,54 +182,13 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
|
|||||||
|
|
||||||
// preserves obj, destroys len_in_bytes
|
// preserves obj, destroys len_in_bytes
|
||||||
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
|
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
|
||||||
|
assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
|
||||||
Label done;
|
Label done;
|
||||||
assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
|
|
||||||
assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
|
|
||||||
Register index = len_in_bytes;
|
|
||||||
// index is positive and ptr sized
|
|
||||||
subptr(index, hdr_size_in_bytes);
|
|
||||||
jcc(Assembler::zero, done);
|
|
||||||
// initialize topmost word, divide index by 2, check if odd and test if zero
|
|
||||||
// note: for the remaining code to work, index must be a multiple of BytesPerWord
|
|
||||||
#ifdef ASSERT
|
|
||||||
{ Label L;
|
|
||||||
testptr(index, BytesPerWord - 1);
|
|
||||||
jcc(Assembler::zero, L);
|
|
||||||
stop("index is not a multiple of BytesPerWord");
|
|
||||||
bind(L);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
xorptr(t1, t1); // use _zero reg to clear memory (shorter code)
|
|
||||||
if (UseIncDec) {
|
|
||||||
shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
|
|
||||||
} else {
|
|
||||||
shrptr(index, 2); // use 2 instructions to avoid partial flag stall
|
|
||||||
shrptr(index, 1);
|
|
||||||
}
|
|
||||||
#ifndef _LP64
|
|
||||||
// index could have been not a multiple of 8 (i.e., bit 2 was set)
|
|
||||||
{ Label even;
|
|
||||||
// note: if index was a multiple of 8, than it cannot
|
|
||||||
// be 0 now otherwise it must have been 0 before
|
|
||||||
// => if it is even, we don't need to check for 0 again
|
|
||||||
jcc(Assembler::carryClear, even);
|
|
||||||
// clear topmost word (no jump needed if conditional assignment would work here)
|
|
||||||
movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 0*BytesPerWord), t1);
|
|
||||||
// index could be 0 now, need to check again
|
|
||||||
jcc(Assembler::zero, done);
|
|
||||||
bind(even);
|
|
||||||
}
|
|
||||||
#endif // !_LP64
|
|
||||||
// initialize remaining object fields: rdx is a multiple of 2 now
|
|
||||||
{ Label loop;
|
|
||||||
bind(loop);
|
|
||||||
movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 1*BytesPerWord), t1);
|
|
||||||
NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 2*BytesPerWord), t1);)
|
|
||||||
decrement(index);
|
|
||||||
jcc(Assembler::notZero, loop);
|
|
||||||
}
|
|
||||||
|
|
||||||
// done
|
// len_in_bytes is positive and ptr sized
|
||||||
|
subptr(len_in_bytes, hdr_size_in_bytes);
|
||||||
|
jcc(Assembler::zero, done);
|
||||||
|
zero_memory(obj, len_in_bytes, hdr_size_in_bytes, t1);
|
||||||
bind(done);
|
bind(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -241,47 +200,49 @@ void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2,
|
|||||||
|
|
||||||
try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
|
try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
|
||||||
|
|
||||||
initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
|
initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
|
||||||
}
|
}
|
||||||
|
|
||||||
void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
|
void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
|
||||||
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
|
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
|
||||||
"con_size_in_bytes is not multiple of alignment");
|
"con_size_in_bytes is not multiple of alignment");
|
||||||
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
|
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
|
||||||
|
|
||||||
initialize_header(obj, klass, noreg, t1, t2);
|
initialize_header(obj, klass, noreg, t1, t2);
|
||||||
|
|
||||||
// clear rest of allocated space
|
if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
|
||||||
const Register t1_zero = t1;
|
// clear rest of allocated space
|
||||||
const Register index = t2;
|
const Register t1_zero = t1;
|
||||||
const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below)
|
const Register index = t2;
|
||||||
if (var_size_in_bytes != noreg) {
|
const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below)
|
||||||
mov(index, var_size_in_bytes);
|
if (var_size_in_bytes != noreg) {
|
||||||
initialize_body(obj, index, hdr_size_in_bytes, t1_zero);
|
mov(index, var_size_in_bytes);
|
||||||
} else if (con_size_in_bytes <= threshold) {
|
initialize_body(obj, index, hdr_size_in_bytes, t1_zero);
|
||||||
// use explicit null stores
|
} else if (con_size_in_bytes <= threshold) {
|
||||||
// code size = 2 + 3*n bytes (n = number of fields to clear)
|
// use explicit null stores
|
||||||
xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
|
// code size = 2 + 3*n bytes (n = number of fields to clear)
|
||||||
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
|
xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
|
||||||
movptr(Address(obj, i), t1_zero);
|
for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
|
||||||
} else if (con_size_in_bytes > hdr_size_in_bytes) {
|
movptr(Address(obj, i), t1_zero);
|
||||||
// use loop to null out the fields
|
} else if (con_size_in_bytes > hdr_size_in_bytes) {
|
||||||
// code size = 16 bytes for even n (n = number of fields to clear)
|
// use loop to null out the fields
|
||||||
// initialize last object field first if odd number of fields
|
// code size = 16 bytes for even n (n = number of fields to clear)
|
||||||
xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
|
// initialize last object field first if odd number of fields
|
||||||
movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3);
|
xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
|
||||||
// initialize last object field if constant size is odd
|
movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3);
|
||||||
if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0)
|
// initialize last object field if constant size is odd
|
||||||
movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero);
|
if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0)
|
||||||
// initialize remaining object fields: rdx is a multiple of 2
|
movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero);
|
||||||
{ Label loop;
|
// initialize remaining object fields: rdx is a multiple of 2
|
||||||
bind(loop);
|
{ Label loop;
|
||||||
movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)),
|
bind(loop);
|
||||||
t1_zero);
|
movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)),
|
||||||
NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)),
|
t1_zero);
|
||||||
t1_zero);)
|
NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)),
|
||||||
decrement(index);
|
t1_zero);)
|
||||||
jcc(Assembler::notZero, loop);
|
decrement(index);
|
||||||
|
jcc(Assembler::notZero, loop);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,7 +65,8 @@
|
|||||||
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
||||||
int con_size_in_bytes, // object size in bytes if known at compile time
|
int con_size_in_bytes, // object size in bytes if known at compile time
|
||||||
Register t1, // temp register
|
Register t1, // temp register
|
||||||
Register t2 // temp register
|
Register t2, // temp register
|
||||||
|
bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
|
||||||
);
|
);
|
||||||
|
|
||||||
// allocation of fixed-size objects
|
// allocation of fixed-size objects
|
||||||
|
@ -1040,7 +1040,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
|||||||
|
|
||||||
__ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
|
__ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
|
||||||
|
|
||||||
__ initialize_object(obj, klass, obj_size, 0, t1, t2);
|
__ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
|
||||||
__ verify_oop(obj);
|
__ verify_oop(obj);
|
||||||
__ pop(rbx);
|
__ pop(rbx);
|
||||||
__ pop(rdi);
|
__ pop(rdi);
|
||||||
@ -1053,7 +1053,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
|||||||
__ eden_allocate(obj, obj_size, 0, t1, slow_path);
|
__ eden_allocate(obj, obj_size, 0, t1, slow_path);
|
||||||
__ incr_allocated_bytes(thread, obj_size, 0);
|
__ incr_allocated_bytes(thread, obj_size, 0);
|
||||||
|
|
||||||
__ initialize_object(obj, klass, obj_size, 0, t1, t2);
|
__ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
|
||||||
__ verify_oop(obj);
|
__ verify_oop(obj);
|
||||||
__ pop(rbx);
|
__ pop(rbx);
|
||||||
__ pop(rdi);
|
__ pop(rdi);
|
||||||
@ -1169,7 +1169,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
|||||||
__ andptr(t1, Klass::_lh_header_size_mask);
|
__ andptr(t1, Klass::_lh_header_size_mask);
|
||||||
__ subptr(arr_size, t1); // body length
|
__ subptr(arr_size, t1); // body length
|
||||||
__ addptr(t1, obj); // body start
|
__ addptr(t1, obj); // body start
|
||||||
__ initialize_body(t1, arr_size, 0, t2);
|
if (!ZeroTLAB) {
|
||||||
|
__ initialize_body(t1, arr_size, 0, t2);
|
||||||
|
}
|
||||||
__ verify_oop(obj);
|
__ verify_oop(obj);
|
||||||
__ ret(0);
|
__ ret(0);
|
||||||
|
|
||||||
|
@ -5426,7 +5426,7 @@ Register MacroAssembler::tlab_refill(Label& retry,
|
|||||||
Label& try_eden,
|
Label& try_eden,
|
||||||
Label& slow_case) {
|
Label& slow_case) {
|
||||||
Register top = rax;
|
Register top = rax;
|
||||||
Register t1 = rcx;
|
Register t1 = rcx; // object size
|
||||||
Register t2 = rsi;
|
Register t2 = rsi;
|
||||||
Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
|
Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
|
||||||
assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
|
assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
|
||||||
@ -5522,12 +5522,76 @@ Register MacroAssembler::tlab_refill(Label& retry,
|
|||||||
addptr(top, t1);
|
addptr(top, t1);
|
||||||
subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
|
subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
|
||||||
movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
|
movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
|
||||||
|
|
||||||
|
if (ZeroTLAB) {
|
||||||
|
// This is a fast TLAB refill, therefore the GC is not notified of it.
|
||||||
|
// So compiled code must fill the new TLAB with zeroes.
|
||||||
|
movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
|
||||||
|
zero_memory(top, t1, 0, t2);
|
||||||
|
}
|
||||||
|
|
||||||
verify_tlab();
|
verify_tlab();
|
||||||
jmp(retry);
|
jmp(retry);
|
||||||
|
|
||||||
return thread_reg; // for use by caller
|
return thread_reg; // for use by caller
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Preserves the contents of address, destroys the contents length_in_bytes and temp.
|
||||||
|
void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) {
|
||||||
|
assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different");
|
||||||
|
assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord");
|
||||||
|
Label done;
|
||||||
|
|
||||||
|
testptr(length_in_bytes, length_in_bytes);
|
||||||
|
jcc(Assembler::zero, done);
|
||||||
|
|
||||||
|
// initialize topmost word, divide index by 2, check if odd and test if zero
|
||||||
|
// note: for the remaining code to work, index must be a multiple of BytesPerWord
|
||||||
|
#ifdef ASSERT
|
||||||
|
{
|
||||||
|
Label L;
|
||||||
|
testptr(length_in_bytes, BytesPerWord - 1);
|
||||||
|
jcc(Assembler::zero, L);
|
||||||
|
stop("length must be a multiple of BytesPerWord");
|
||||||
|
bind(L);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
Register index = length_in_bytes;
|
||||||
|
xorptr(temp, temp); // use _zero reg to clear memory (shorter code)
|
||||||
|
if (UseIncDec) {
|
||||||
|
shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
|
||||||
|
} else {
|
||||||
|
shrptr(index, 2); // use 2 instructions to avoid partial flag stall
|
||||||
|
shrptr(index, 1);
|
||||||
|
}
|
||||||
|
#ifndef _LP64
|
||||||
|
// index could have not been a multiple of 8 (i.e., bit 2 was set)
|
||||||
|
{
|
||||||
|
Label even;
|
||||||
|
// note: if index was a multiple of 8, then it cannot
|
||||||
|
// be 0 now otherwise it must have been 0 before
|
||||||
|
// => if it is even, we don't need to check for 0 again
|
||||||
|
jcc(Assembler::carryClear, even);
|
||||||
|
// clear topmost word (no jump would be needed if conditional assignment worked here)
|
||||||
|
movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
|
||||||
|
// index could be 0 now, must check again
|
||||||
|
jcc(Assembler::zero, done);
|
||||||
|
bind(even);
|
||||||
|
}
|
||||||
|
#endif // !_LP64
|
||||||
|
// initialize remaining object fields: index is a multiple of 2 now
|
||||||
|
{
|
||||||
|
Label loop;
|
||||||
|
bind(loop);
|
||||||
|
movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
|
||||||
|
NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
|
||||||
|
decrement(index);
|
||||||
|
jcc(Assembler::notZero, loop);
|
||||||
|
}
|
||||||
|
|
||||||
|
bind(done);
|
||||||
|
}
|
||||||
|
|
||||||
void MacroAssembler::incr_allocated_bytes(Register thread,
|
void MacroAssembler::incr_allocated_bytes(Register thread,
|
||||||
Register var_size_in_bytes,
|
Register var_size_in_bytes,
|
||||||
int con_size_in_bytes,
|
int con_size_in_bytes,
|
||||||
|
@ -522,6 +522,8 @@ class MacroAssembler: public Assembler {
|
|||||||
Label& slow_case // continuation point if fast allocation fails
|
Label& slow_case // continuation point if fast allocation fails
|
||||||
);
|
);
|
||||||
Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
|
Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
|
||||||
|
void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp);
|
||||||
|
|
||||||
void incr_allocated_bytes(Register thread,
|
void incr_allocated_bytes(Register thread,
|
||||||
Register var_size_in_bytes, int con_size_in_bytes,
|
Register var_size_in_bytes, int con_size_in_bytes,
|
||||||
Register t1 = noreg);
|
Register t1 = noreg);
|
||||||
|
@ -105,7 +105,7 @@ void ThreadLocalAllocBuffer::accumulate_statistics() {
|
|||||||
// an illusion of a contiguous Eden and optionally retires the tlab.
|
// an illusion of a contiguous Eden and optionally retires the tlab.
|
||||||
// Waste accounting should be done in caller as appropriate; see,
|
// Waste accounting should be done in caller as appropriate; see,
|
||||||
// for example, clear_before_allocation().
|
// for example, clear_before_allocation().
|
||||||
void ThreadLocalAllocBuffer::make_parsable(bool retire) {
|
void ThreadLocalAllocBuffer::make_parsable(bool retire, bool zap) {
|
||||||
if (end() != NULL) {
|
if (end() != NULL) {
|
||||||
invariants();
|
invariants();
|
||||||
|
|
||||||
@ -113,7 +113,7 @@ void ThreadLocalAllocBuffer::make_parsable(bool retire) {
|
|||||||
myThread()->incr_allocated_bytes(used_bytes());
|
myThread()->incr_allocated_bytes(used_bytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
CollectedHeap::fill_with_object(top(), hard_end(), retire);
|
CollectedHeap::fill_with_object(top(), hard_end(), retire && zap);
|
||||||
|
|
||||||
if (retire || ZeroTLAB) { // "Reset" the TLAB
|
if (retire || ZeroTLAB) { // "Reset" the TLAB
|
||||||
set_start(NULL);
|
set_start(NULL);
|
||||||
|
@ -145,8 +145,8 @@ public:
|
|||||||
// Initialization at startup
|
// Initialization at startup
|
||||||
static void startup_initialization();
|
static void startup_initialization();
|
||||||
|
|
||||||
// Make an in-use tlab parsable, optionally also retiring it.
|
// Make an in-use tlab parsable, optionally retiring and/or zapping it.
|
||||||
void make_parsable(bool retire);
|
void make_parsable(bool retire, bool zap = true);
|
||||||
|
|
||||||
// Retire in-use tlab before allocation of a new tlab
|
// Retire in-use tlab before allocation of a new tlab
|
||||||
void clear_before_allocation();
|
void clear_before_allocation();
|
||||||
|
@ -3077,7 +3077,7 @@ bool LibraryCallKit::inline_native_isInterrupted() {
|
|||||||
set_control( _gvn.transform(new IfTrueNode(iff_arg)));
|
set_control( _gvn.transform(new IfTrueNode(iff_arg)));
|
||||||
#else
|
#else
|
||||||
// To return true on Windows you must read the _interrupted field
|
// To return true on Windows you must read the _interrupted field
|
||||||
// and check the the event state i.e. take the slow path.
|
// and check the event state i.e. take the slow path.
|
||||||
#endif // TARGET_OS_FAMILY_windows
|
#endif // TARGET_OS_FAMILY_windows
|
||||||
|
|
||||||
// (d) Otherwise, go to the slow path.
|
// (d) Otherwise, go to the slow path.
|
||||||
|
@ -1813,10 +1813,11 @@ PhaseMacroExpand::initialize_object(AllocateNode* alloc,
|
|||||||
// there can be two Allocates to one Initialize. The answer in all these
|
// there can be two Allocates to one Initialize. The answer in all these
|
||||||
// edge cases is safety first. It is always safe to clear immediately
|
// edge cases is safety first. It is always safe to clear immediately
|
||||||
// within an Allocate, and then (maybe or maybe not) clear some more later.
|
// within an Allocate, and then (maybe or maybe not) clear some more later.
|
||||||
if (!ZeroTLAB)
|
if (!(UseTLAB && ZeroTLAB)) {
|
||||||
rawmem = ClearArrayNode::clear_memory(control, rawmem, object,
|
rawmem = ClearArrayNode::clear_memory(control, rawmem, object,
|
||||||
header_size, size_in_bytes,
|
header_size, size_in_bytes,
|
||||||
&_igvn);
|
&_igvn);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!init->is_complete()) {
|
if (!init->is_complete()) {
|
||||||
// Try to win by zeroing only what the init does not store.
|
// Try to win by zeroing only what the init does not store.
|
||||||
|
@ -295,7 +295,7 @@ Node* PhaseMacroExpand::generate_arraycopy(ArrayCopyNode *ac, AllocateArrayNode*
|
|||||||
// out-edges of the dest, we need to avoid making derived pointers
|
// out-edges of the dest, we need to avoid making derived pointers
|
||||||
// from it until we have checked its uses.)
|
// from it until we have checked its uses.)
|
||||||
if (ReduceBulkZeroing
|
if (ReduceBulkZeroing
|
||||||
&& !ZeroTLAB // pointless if already zeroed
|
&& !(UseTLAB && ZeroTLAB) // pointless if already zeroed
|
||||||
&& basic_elem_type != T_CONFLICT // avoid corner case
|
&& basic_elem_type != T_CONFLICT // avoid corner case
|
||||||
&& !src->eqv_uncast(dest)
|
&& !src->eqv_uncast(dest)
|
||||||
&& alloc != NULL
|
&& alloc != NULL
|
||||||
|
@ -3850,7 +3850,7 @@ Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
|
|||||||
bool do_zeroing = true; // we might give up if inits are very sparse
|
bool do_zeroing = true; // we might give up if inits are very sparse
|
||||||
int big_init_gaps = 0; // how many large gaps have we seen?
|
int big_init_gaps = 0; // how many large gaps have we seen?
|
||||||
|
|
||||||
if (ZeroTLAB) do_zeroing = false;
|
if (UseTLAB && ZeroTLAB) do_zeroing = false;
|
||||||
if (!ReduceFieldZeroing && !ReduceBulkZeroing) do_zeroing = false;
|
if (!ReduceFieldZeroing && !ReduceBulkZeroing) do_zeroing = false;
|
||||||
|
|
||||||
for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
|
for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
|
||||||
@ -3951,7 +3951,7 @@ Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
|
|||||||
remove_extra_zeroes(); // clear out all the zmems left over
|
remove_extra_zeroes(); // clear out all the zmems left over
|
||||||
add_req(inits);
|
add_req(inits);
|
||||||
|
|
||||||
if (!ZeroTLAB) {
|
if (!(UseTLAB && ZeroTLAB)) {
|
||||||
// If anything remains to be zeroed, zero it all now.
|
// If anything remains to be zeroed, zero it all now.
|
||||||
zeroes_done = align_size_down(zeroes_done, BytesPerInt);
|
zeroes_done = align_size_down(zeroes_done, BytesPerInt);
|
||||||
// if it is the last unused 4 bytes of an instance, forget about it
|
// if it is the last unused 4 bytes of an instance, forget about it
|
||||||
|
@ -288,6 +288,7 @@ hotspot_compiler_3 = \
|
|||||||
compiler/jsr292/ \
|
compiler/jsr292/ \
|
||||||
compiler/loopopts/ \
|
compiler/loopopts/ \
|
||||||
compiler/macronodes/ \
|
compiler/macronodes/ \
|
||||||
|
compiler/memoryinitialization/ \
|
||||||
compiler/osr/ \
|
compiler/osr/ \
|
||||||
compiler/regalloc/ \
|
compiler/regalloc/ \
|
||||||
compiler/runtime/ \
|
compiler/runtime/ \
|
||||||
|
37
hotspot/test/compiler/memoryinitialization/ZeroTLABTest.java
Normal file
37
hotspot/test/compiler/memoryinitialization/ZeroTLABTest.java
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8086053
|
||||||
|
* @run main/othervm -Xcomp -XX:+UseTLAB -XX:+ZeroTLAB ZeroTLABTest
|
||||||
|
* @run main/othervm -Xcomp -XX:+UseTLAB -XX:-ZeroTLAB ZeroTLABTest
|
||||||
|
* @run main/othervm -Xcomp -XX:-UseTLAB -XX:+ZeroTLAB ZeroTLABTest
|
||||||
|
* @run main/othervm -Xcomp -XX:-UseTLAB -XX:-ZeroTLAB ZeroTLABTest
|
||||||
|
*/
|
||||||
|
public class ZeroTLABTest {
|
||||||
|
public static void main(String args[]) {
|
||||||
|
System.out.println("Test PASSED");
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user