8191989: JDK-8190862 work for arch sparc

Fixed Interpreter never refills TLAB

Reviewed-by: tschatzl, rehn
Jean Christophe Beyler 2017-12-19 19:55:23 -08:00
parent ada54c7d1a
commit 1f15d2bb46

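The comment block added in the first hunk below spells out the new allocation policy: a bump-pointer fast path in the TLAB when TLABs are enabled, otherwise a CAS-based bump of the shared eden top, with everything else deferred to the slow path. As rough orientation only, here is a minimal, self-contained C++ sketch of that control flow. It is not the SPARC assembly emitted by TemplateTable::_new(); every name in it (Tlab, SharedEden, allocate_obj, slow_path, ...) is an illustrative stand-in, not a HotSpot identifier.

// Stand-alone sketch of the allocation policy described in the diff below.
// All identifiers here are illustrative, not HotSpot code.
#include <atomic>
#include <cstddef>
#include <cstdio>

typedef unsigned long HeapWord;          // one "word" of a toy heap

struct Tlab {                            // thread-local buffer: plain bump pointer
  HeapWord* top;
  HeapWord* end;
  HeapWord* allocate(size_t words) {
    if ((size_t)(end - top) < words) {   // does not fit: caller takes the slow path,
      return nullptr;                    // which may refill the TLAB
    }
    HeapWord* obj = top;
    top += words;                        // private bump, no atomics needed
    return obj;
  }
};

struct SharedEden {                      // shared space: CAS on the global top
  std::atomic<HeapWord*> top;
  HeapWord* end;
  HeapWord* allocate(size_t words) {
    HeapWord* old_top = top.load();
    while (true) {                       // the "retry" loop in the SPARC code
      if ((size_t)(end - old_top) < words) return nullptr;  // past heap end: slow path
      HeapWord* new_top = old_top + words;
      if (top.compare_exchange_weak(old_top, new_top)) return old_top;
      // someone beat us to the allocation: old_top was reloaded, try again
    }
  }
};

HeapWord* slow_path(size_t words) {      // stand-in for the runtime call
  std::printf("slow path: %zu words\n", words);
  return nullptr;
}

HeapWord* allocate_obj(Tlab& tlab, SharedEden& eden, bool use_tlab,
                       bool allow_shared_alloc, size_t words) {
  if (use_tlab) {
    if (HeapWord* obj = tlab.allocate(words)) return obj;
    return slow_path(words);             // new behaviour: no eden fallback from a full TLAB
  }
  if (allow_shared_alloc) {
    if (HeapWord* obj = eden.allocate(words)) return obj;
  }
  return slow_path(words);
}

int main() {
  static HeapWord tlab_space[64], eden_space[1024];
  Tlab tlab = { tlab_space, tlab_space + 64 };
  SharedEden eden;
  eden.top.store(eden_space);
  eden.end = eden_space + 1024;
  allocate_obj(tlab, eden, true, true, 16);   // fits in the TLAB
  allocate_obj(tlab, eden, true, true, 100);  // too big for the TLAB: straight to the slow path
  return 0;
}

The point of the change is visible in allocate_obj: when the TLAB cannot satisfy the request, the interpreter no longer falls back to the shared eden or adjusts the refill-waste limit; it goes straight to the slow path, where the TLAB can be refilled.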

@@ -3270,11 +3270,19 @@ void TemplateTable::_new() {
__ br(Assembler::notZero, false, Assembler::pn, slow_case);
__ delayed()->nop();
- // allocate the instance
- // 1) Try to allocate in the TLAB
- // 2) if fail, and the TLAB is not full enough to discard, allocate in the shared Eden
- // 3) if the above fails (or is not applicable), go to a slow case
- // (creates a new TLAB, etc.)
+ // Allocate the instance:
+ //  If TLAB is enabled:
+ //    Try to allocate in the TLAB.
+ //    If fails, go to the slow path.
+ //  Else If inline contiguous allocations are enabled:
+ //    Try to allocate in eden.
+ //    If fails due to heap end, go to slow path.
+ //
+ //  If TLAB is enabled OR inline contiguous is enabled:
+ //    Initialize the allocation.
+ //    Exit.
+ //
+ //  Go to slow path.
const bool allow_shared_alloc =
Universe::heap()->supports_inline_contig_alloc();
@@ -3302,61 +3310,43 @@ void TemplateTable::_new() {
}
__ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
+ // Allocation does not fit in the TLAB.
+ __ ba_short(slow_case);
+ } else {
+ // Allocation in the shared Eden
if (allow_shared_alloc) {
- // Check if tlab should be discarded (refill_waste_limit >= free)
- __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), RtlabWasteLimitValue);
- __ sub(RendValue, RoldTopValue, RfreeValue);
- __ srlx(RfreeValue, LogHeapWordSize, RfreeValue);
- __ cmp_and_brx_short(RtlabWasteLimitValue, RfreeValue, Assembler::greaterEqualUnsigned, Assembler::pt, slow_case); // tlab waste is small
+ Register RoldTopValue = G1_scratch;
+ Register RtopAddr = G3_scratch;
+ Register RnewTopValue = RallocatedObject;
+ Register RendValue = Rscratch;
- // increment waste limit to prevent getting stuck on this slow path
- if (Assembler::is_simm13(ThreadLocalAllocBuffer::refill_waste_limit_increment())) {
- __ add(RtlabWasteLimitValue, ThreadLocalAllocBuffer::refill_waste_limit_increment(), RtlabWasteLimitValue);
- } else {
- // set64 does not use the temp register if the given constant is 32 bit. So
- // we can just use any register; using G0 results in ignoring of the upper 32 bit
- // of that value.
- __ set64(ThreadLocalAllocBuffer::refill_waste_limit_increment(), G4_scratch, G0);
- __ add(RtlabWasteLimitValue, G4_scratch, RtlabWasteLimitValue);
- }
- __ st_ptr(RtlabWasteLimitValue, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
- } else {
- // No allocation in the shared eden.
- __ ba_short(slow_case);
+ __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr);
+ Label retry;
+ __ bind(retry);
+ __ set((intptr_t)Universe::heap()->end_addr(), RendValue);
+ __ ld_ptr(RendValue, 0, RendValue);
+ __ ld_ptr(RtopAddr, 0, RoldTopValue);
+ __ add(RoldTopValue, Roffset, RnewTopValue);
+ // RnewTopValue contains the top address after the new object
+ // has been allocated.
+ __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
+ __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue);
+ // if someone beat us on the allocation, try again, otherwise continue
+ __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
+ // bump total bytes allocated by this thread
+ // RoldTopValue and RtopAddr are dead, so can use G1 and G3
+ __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch);
}
}
- // Allocation in the shared Eden
- if (allow_shared_alloc) {
- Register RoldTopValue = G1_scratch;
- Register RtopAddr = G3_scratch;
- Register RnewTopValue = RallocatedObject;
- Register RendValue = Rscratch;
- __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr);
- Label retry;
- __ bind(retry);
- __ set((intptr_t)Universe::heap()->end_addr(), RendValue);
- __ ld_ptr(RendValue, 0, RendValue);
- __ ld_ptr(RtopAddr, 0, RoldTopValue);
- __ add(RoldTopValue, Roffset, RnewTopValue);
- // RnewTopValue contains the top address after the new object
- // has been allocated.
- __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
- __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue);
- // if someone beat us on the allocation, try again, otherwise continue
- __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
- // bump total bytes allocated by this thread
- // RoldTopValue and RtopAddr are dead, so can use G1 and G3
- __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch);
- }
- if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
+ // If UseTLAB or allow_shared_alloc are true, the object is created above and
+ // there is an initialize need. Otherwise, skip and go to the slow path.
+ if (UseTLAB || allow_shared_alloc) {
// clear object fields
__ bind(initialize_object);
__ deccc(Roffset, sizeof(oopDesc));