From 1f15d2bb46c67d85ab8744686918a1ca056cc3d2 Mon Sep 17 00:00:00 2001 From: Jean Christophe Beyler Date: Tue, 19 Dec 2017 19:55:23 -0800 Subject: [PATCH] 8191989: JDK-8190862 work for arch sparc Fixed Interpreter never refills TLAB Reviewed-by: tschatzl, rehn --- src/hotspot/cpu/sparc/templateTable_sparc.cpp | 100 ++++++++---------- 1 file changed, 45 insertions(+), 55 deletions(-) diff --git a/src/hotspot/cpu/sparc/templateTable_sparc.cpp b/src/hotspot/cpu/sparc/templateTable_sparc.cpp index 92c453e6f6a..f478fac9383 100644 --- a/src/hotspot/cpu/sparc/templateTable_sparc.cpp +++ b/src/hotspot/cpu/sparc/templateTable_sparc.cpp @@ -3270,11 +3270,19 @@ void TemplateTable::_new() { __ br(Assembler::notZero, false, Assembler::pn, slow_case); __ delayed()->nop(); - // allocate the instance - // 1) Try to allocate in the TLAB - // 2) if fail, and the TLAB is not full enough to discard, allocate in the shared Eden - // 3) if the above fails (or is not applicable), go to a slow case - // (creates a new TLAB, etc.) + // Allocate the instance: + // If TLAB is enabled: + // Try to allocate in the TLAB. + // If fails, go to the slow path. + // Else If inline contiguous allocations are enabled: + // Try to allocate in eden. + // If fails due to heap end, go to slow path. + // + // If TLAB is enabled OR inline contiguous is enabled: + // Initialize the allocation. + // Exit. + // + // Go to slow path. const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc(); @@ -3302,61 +3310,43 @@ void TemplateTable::_new() { } __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset())); + // Allocation does not fit in the TLAB. + __ ba_short(slow_case); + } else { + // Allocation in the shared Eden if (allow_shared_alloc) { - // Check if tlab should be discarded (refill_waste_limit >= free) - __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), RtlabWasteLimitValue); - __ sub(RendValue, RoldTopValue, RfreeValue); - __ srlx(RfreeValue, LogHeapWordSize, RfreeValue); - __ cmp_and_brx_short(RtlabWasteLimitValue, RfreeValue, Assembler::greaterEqualUnsigned, Assembler::pt, slow_case); // tlab waste is small + Register RoldTopValue = G1_scratch; + Register RtopAddr = G3_scratch; + Register RnewTopValue = RallocatedObject; + Register RendValue = Rscratch; - // increment waste limit to prevent getting stuck on this slow path - if (Assembler::is_simm13(ThreadLocalAllocBuffer::refill_waste_limit_increment())) { - __ add(RtlabWasteLimitValue, ThreadLocalAllocBuffer::refill_waste_limit_increment(), RtlabWasteLimitValue); - } else { - // set64 does not use the temp register if the given constant is 32 bit. So - // we can just use any register; using G0 results in ignoring of the upper 32 bit - // of that value. - __ set64(ThreadLocalAllocBuffer::refill_waste_limit_increment(), G4_scratch, G0); - __ add(RtlabWasteLimitValue, G4_scratch, RtlabWasteLimitValue); - } - __ st_ptr(RtlabWasteLimitValue, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); - } else { - // No allocation in the shared eden. - __ ba_short(slow_case); + __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr); + + Label retry; + __ bind(retry); + __ set((intptr_t)Universe::heap()->end_addr(), RendValue); + __ ld_ptr(RendValue, 0, RendValue); + __ ld_ptr(RtopAddr, 0, RoldTopValue); + __ add(RoldTopValue, Roffset, RnewTopValue); + + // RnewTopValue contains the top address after the new object + // has been allocated. + __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case); + + __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue); + + // if someone beat us on the allocation, try again, otherwise continue + __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry); + + // bump total bytes allocated by this thread + // RoldTopValue and RtopAddr are dead, so can use G1 and G3 + __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch); } } - // Allocation in the shared Eden - if (allow_shared_alloc) { - Register RoldTopValue = G1_scratch; - Register RtopAddr = G3_scratch; - Register RnewTopValue = RallocatedObject; - Register RendValue = Rscratch; - - __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr); - - Label retry; - __ bind(retry); - __ set((intptr_t)Universe::heap()->end_addr(), RendValue); - __ ld_ptr(RendValue, 0, RendValue); - __ ld_ptr(RtopAddr, 0, RoldTopValue); - __ add(RoldTopValue, Roffset, RnewTopValue); - - // RnewTopValue contains the top address after the new object - // has been allocated. - __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case); - - __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue); - - // if someone beat us on the allocation, try again, otherwise continue - __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry); - - // bump total bytes allocated by this thread - // RoldTopValue and RtopAddr are dead, so can use G1 and G3 - __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch); - } - - if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { + // If UseTLAB or allow_shared_alloc are true, the object is created above and + // there is an initialize need. Otherwise, skip and go to the slow path. + if (UseTLAB || allow_shared_alloc) { // clear object fields __ bind(initialize_object); __ deccc(Roffset, sizeof(oopDesc));