From e81359f14802ef520ad4dbb01202a74313c9dc7f Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Thu, 17 Nov 2022 08:23:57 +0000 Subject: [PATCH] 8296170: Refactor stack-locking path in C2_MacroAssembler::fast_unlock() Reviewed-by: thartmann, phh --- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 34 +++---------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 2154e867d92..ec03a1e4844 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -704,11 +704,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp #if INCLUDE_RTM_OPT } // use_rtm() #endif - // DONE_LABEL is a hot target - we'd really like to place it at the - // start of cache line by padding with NOPs. - // See the AMD and Intel software optimization manuals for the - // most efficient "long" NOP encodings. - // Unfortunately none of our alignment mechanisms suffice. bind(DONE_LABEL); // ZFlag == 1 count in fast path @@ -770,7 +765,7 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t assert(boxReg == rax, ""); assert_different_registers(objReg, boxReg, tmpReg); - Label DONE_LABEL, Stacked, CheckSucc, COUNT, NO_COUNT; + Label DONE_LABEL, Stacked, COUNT, NO_COUNT; #if INCLUDE_RTM_OPT if (UseRTMForStackLocks && use_rtm) { @@ -829,8 +824,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // each other and there's no need for an explicit barrier (fence). // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. #ifndef _LP64 - get_thread (boxReg); - // Note that we could employ various encoding schemes to reduce // the number of loads below (currently 4) to just 2 or 3. // Refer to the comments in synchronizer.cpp. @@ -840,30 +833,12 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t jccb (Assembler::notZero, DONE_LABEL); movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - jccb (Assembler::notZero, CheckSucc); + jccb (Assembler::notZero, DONE_LABEL); movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); jmpb (DONE_LABEL); - - bind (Stacked); - // It's not inflated and it's not recursively stack-locked. - // It must be stack-locked. - // Try to reset the header to displaced header. - // The "box" value on the stack is stable, so we can reload - // and be assured we observe the same value as above. - movptr(tmpReg, Address(boxReg, 0)); - lock(); - cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box - // Intention fall-thru into DONE_LABEL - - // DONE_LABEL is a hot target - we'd really like to place it at the - // start of cache line by padding with NOPs. - // See the AMD and Intel software optimization manuals for the - // most efficient "long" NOP encodings. - // Unfortunately none of our alignment mechanisms suffice. - bind (CheckSucc); #else // _LP64 // It's inflated - Label LNotRecursive, LSuccess, LGoSlowPath; + Label CheckSucc, LNotRecursive, LSuccess, LGoSlowPath; cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0); jccb(Assembler::equal, LNotRecursive); @@ -936,13 +911,14 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t testl (boxReg, 0); // set ICC.ZF=1 to indicate success jmpb (DONE_LABEL); +#endif if (!UseHeavyMonitors) { bind (Stacked); movptr(tmpReg, Address (boxReg, 0)); // re-fetch lock(); cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box + // Intentional fall-thru into DONE_LABEL } -#endif bind(DONE_LABEL); // ZFlag == 1 count in fast path