From 18cea823a173e1b8b48d276daeca67b2a5cf3584 Mon Sep 17 00:00:00 2001
From: Axel Boldt-Christmas
Date: Fri, 16 Feb 2024 07:18:31 +0000
Subject: [PATCH] 8319801: Recursive lightweight locking: aarch64 implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Stefan Karlsson
Co-authored-by: Erik Österlund
Reviewed-by: rkennke, coleenp, dcubed, aph
---
 src/hotspot/cpu/aarch64/aarch64.ad            |  37 ++-
 .../cpu/aarch64/c1_MacroAssembler_aarch64.cpp |  11 +-
 .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 301 ++++++++++++++++--
 .../cpu/aarch64/c2_MacroAssembler_aarch64.hpp |   6 +-
 .../cpu/aarch64/interp_masm_aarch64.cpp       |  19 +-
 .../cpu/aarch64/macroAssembler_aarch64.cpp    | 158 +++++----
 .../cpu/aarch64/macroAssembler_aarch64.hpp    |   6 +-
 .../cpu/aarch64/sharedRuntime_aarch64.cpp     |   5 +-
 .../cpu/aarch64/vm_version_aarch64.hpp        |   3 +-
 9 files changed, 410 insertions(+), 136 deletions(-)

diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 501d9df08c1..a07ff041c48 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -16433,13 +16433,12 @@ instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2, iRegPNoSp tmp3)
 %{
+  predicate(LockingMode != LM_LIGHTWEIGHT);
   match(Set cr (FastLock object box));
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);

-  // TODO
-  // identify correct cost
   ins_cost(5 * INSN_COST);
-  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
+  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2,$tmp3" %}

   ins_encode %{
     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $tmp2$$Register, $tmp3$$Register);
   %}

@@ -16450,6 +16449,7 @@ instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegP
 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
 %{
+  predicate(LockingMode != LM_LIGHTWEIGHT);
   match(Set cr (FastUnlock object box));
   effect(TEMP tmp, TEMP tmp2);

@@ -16463,6 +16463,37 @@ instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRe
   ins_pipe(pipe_serial);
 %}

+instruct cmpFastLockLightweight(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
+%{
+  predicate(LockingMode == LM_LIGHTWEIGHT);
+  match(Set cr (FastLock object box));
+  effect(TEMP tmp, TEMP tmp2);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
+
+  ins_encode %{
+    __ fast_lock_lightweight($object$$Register, $box$$Register, $tmp$$Register, $tmp2$$Register);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct cmpFastUnlockLightweight(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
+%{
+  predicate(LockingMode == LM_LIGHTWEIGHT);
+  match(Set cr (FastUnlock object box));
+  effect(TEMP tmp, TEMP tmp2);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
+
+  ins_encode %{
+    __ fast_unlock_lightweight($object$$Register, $box$$Register, $tmp$$Register, $tmp2$$Register);
+  %}
+
+  ins_pipe(pipe_serial);
+%}

 // ============================================================================
 // Safepoint Instructions

diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
index c0d1d1747ab..50d35f281e5 100644
--- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -80,12 +80,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
     br(Assembler::NE, slow_case);
   }

-  // Load object header
-  ldr(hdr, Address(obj, hdr_offset));
   if (LockingMode == LM_LIGHTWEIGHT) {
     lightweight_lock(obj, hdr, temp, rscratch2, slow_case);
   } else if (LockingMode == LM_LEGACY) {
     Label done;
+    // Load object header
+    ldr(hdr, Address(obj, hdr_offset));
     // and mark it as unlocked
     orr(hdr, hdr, markWord::unlocked_value);
     // save unlocked object header into the displaced header location on the stack
@@ -144,11 +144,6 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_
   verify_oop(obj);

   if (LockingMode == LM_LIGHTWEIGHT) {
-    ldr(hdr, Address(obj, oopDesc::mark_offset_in_bytes()));
-    // We cannot use tbnz here, the target might be too far away and cannot
-    // be encoded.
-    tst(hdr, markWord::monitor_value);
-    br(Assembler::NE, slow_case);
     lightweight_unlock(obj, hdr, temp, rscratch2, slow_case);
   } else if (LockingMode == LM_LEGACY) {
     // test if object header is pointing to the displaced header, and if so, restore
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 8910fba97a5..7e3ceb1f020 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -32,6 +32,7 @@
 #include "opto/output.hpp"
 #include "opto/subnode.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "utilities/globalDefinitions.hpp"

 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@@ -55,6 +56,7 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
   Label object_has_monitor;
   Label count, no_count;

+  assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
   assert_different_registers(oop, box, tmp, disp_hdr);

   // Load markWord from object into displaced_header.
@@ -73,7 +75,8 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
   if (LockingMode == LM_MONITOR) {
     tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
     b(cont);
-  } else if (LockingMode == LM_LEGACY) {
+  } else {
+    assert(LockingMode == LM_LEGACY, "must be");
     // Set tmp to be (markWord of object | UNLOCK_VALUE).
     orr(tmp, disp_hdr, markWord::unlocked_value);
@@ -102,10 +105,6 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
     ands(tmp/*==0?*/, disp_hdr, tmp);   // Sets flags for result
     str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
     b(cont);
-  } else {
-    assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-    lightweight_lock(oop, disp_hdr, tmp, tmp3Reg, no_count);
-    b(count);
   }

   // Handle existing monitor.
@@ -119,14 +118,13 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
   cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
           /*release*/ true, /*weak*/ false, tmp3Reg); // Sets flags for result

-  if (LockingMode != LM_LIGHTWEIGHT) {
-    // Store a non-null value into the box to avoid looking like a re-entrant
-    // lock. The fast-path monitor unlock code checks for
-    // markWord::monitor_value so use markWord::unused_mark which has the
-    // relevant bit set, and also matches ObjectSynchronizer::enter.
-    mov(tmp, (address)markWord::unused_mark().value());
-    str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
-  }
+  // Store a non-null value into the box to avoid looking like a re-entrant
+  // lock. The fast-path monitor unlock code checks for
+  // markWord::monitor_value so use markWord::unused_mark which has the
+  // relevant bit set, and also matches ObjectSynchronizer::enter.
+  mov(tmp, (address)markWord::unused_mark().value());
+  str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
   br(Assembler::EQ, cont); // CAS success means locking succeeded

   cmp(tmp3Reg, rthread);
@@ -157,6 +155,7 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
   Label object_has_monitor;
   Label count, no_count;

+  assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
   assert_different_registers(oop, box, tmp, disp_hdr);

   if (LockingMode == LM_LEGACY) {
@@ -175,7 +174,8 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
   if (LockingMode == LM_MONITOR) {
     tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
     b(cont);
-  } else if (LockingMode == LM_LEGACY) {
+  } else {
+    assert(LockingMode == LM_LEGACY, "must be");
     // Check if it is still a lightweight lock; this is true if we
     // see the stack address of the basicLock in the markWord of the
     // object.
@@ -183,10 +183,6 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
     cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
             /*release*/ true, /*weak*/ false, tmp);
     b(cont);
-  } else {
-    assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-    lightweight_unlock(oop, tmp, box, disp_hdr, no_count);
-    b(count);
   }

   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
@@ -196,19 +192,6 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
   STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
   add(tmp, tmp, -(int)markWord::monitor_value); // monitor

-  if (LockingMode == LM_LIGHTWEIGHT) {
-    // If the owner is anonymous, we need to fix it -- in an outline stub.
-    Register tmp2 = disp_hdr;
-    ldr(tmp2, Address(tmp, ObjectMonitor::owner_offset()));
-    // We cannot use tbnz here, the target might be too far away and cannot
-    // be encoded.
-    tst(tmp2, (uint64_t)ObjectMonitor::ANONYMOUS_OWNER);
-    C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmp, tmp2);
-    Compile::current()->output()->add_stub(stub);
-    br(Assembler::NE, stub->entry());
-    bind(stub->continuation());
-  }
-
   ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset()));

   Label notRecursive;
@@ -241,6 +224,262 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
   bind(no_count);
 }

+void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register t1,
+                                              Register t2, Register t3) {
+  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+  assert_different_registers(obj, t1, t2, t3);
+
+  // Handle inflated monitor.
+  Label inflated;
+  // Finish fast lock successfully. MUST branch to this label with flag == EQ
+  Label locked;
+  // Finish fast lock unsuccessfully. MUST branch to this label with flag == NE
+  Label slow_path;
+
+  if (DiagnoseSyncOnValueBasedClasses != 0) {
+    load_klass(t1, obj);
+    ldrw(t1, Address(t1, Klass::access_flags_offset()));
+    tstw(t1, JVM_ACC_IS_VALUE_BASED_CLASS);
+    br(Assembler::NE, slow_path);
+  }
+
+  const Register t1_mark = t1;
+
+  { // Lightweight locking
+
+    // Push lock to the lock stack and finish successfully. MUST branch to this label with flag == EQ
+    Label push;
+
+    const Register t2_top = t2;
+    const Register t3_t = t3;
+
+    // Check if lock-stack is full.
+    ldrw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
+    cmpw(t2_top, (unsigned)LockStack::end_offset() - 1);
+    br(Assembler::GT, slow_path);
+
+    // Check if recursive.
+    subw(t3_t, t2_top, oopSize);
+    ldr(t3_t, Address(rthread, t3_t));
+    cmp(obj, t3_t);
+    br(Assembler::EQ, push);
+
+    // Relaxed normal load to check for monitor. Optimization for monitor case.
+    ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+    tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated);
+
+    // Not inflated
+    assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
+
+    // Try to lock. Transition lock-bits 0b01 => 0b00
+    orr(t1_mark, t1_mark, markWord::unlocked_value);
+    eor(t3_t, t1_mark, markWord::unlocked_value);
+    cmpxchg(/*addr*/ obj, /*expected*/ t1_mark, /*new*/ t3_t, Assembler::xword,
+            /*acquire*/ true, /*release*/ false, /*weak*/ false, noreg);
+    br(Assembler::NE, slow_path);
+
+    bind(push);
+    // After successful lock, push object on lock-stack.
+    str(obj, Address(rthread, t2_top));
+    addw(t2_top, t2_top, oopSize);
+    strw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
+    b(locked);
+  }
+
+  { // Handle inflated monitor.
+    bind(inflated);
+
+    // mark contains the tagged ObjectMonitor*.
+    const Register t1_tagged_monitor = t1_mark;
+    const uintptr_t monitor_tag = markWord::monitor_value;
+    const Register t2_owner_addr = t2;
+    const Register t3_owner = t3;
+
+    // Compute owner address.
+    lea(t2_owner_addr, Address(t1_tagged_monitor, (in_bytes(ObjectMonitor::owner_offset()) - monitor_tag)));
+
+    // CAS owner (null => current thread).
+    cmpxchg(t2_owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true,
+            /*release*/ false, /*weak*/ false, t3_owner);
+    br(Assembler::EQ, locked);
+
+    // Check if recursive.
+    cmp(t3_owner, rthread);
+    br(Assembler::NE, slow_path);
+
+    // Recursive.
+    increment(Address(t1_tagged_monitor, in_bytes(ObjectMonitor::recursions_offset()) - monitor_tag), 1);
+  }
+
+  bind(locked);
+  increment(Address(rthread, JavaThread::held_monitor_count_offset()));
+
+#ifdef ASSERT
+  // Check that locked label is reached with Flags == EQ.
+  Label flag_correct;
+  br(Assembler::EQ, flag_correct);
+  stop("Fast Lock Flag != EQ");
+#endif
+
+  bind(slow_path);
+#ifdef ASSERT
+  // Check that slow_path label is reached with Flags == NE.
+  br(Assembler::NE, flag_correct);
+  stop("Fast Lock Flag != NE");
+  bind(flag_correct);
+#endif
+  // C2 uses the value of Flags (NE vs EQ) to determine the continuation.
+}
+
+void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Register t2,
+                                                Register t3) {
+  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+  assert_different_registers(obj, t1, t2, t3);
+
+  // Handle inflated monitor.
+  Label inflated, inflated_load_monitor;
+  // Finish fast unlock successfully. MUST branch to this label with flag == EQ
+  Label unlocked;
+  // Finish fast unlock unsuccessfully. MUST branch to this label with flag == NE
+  Label slow_path;
+
+  const Register t1_mark = t1;
+  const Register t2_top = t2;
+  const Register t3_t = t3;
+
+  { // Lightweight unlock
+
+    // Check if obj is top of lock-stack.
+    ldrw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
+    subw(t2_top, t2_top, oopSize);
+    ldr(t3_t, Address(rthread, t2_top));
+    cmp(obj, t3_t);
+    // Top of lock stack was not obj. Must be monitor.
+    br(Assembler::NE, inflated_load_monitor);
+
+    // Pop lock-stack.
+    DEBUG_ONLY(str(zr, Address(rthread, t2_top));)
+    strw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
+
+    // Check if recursive.
+    subw(t3_t, t2_top, oopSize);
+    ldr(t3_t, Address(rthread, t3_t));
+    cmp(obj, t3_t);
+    br(Assembler::EQ, unlocked);
+
+    // Not recursive.
+    // Load Mark.
+    ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+
+    // Check header for monitor (0b10).
+    tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated);
+
+    // Try to unlock. Transition lock bits 0b00 => 0b01
+    assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
+    orr(t3_t, t1_mark, markWord::unlocked_value);
+    cmpxchg(/*addr*/ obj, /*expected*/ t1_mark, /*new*/ t3_t, Assembler::xword,
+            /*acquire*/ false, /*release*/ true, /*weak*/ false, noreg);
+    br(Assembler::EQ, unlocked);
+
+    // Compare and exchange failed.
+    // Restore lock-stack and handle the unlock in runtime.
+    DEBUG_ONLY(str(obj, Address(rthread, t2_top));)
+    addw(t2_top, t2_top, oopSize);
+    strw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
+    b(slow_path);
+  }
+
+
+  { // Handle inflated monitor.
+    bind(inflated_load_monitor);
+    ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+#ifdef ASSERT
+    tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated);
+    stop("Fast Unlock not monitor");
+#endif
+
+    bind(inflated);
+
+#ifdef ASSERT
+    Label check_done;
+    subw(t2_top, t2_top, oopSize);
+    cmpw(t2_top, in_bytes(JavaThread::lock_stack_base_offset()));
+    br(Assembler::LT, check_done);
+    ldr(t3_t, Address(rthread, t2_top));
+    cmp(obj, t3_t);
+    br(Assembler::NE, inflated);
+    stop("Fast Unlock lock on stack");
+    bind(check_done);
+#endif
+
+    // mark contains the tagged ObjectMonitor*.
+    const Register t1_monitor = t1_mark;
+    const uintptr_t monitor_tag = markWord::monitor_value;
+
+    // Untag the monitor.
+    sub(t1_monitor, t1_mark, monitor_tag);
+
+    const Register t2_recursions = t2;
+    Label not_recursive;
+
+    // Check if recursive.
+    ldr(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset()));
+    cbz(t2_recursions, not_recursive);
+
+    // Recursive unlock.
+    sub(t2_recursions, t2_recursions, 1u);
+    str(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset()));
+    // Set flag == EQ
+    cmp(t2_recursions, t2_recursions);
+    b(unlocked);
+
+    bind(not_recursive);
+
+    Label release;
+    const Register t2_owner_addr = t2;
+
+    // Compute owner address.
+    lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset()));
+
+    // Check if the entry lists are empty.
+    ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset()));
+    ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset()));
+    orr(rscratch1, rscratch1, t3_t);
+    cmp(rscratch1, zr);
+    br(Assembler::EQ, release);
+
+    // The owner may be anonymous and we removed the last obj entry in
+    // the lock-stack. This loses the information about the owner.
+    // Write the thread to the owner field so the runtime knows the owner.
+    str(rthread, Address(t2_owner_addr));
+    b(slow_path);
+
+    bind(release);
+    // Set owner to null.
+    // Release to satisfy the JMM
+    stlr(zr, t2_owner_addr);
+  }
+
+  bind(unlocked);
+  decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
+
+#ifdef ASSERT
+  // Check that unlocked label is reached with Flags == EQ.
+  Label flag_correct;
+  br(Assembler::EQ, flag_correct);
+  stop("Fast Unlock Flag != EQ");
+#endif
+
+  bind(slow_path);
+#ifdef ASSERT
+  // Check that slow_path label is reached with Flags == NE.
+  br(Assembler::NE, flag_correct);
+  stop("Fast Unlock Flag != NE");
+  bind(flag_correct);
+#endif
+  // C2 uses the value of Flags (NE vs EQ) to determine the continuation.
+}
+
 // Search for str1 in str2 and return index or -1
 // Clobbers: rscratch1, rscratch2, rflags. May also clobber v0-v1, when icnt1==-1.
 void C2_MacroAssembler::string_indexof(Register str2, Register str1,
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
index dfa7d88cb93..1481f975020 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,9 +36,11 @@
 public:
   // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
-  // See full description in macroAssembler_aarch64.cpp.
   void fast_lock(Register object, Register box, Register tmp, Register tmp2, Register tmp3);
   void fast_unlock(Register object, Register box, Register tmp, Register tmp2);
+  // Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
+  void fast_lock_lightweight(Register object, Register t1, Register t2, Register t3);
+  void fast_unlock_lightweight(Register object, Register t1, Register t2, Register t3);

   void string_compare(Register str1, Register str2,
                       Register cnt1, Register cnt2, Register result,
diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
index 69a61e281f3..b5625b7fc61 100644
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -701,7 +701,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg)
     }

     if (LockingMode == LM_LIGHTWEIGHT) {
-      ldr(tmp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
       lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case);
       b(count);
     } else if (LockingMode == LM_LEGACY) {
@@ -818,22 +817,6 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg)

     if (LockingMode == LM_LIGHTWEIGHT) {
       Label slow_case;
-
-      // Check for non-symmetric locking. This is allowed by the spec and the interpreter
-      // must handle it.
-      Register tmp = rscratch1;
-      // First check for lock-stack underflow.
-      ldrw(tmp, Address(rthread, JavaThread::lock_stack_top_offset()));
-      cmpw(tmp, (unsigned)LockStack::start_offset());
-      br(Assembler::LE, slow_case);
-      // Then check if the top of the lock-stack matches the unlocked object.
-      subw(tmp, tmp, oopSize);
-      ldr(tmp, Address(rthread, tmp));
-      cmpoop(tmp, obj_reg);
-      br(Assembler::NE, slow_case);
-
-      ldr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
-      tbnz(header_reg, exact_log2(markWord::monitor_value), slow_case);
       lightweight_unlock(obj_reg, header_reg, swap_reg, tmp_reg, slow_case);
       b(count);
       bind(slow_case);
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index b19587ebe76..e88e7ff1f62 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -23,8 +23,6 @@
  *
  */

-#include <sys/types.h>
-
 #include "precompiled.hpp"
 #include "asm/assembler.hpp"
 #include "asm/assembler.inline.hpp"
@@ -56,6 +54,7 @@
 #include "runtime/jniHandles.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "utilities/globalDefinitions.hpp"
 #include "utilities/powerOfTwo.hpp"
 #ifdef COMPILER1
 #include "c1/c1_LIRAssembler.hpp"
@@ -67,6 +66,8 @@
 #include "opto/output.hpp"
 #endif

+#include <sys/types.h>
+
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
 #else
@@ -6381,97 +6382,122 @@ void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) {
 }

 // Implements lightweight-locking.
-// Branches to slow upon failure to lock the object, with ZF cleared.
-// Falls through upon success with ZF set.
 //
 // - obj: the object to be locked
-// - hdr: the header, already loaded from obj, will be destroyed
-// - t1, t2: temporary registers, will be destroyed
-void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register t1, Register t2, Label& slow) {
+// - t1, t2, t3: temporary registers, will be destroyed
+// - slow: branched to if locking fails, absolute offset may be larger than 32KB (imm14 encoding).
+void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, Register t3, Label& slow) {
   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
-  assert_different_registers(obj, hdr, t1, t2, rscratch1);
+  assert_different_registers(obj, t1, t2, t3, rscratch1);

-  // Check if we would have space on lock-stack for the object.
-  ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset()));
-  cmpw(t1, (unsigned)LockStack::end_offset() - 1);
-  br(Assembler::GT, slow);
+  Label push;
+  const Register top = t1;
+  const Register mark = t2;
+  const Register t = t3;

-  // Load (object->mark() | 1) into hdr
-  orr(hdr, hdr, markWord::unlocked_value);
-  // Clear lock-bits, into t2
-  eor(t2, hdr, markWord::unlocked_value);
-  // Try to swing header from unlocked to locked
-  // Clobbers rscratch1 when UseLSE is false
-  cmpxchg(/*addr*/ obj, /*expected*/ hdr, /*new*/ t2, Assembler::xword,
-          /*acquire*/ true, /*release*/ true, /*weak*/ false, t1);
+  // Preload the markWord. It is important that this is the first
+  // instruction emitted as it is part of C1's null check semantics.
+  ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+
+  // Check if the lock-stack is full.
+  ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
+  cmpw(top, (unsigned)LockStack::end_offset());
+  br(Assembler::GE, slow);
+
+  // Check for recursion.
+  subw(t, top, oopSize);
+  ldr(t, Address(rthread, t));
+  cmp(obj, t);
+  br(Assembler::EQ, push);
+
+  // Check header for monitor (0b10).
+  tst(mark, markWord::monitor_value);
   br(Assembler::NE, slow);

-  // After successful lock, push object on lock-stack
-  ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset()));
-  str(obj, Address(rthread, t1));
-  addw(t1, t1, oopSize);
-  strw(t1, Address(rthread, JavaThread::lock_stack_top_offset()));
+  // Try to lock. Transition lock bits 0b01 => 0b00
+  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
+  orr(mark, mark, markWord::unlocked_value);
+  eor(t, mark, markWord::unlocked_value);
+  cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::xword,
+          /*acquire*/ true, /*release*/ false, /*weak*/ false, noreg);
+  br(Assembler::NE, slow);
+
+  bind(push);
+  // After successful lock, push object on lock-stack.
+  str(obj, Address(rthread, top));
+  addw(top, top, oopSize);
+  strw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
 }

 // Implements lightweight-unlocking.
-// Branches to slow upon failure, with ZF cleared.
-// Falls through upon success, with ZF set.
 //
 // - obj: the object to be unlocked
-// - hdr: the (pre-loaded) header of the object
-// - t1, t2: temporary registers
-void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Register t1, Register t2, Label& slow) {
+// - t1, t2, t3: temporary registers
+// - slow: branched to if unlocking fails, absolute offset may be larger than 32KB (imm14 encoding).
+void MacroAssembler::lightweight_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow) {
   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
-  assert_different_registers(obj, hdr, t1, t2, rscratch1);
+  // cmpxchg clobbers rscratch1.
+  assert_different_registers(obj, t1, t2, t3, rscratch1);

 #ifdef ASSERT
   {
-    // The following checks rely on the fact that LockStack is only ever modified by
-    // its owning thread, even if the lock got inflated concurrently; removal of LockStack
-    // entries after inflation will happen delayed in that case.
-
     // Check for lock-stack underflow.
     Label stack_ok;
     ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset()));
     cmpw(t1, (unsigned)LockStack::start_offset());
-    br(Assembler::GT, stack_ok);
+    br(Assembler::GE, stack_ok);
     STOP("Lock-stack underflow");
     bind(stack_ok);
   }
-  {
-    // Check if the top of the lock-stack matches the unlocked object.
-    Label tos_ok;
-    subw(t1, t1, oopSize);
-    ldr(t1, Address(rthread, t1));
-    cmpoop(t1, obj);
-    br(Assembler::EQ, tos_ok);
-    STOP("Top of lock-stack does not match the unlocked object");
-    bind(tos_ok);
-  }
-  {
-    // Check that hdr is fast-locked.
-    Label hdr_ok;
-    tst(hdr, markWord::lock_mask_in_place);
-    br(Assembler::EQ, hdr_ok);
-    STOP("Header is not fast-locked");
-    bind(hdr_ok);
-  }
 #endif

-  // Load the new header (unlocked) into t1
-  orr(t1, hdr, markWord::unlocked_value);
+  Label unlocked, push_and_slow;
+  const Register top = t1;
+  const Register mark = t2;
+  const Register t = t3;

-  // Try to swing header from locked to unlocked
-  // Clobbers rscratch1 when UseLSE is false
-  cmpxchg(obj, hdr, t1, Assembler::xword,
-          /*acquire*/ true, /*release*/ true, /*weak*/ false, t2);
+  // Check if obj is top of lock-stack.
+  ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
+  subw(top, top, oopSize);
+  ldr(t, Address(rthread, top));
+  cmp(obj, t);
   br(Assembler::NE, slow);

-  // After successful unlock, pop object from lock-stack
-  ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset()));
-  subw(t1, t1, oopSize);
+  // Pop lock-stack.
+  DEBUG_ONLY(str(zr, Address(rthread, top));)
+  strw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
+
+  // Check if recursive.
+  subw(t, top, oopSize);
+  ldr(t, Address(rthread, t));
+  cmp(obj, t);
+  br(Assembler::EQ, unlocked);
+
+  // Not recursive. Check header for monitor (0b10).
+  ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+  tbnz(mark, log2i_exact(markWord::monitor_value), push_and_slow);
+
 #ifdef ASSERT
-  str(zr, Address(rthread, t1));
+  // Check header not unlocked (0b01).
+  Label not_unlocked;
+  tbz(mark, log2i_exact(markWord::unlocked_value), not_unlocked);
+  stop("lightweight_unlock already unlocked");
+  bind(not_unlocked);
 #endif
-  strw(t1, Address(rthread, JavaThread::lock_stack_top_offset()));
+
+  // Try to unlock. Transition lock bits 0b00 => 0b01
+  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
+  orr(t, mark, markWord::unlocked_value);
+  cmpxchg(obj, mark, t, Assembler::xword,
+          /*acquire*/ false, /*release*/ true, /*weak*/ false, noreg);
+  br(Assembler::EQ, unlocked);
+
+  bind(push_and_slow);
+  // Restore lock-stack and handle the unlock in runtime.
+  DEBUG_ONLY(str(obj, Address(rthread, top));)
+  addw(top, top, oopSize);
+  strw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
+  b(slow);
+
+  bind(unlocked);
 }
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index 990e725d099..dad7ec4d497 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -1602,8 +1602,8 @@ public:
   // Code for java.lang.Thread::onSpinWait() intrinsic.
void spin_wait(); - void lightweight_lock(Register obj, Register hdr, Register t1, Register t2, Label& slow); - void lightweight_unlock(Register obj, Register hdr, Register t1, Register t2, Label& slow); + void lightweight_lock(Register obj, Register t1, Register t2, Register t3, Label& slow); + void lightweight_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow); private: // Check the current thread doesn't need a cross modify fence. diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index 216c1ff3509..97a10afde7a 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -1796,7 +1796,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ br(Assembler::NE, slow_path_lock); } else { assert(LockingMode == LM_LIGHTWEIGHT, "must be"); - __ ldr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); __ lightweight_lock(obj_reg, swap_reg, tmp, lock_tmp, slow_path_lock); } __ bind(count); @@ -1939,8 +1938,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ decrement(Address(rthread, JavaThread::held_monitor_count_offset())); } else { assert(LockingMode == LM_LIGHTWEIGHT, ""); - __ ldr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ tbnz(old_hdr, exact_log2(markWord::monitor_value), slow_path_unlock); __ lightweight_unlock(obj_reg, old_hdr, swap_reg, lock_tmp, slow_path_unlock); __ decrement(Address(rthread, JavaThread::held_monitor_count_offset())); } diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index 0a85d339a55..6883dc0d93e 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -169,6 +169,7 @@ enum Ampere_CPU_Model { // Aarch64 supports fast class initialization checks static bool supports_fast_class_init_checks() { return true; } constexpr static bool supports_stack_watermark_barrier() { return true; } + constexpr static bool supports_recursive_lightweight_locking() { return true; } static void get_compatible_board(char *buf, int buflen);
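
For readers tracing the new fast paths, the protocol implemented by fast_lock_lightweight (and by MacroAssembler::lightweight_lock) works against two structures: the per-thread lock-stack, a small oop array whose current top offset is cached in JavaThread, and the object's mark word, whose low lock bits are 0b01 when unlocked, 0b00 when fast-locked, and 0b10 when inflated to an ObjectMonitor. The C++ sketch below models only that control flow. Object, LockStack, and the constants are simplified stand-ins invented for illustration, not HotSpot types, and where the real inflated path CASes the ObjectMonitor owner field the model simply reports a non-fast-path outcome.

#include <atomic>
#include <cstdint>

// Hypothetical stand-ins for illustration; not HotSpot APIs.
constexpr uintptr_t unlocked_value = 0b01;   // mirrors markWord::unlocked_value
constexpr uintptr_t monitor_value  = 0b10;   // mirrors markWord::monitor_value

struct Object { std::atomic<uintptr_t> mark; };

struct LockStack {               // per-thread; only the owning thread mutates it
  static const int capacity = 8;
  Object* elems[capacity];
  int top = 0;                   // models JavaThread::lock_stack_top_offset()
};

// Models fast_lock_lightweight; true means the fast path succeeded (flag == EQ).
bool fast_lock_model(Object* obj, LockStack& ls) {
  if (ls.top == LockStack::capacity)
    return false;                                   // lock-stack full -> slow_path
  if (ls.top > 0 && ls.elems[ls.top - 1] == obj) {  // recursive: obj already on top,
    ls.elems[ls.top++] = obj;                       // push again, mark word untouched
    return true;
  }
  uintptr_t mark = obj->mark.load(std::memory_order_relaxed);
  if (mark & monitor_value)
    return false;                                   // inflated; real code CASes the owner
  uintptr_t expected = mark | unlocked_value;       // lock bits 0b01
  uintptr_t locked   = expected ^ unlocked_value;   // lock bits 0b00
  if (!obj->mark.compare_exchange_strong(expected, locked,
                                         std::memory_order_acquire))
    return false;                                   // lost the race -> slow_path
  ls.elems[ls.top++] = obj;                         // push only after a successful CAS
  return true;
}

The recursion test is the point of the change: a nested lock of the object already on top of the stack costs one comparison and one push, with no atomic access to the mark word.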
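
The unlock side mirrors this. fast_unlock_lightweight pops the lock-stack; if the same object still sits in the slot below, the unlock was recursive and the mark word stays fast-locked, otherwise the lock bits are CASed back from 0b00 to 0b01. Only a failed CAS restores the lock-stack before branching to the runtime; the inflated case proceeds to the ObjectMonitor code shown in the patch without restoring. A sketch under the same simplified, assumed model as above:

// Models fast_unlock_lightweight; true means the fast path succeeded (flag == EQ).
bool fast_unlock_model(Object* obj, LockStack& ls) {
  if (ls.top == 0 || ls.elems[ls.top - 1] != obj)
    return false;                                   // obj not on top: monitor path
  ls.top--;                                         // pop lock-stack
  if (ls.top > 0 && ls.elems[ls.top - 1] == obj)
    return true;                                    // recursive: mark stays 0b00
  uintptr_t mark = obj->mark.load(std::memory_order_relaxed);
  if (mark & monitor_value)
    return false;                                   // inflated while held; monitor code runs
  uintptr_t expected = mark;                        // lock bits 0b00
  uintptr_t unlocked = mark | unlocked_value;       // lock bits 0b01
  if (obj->mark.compare_exchange_strong(expected, unlocked,
                                        std::memory_order_release))
    return true;
  ls.top++;                                         // CAS failed: restore the stack and
  return false;                                     // let the runtime finish the unlock
}

Restoring the stack on CAS failure preserves the invariant the runtime relies on: an object fast-locked by a thread is present on that thread's lock-stack.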