8319900: Recursive lightweight locking: riscv64 implementation

Co-authored-by: Axel Boldt-Christmas <aboldtch@openjdk.org>
Reviewed-by: fyang
This commit is contained in:
Gui Cao 2024-03-05 02:04:21 +00:00 committed by Fei Yang
parent 045eea1130
commit e1b661f8c1
9 changed files with 493 additions and 196 deletions

View File

@ -69,13 +69,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
bnez(temp, slow_case, true /* is_far */); bnez(temp, slow_case, true /* is_far */);
} }
// Load object header
ld(hdr, Address(obj, hdr_offset));
if (LockingMode == LM_LIGHTWEIGHT) { if (LockingMode == LM_LIGHTWEIGHT) {
lightweight_lock(obj, hdr, temp, t1, slow_case); lightweight_lock(obj, hdr, temp, t1, slow_case);
} else if (LockingMode == LM_LEGACY) { } else if (LockingMode == LM_LEGACY) {
Label done; Label done;
// Load object header
ld(hdr, Address(obj, hdr_offset));
// and mark it as unlocked // and mark it as unlocked
ori(hdr, hdr, markWord::unlocked_value); ori(hdr, hdr, markWord::unlocked_value);
// save unlocked object header into the displaced header location on the stack // save unlocked object header into the displaced header location on the stack
@ -134,9 +133,6 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_
verify_oop(obj); verify_oop(obj);
if (LockingMode == LM_LIGHTWEIGHT) { if (LockingMode == LM_LIGHTWEIGHT) {
ld(hdr, Address(obj, oopDesc::mark_offset_in_bytes()));
test_bit(temp, hdr, exact_log2(markWord::monitor_value));
bnez(temp, slow_case, /* is_far */ true);
lightweight_unlock(obj, hdr, temp, t1, slow_case); lightweight_unlock(obj, hdr, temp, t1, slow_case);
} else if (LockingMode == LM_LEGACY) { } else if (LockingMode == LM_LEGACY) {
// test if object header is pointing to the displaced header, and if so, restore // test if object header is pointing to the displaced header, and if so, restore

View File

@ -32,6 +32,7 @@
#include "opto/output.hpp" #include "opto/output.hpp"
#include "opto/subnode.hpp" #include "opto/subnode.hpp"
#include "runtime/stubRoutines.hpp" #include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
#ifdef PRODUCT #ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */ #define BLOCK_COMMENT(str) /* nothing */
@ -51,30 +52,35 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg,
Register box = boxReg; Register box = boxReg;
Register disp_hdr = tmp1Reg; Register disp_hdr = tmp1Reg;
Register tmp = tmp2Reg; Register tmp = tmp2Reg;
Label cont;
Label object_has_monitor; Label object_has_monitor;
Label count, no_count; // Finish fast lock successfully. MUST branch to with flag == 0
Label locked;
// Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0
Label slow_path;
assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
assert_different_registers(oop, box, tmp, disp_hdr, flag, tmp3Reg, t0); assert_different_registers(oop, box, tmp, disp_hdr, flag, tmp3Reg, t0);
mv(flag, 1);
// Load markWord from object into displaced_header. // Load markWord from object into displaced_header.
ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
if (DiagnoseSyncOnValueBasedClasses != 0) { if (DiagnoseSyncOnValueBasedClasses != 0) {
load_klass(flag, oop); load_klass(tmp, oop);
lwu(flag, Address(flag, Klass::access_flags_offset())); lwu(tmp, Address(tmp, Klass::access_flags_offset()));
test_bit(flag, flag, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); test_bit(tmp, tmp, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
bnez(flag, cont, true /* is_far */); bnez(tmp, slow_path);
} }
// Check for existing monitor // Check for existing monitor
test_bit(t0, disp_hdr, exact_log2(markWord::monitor_value)); test_bit(tmp, disp_hdr, exact_log2(markWord::monitor_value));
bnez(t0, object_has_monitor); bnez(tmp, object_has_monitor);
if (LockingMode == LM_MONITOR) { if (LockingMode == LM_MONITOR) {
mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path j(slow_path);
j(cont); } else {
} else if (LockingMode == LM_LEGACY) { assert(LockingMode == LM_LEGACY, "must be");
// Set tmp to be (markWord of object | UNLOCK_VALUE). // Set tmp to be (markWord of object | UNLOCK_VALUE).
ori(tmp, disp_hdr, markWord::unlocked_value); ori(tmp, disp_hdr, markWord::unlocked_value);
@ -84,39 +90,27 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg,
// Compare object markWord with an unlocked value (tmp) and if // Compare object markWord with an unlocked value (tmp) and if
// equal exchange the stack address of our box with object markWord. // equal exchange the stack address of our box with object markWord.
// On failure disp_hdr contains the possibly locked markWord. // On failure disp_hdr contains the possibly locked markWord.
cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64,
Assembler::rl, /*result*/disp_hdr); Assembler::aq, Assembler::rl, /*result*/disp_hdr);
mv(flag, zr); beq(disp_hdr, tmp, locked);
beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// If the compare-and-exchange succeeded, then we found an unlocked // If the compare-and-exchange succeeded, then we found an unlocked
// object, will have now locked it will continue at label cont // object, will have now locked it will continue at label locked
// We did not see an unlocked object so try the fast recursive case. // We did not see an unlocked object so try the fast recursive case.
// Check if the owner is self by comparing the value in the // Check if the owner is self by comparing the value in the
// markWord of object (disp_hdr) with the stack pointer. // markWord of object (disp_hdr) with the stack pointer.
sub(disp_hdr, disp_hdr, sp); sub(disp_hdr, disp_hdr, sp);
mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place));
// If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto label locked,
// hence we can store 0 as the displaced header in the box, which indicates that it is a // hence we can store 0 as the displaced header in the box, which indicates that it is a
// recursive lock. // recursive lock.
andr(tmp/*==0?*/, disp_hdr, tmp); andr(tmp/*==0?*/, disp_hdr, tmp);
sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
mv(flag, tmp); // we can use the value of tmp as the result here beqz(tmp, locked);
j(cont); j(slow_path);
} else {
assert(LockingMode == LM_LIGHTWEIGHT, "");
Label slow;
lightweight_lock(oop, disp_hdr, tmp, tmp3Reg, slow);
// Indicate success on completion.
mv(flag, zr);
j(count);
bind(slow);
mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path
j(no_count);
} }
// Handle existing monitor. // Handle existing monitor.
@ -126,35 +120,42 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg,
// //
// Try to CAS m->owner from null to current thread. // Try to CAS m->owner from null to current thread.
add(tmp, disp_hdr, (in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value)); add(tmp, disp_hdr, (in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value));
cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64,
Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) Assembler::aq, Assembler::rl, /*result*/tmp3Reg); // cas succeeds if tmp3Reg == zr(expected)
if (LockingMode != LM_LIGHTWEIGHT) {
// Store a non-null value into the box to avoid looking like a re-entrant // Store a non-null value into the box to avoid looking like a re-entrant
// lock. The fast-path monitor unlock code checks for // lock. The fast-path monitor unlock code checks for
// markWord::monitor_value so use markWord::unused_mark which has the // markWord::monitor_value so use markWord::unused_mark which has the
// relevant bit set, and also matches ObjectSynchronizer::slow_enter. // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
mv(tmp, (address)markWord::unused_mark().value()); mv(tmp, (address)markWord::unused_mark().value());
sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
}
beqz(flag, cont); // CAS success means locking succeeded beqz(tmp3Reg, locked); // CAS success means locking succeeded
bne(flag, xthread, cont); // Check for recursive locking bne(tmp3Reg, xthread, slow_path); // Check for recursive locking
// Recursive lock case // Recursive lock case
increment(Address(disp_hdr, in_bytes(ObjectMonitor::recursions_offset()) - markWord::monitor_value), 1, tmp2Reg, tmp3Reg);
bind(locked);
mv(flag, zr); mv(flag, zr);
increment(Address(disp_hdr, in_bytes(ObjectMonitor::recursions_offset()) - markWord::monitor_value), 1, t0, tmp); increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp2Reg, tmp3Reg);
bind(cont); #ifdef ASSERT
// zero flag indicates success // Check that locked label is reached with flag == 0.
// non-zero flag indicates failure Label flag_correct;
bnez(flag, no_count); beqz(flag, flag_correct);
stop("Fast Lock Flag != 0");
#endif
bind(count); bind(slow_path);
increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp); #ifdef ASSERT
// Check that slow_path label is reached with flag != 0.
bind(no_count); bnez(flag, flag_correct);
stop("Fast Lock Flag == 0");
bind(flag_correct);
#endif
// C2 uses the value of flag (0 vs !0) to determine the continuation.
} }
void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
@ -165,19 +166,23 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
Register box = boxReg; Register box = boxReg;
Register disp_hdr = tmp1Reg; Register disp_hdr = tmp1Reg;
Register tmp = tmp2Reg; Register tmp = tmp2Reg;
Label cont;
Label object_has_monitor; Label object_has_monitor;
Label count, no_count; // Finish fast lock successfully. MUST branch to with flag == 0
Label unlocked;
// Finish fast unlock unsuccessfully. slow_path MUST branch to with flag != 0
Label slow_path;
assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
assert_different_registers(oop, box, tmp, disp_hdr, flag, t0); assert_different_registers(oop, box, tmp, disp_hdr, flag, t0);
mv(flag, 1);
if (LockingMode == LM_LEGACY) { if (LockingMode == LM_LEGACY) {
// Find the lock address and load the displaced header from the stack. // Find the lock address and load the displaced header from the stack.
ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
// If the displaced header is 0, we have a recursive unlock. // If the displaced header is 0, we have a recursive unlock.
mv(flag, disp_hdr); beqz(disp_hdr, unlocked);
beqz(disp_hdr, cont);
} }
// Handle existing monitor. // Handle existing monitor.
@ -186,28 +191,17 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
bnez(t0, object_has_monitor); bnez(t0, object_has_monitor);
if (LockingMode == LM_MONITOR) { if (LockingMode == LM_MONITOR) {
mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path j(slow_path);
j(cont); } else {
} else if (LockingMode == LM_LEGACY) { assert(LockingMode == LM_LEGACY, "must be");
// Check if it is still a light weight lock, this is true if we // Check if it is still a light weight lock, this is true if we
// see the stack address of the basicLock in the markWord of the // see the stack address of the basicLock in the markWord of the
// object. // object.
cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64,
Assembler::rl, /*result*/tmp); Assembler::relaxed, Assembler::rl, /*result*/tmp);
xorr(flag, box, tmp); // box == tmp if cas succeeds beq(box, tmp, unlocked); // box == tmp if cas succeeds
j(cont); j(slow_path);
} else {
assert(LockingMode == LM_LIGHTWEIGHT, "");
Label slow;
lightweight_unlock(oop, tmp, box, disp_hdr, slow);
// Indicate success on completion.
mv(flag, zr);
j(count);
bind(slow);
mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path
j(no_count);
} }
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
@ -217,17 +211,6 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
STATIC_ASSERT(markWord::monitor_value <= INT_MAX); STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
add(tmp, tmp, -(int)markWord::monitor_value); // monitor add(tmp, tmp, -(int)markWord::monitor_value); // monitor
if (LockingMode == LM_LIGHTWEIGHT) {
// If the owner is anonymous, we need to fix it -- in an outline stub.
Register tmp2 = disp_hdr;
ld(tmp2, Address(tmp, ObjectMonitor::owner_offset()));
test_bit(t0, tmp2, exact_log2(ObjectMonitor::ANONYMOUS_OWNER));
C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmp, tmp2);
Compile::current()->output()->add_stub(stub);
bnez(t0, stub->entry(), /* is_far */ true);
bind(stub->continuation());
}
ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset())); ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset()));
Label notRecursive; Label notRecursive;
@ -236,28 +219,304 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
// Recursive lock // Recursive lock
addi(disp_hdr, disp_hdr, -1); addi(disp_hdr, disp_hdr, -1);
sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset())); sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset()));
mv(flag, zr); j(unlocked);
j(cont);
bind(notRecursive); bind(notRecursive);
ld(flag, Address(tmp, ObjectMonitor::EntryList_offset())); ld(t0, Address(tmp, ObjectMonitor::EntryList_offset()));
ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset())); ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset()));
orr(flag, flag, disp_hdr); // Will be 0 if both are 0. orr(t0, t0, disp_hdr); // Will be 0 if both are 0.
bnez(flag, cont); bnez(t0, slow_path);
// need a release store here // need a release store here
la(tmp, Address(tmp, ObjectMonitor::owner_offset())); la(tmp, Address(tmp, ObjectMonitor::owner_offset()));
membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
sd(zr, Address(tmp)); // set unowned sd(zr, Address(tmp)); // set unowned
bind(cont); bind(unlocked);
// zero flag indicates success mv(flag, zr);
// non-zero flag indicates failure decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp1Reg, tmp2Reg);
bnez(flag, no_count);
bind(count); #ifdef ASSERT
decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp); // Check that unlocked label is reached with flag == 0.
Label flag_correct;
beqz(flag, flag_correct);
stop("Fast Lock Flag != 0");
#endif
bind(no_count); bind(slow_path);
#ifdef ASSERT
// Check that slow_path label is reached with flag != 0.
bnez(flag, flag_correct);
stop("Fast Lock Flag == 0");
bind(flag_correct);
#endif
// C2 uses the value of flag (0 vs !0) to determine the continuation.
}
// Emits the C2 fast path for locking obj when LockingMode == LM_LIGHTWEIGHT.
//
// - obj: the object to be locked
// - tmp1, tmp2, tmp3: temporary registers, overwritten by the emitted code
//
// The outcome is reported in t1 (flag): zero when the lock was taken on the
// fast path, non-zero when the caller must continue in the slow path.
// On success the emitted code also bumps the thread's held monitor count.
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Register tmp2, Register tmp3) {
// Flag register, zero for success; non-zero for failure.
Register flag = t1;
assert(LockingMode == LM_LIGHTWEIGHT, "must be");
assert_different_registers(obj, tmp1, tmp2, tmp3, flag, t0);
// Preset flag to "failure" so that every branch to slow_path below arrives
// with flag != 0; only the locked label clears it.
mv(flag, 1);
// Handle inflated monitor.
Label inflated;
// Finish fast lock successfully. MUST branch to with flag == 0
Label locked;
// Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0
Label slow_path;
if (DiagnoseSyncOnValueBasedClasses != 0) {
// Objects whose class has JVM_ACC_IS_VALUE_BASED_CLASS set in its access
// flags are always sent to the slow path.
load_klass(tmp1, obj);
lwu(tmp1, Address(tmp1, Klass::access_flags_offset()));
test_bit(tmp1, tmp1, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
bnez(tmp1, slow_path);
}
const Register tmp1_mark = tmp1;
{ // Lightweight locking
// Push lock to the lock stack and finish successfully. MUST branch to with flag == 0
Label push;
const Register tmp2_top = tmp2;
const Register tmp3_t = tmp3;
// Check if lock-stack is full.
// top is a 32-bit offset that is added to xthread to address the next free slot.
lwu(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
mv(tmp3_t, (unsigned)LockStack::end_offset());
bge(tmp2_top, tmp3_t, slow_path);
// Check if recursive.
// The slot just below top holds the most recently pushed entry; if it is obj
// the mark word is not touched and obj is simply pushed again.
// NOTE(review): with an empty lock-stack this reads the slot below the first
// entry -- presumably a sentinel makes that safe; confirm against LockStack.
add(tmp3_t, xthread, tmp2_top);
ld(tmp3_t, Address(tmp3_t, -oopSize));
beq(obj, tmp3_t, push);
// Relaxed normal load to check for monitor. Optimization for monitor case.
ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value));
bnez(tmp3_t, inflated);
// Not inflated
assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la");
// Try to lock. Transition lock-bits 0b01 => 0b00
// Expected value: the loaded mark with the unlocked bit forced on.
// New value: the same mark with the unlocked bit cleared.
ori(tmp1_mark, tmp1_mark, markWord::unlocked_value);
xori(tmp3_t, tmp1_mark, markWord::unlocked_value);
cmpxchg(/*addr*/ obj, /*expected*/ tmp1_mark, /*new*/ tmp3_t, Assembler::int64,
/*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ tmp3_t);
// The CAS result is compared against the expected mark; any difference
// means the exchange did not happen.
bne(tmp1_mark, tmp3_t, slow_path);
bind(push);
// After successful lock, push object on lock-stack.
// Store obj at xthread + top, then advance top by one oop and write it back
// as a 32-bit value.
add(tmp3_t, xthread, tmp2_top);
sd(obj, Address(tmp3_t));
addw(tmp2_top, tmp2_top, oopSize);
sw(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
j(locked);
}
{ // Handle inflated monitor.
bind(inflated);
// mark contains the tagged ObjectMonitor*.
const Register tmp1_tagged_monitor = tmp1_mark;
const uintptr_t monitor_tag = markWord::monitor_value;
const Register tmp2_owner_addr = tmp2;
const Register tmp3_owner = tmp3;
// Compute owner address.
// The tag is folded into the displacement instead of untagging the pointer first.
la(tmp2_owner_addr, Address(tmp1_tagged_monitor, (in_bytes(ObjectMonitor::owner_offset()) - monitor_tag)));
// CAS owner (null => current thread).
cmpxchg(/*addr*/ tmp2_owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64,
/*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ tmp3_owner);
// A zero result means the owner field was null and now holds xthread.
beqz(tmp3_owner, locked);
// Check if recursive.
// Any owner other than the current thread goes to the slow path.
bne(tmp3_owner, xthread, slow_path);
// Recursive.
// tmp2 and tmp3 are no longer needed here and serve as scratch for increment.
increment(Address(tmp1_tagged_monitor, in_bytes(ObjectMonitor::recursions_offset()) - monitor_tag), 1, tmp2, tmp3);
}
bind(locked);
// Success: clear flag and account for the newly held monitor on this thread.
mv(flag, zr);
increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp2, tmp3);
#ifdef ASSERT
// Check that locked label is reached with flag == 0.
Label flag_correct;
beqz(flag, flag_correct);
stop("Fast Lock Flag != 0");
#endif
bind(slow_path);
#ifdef ASSERT
// Check that slow_path label is reached with flag != 0.
bnez(flag, flag_correct);
stop("Fast Lock Flag == 0");
bind(flag_correct);
#endif
// C2 uses the value of flag (0 vs !0) to determine the continuation.
}
// Emits the C2 fast path for unlocking obj when LockingMode == LM_LIGHTWEIGHT.
//
// - obj: the object to be unlocked
// - tmp1, tmp2, tmp3: temporary registers, overwritten by the emitted code
//
// The outcome is reported in t1 (flag): zero when the unlock completed on the
// fast path, non-zero when the caller must continue in the slow path.
// On success the emitted code also decrements the thread's held monitor count.
void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Register tmp2,
                                                Register tmp3) {
  // Flag register, zero for success; non-zero for failure.
  Register flag = t1;

  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
  assert_different_registers(obj, tmp1, tmp2, tmp3, flag, t0);

  // Preset flag to "failure" so that every branch to slow_path below arrives
  // with flag != 0; only the unlocked label clears it.
  mv(flag, 1);

  // Handle inflated monitor.
  Label inflated, inflated_load_monitor;
  // Finish fast unlock successfully. unlocked MUST branch to with flag == 0
  Label unlocked;
  // Finish fast unlock unsuccessfully. MUST branch to with flag != 0
  Label slow_path;

  const Register tmp1_mark = tmp1;
  const Register tmp2_top = tmp2;
  const Register tmp3_t = tmp3;

  { // Lightweight unlock

    // Check if obj is top of lock-stack.
    // top is a 32-bit offset relative to xthread; after the subw it addresses
    // the most recently pushed entry.
    lwu(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
    subw(tmp2_top, tmp2_top, oopSize);
    add(tmp3_t, xthread, tmp2_top);
    ld(tmp3_t, Address(tmp3_t));
    // Top of lock stack was not obj. Must be monitor.
    bne(obj, tmp3_t, inflated_load_monitor);

    // Pop lock-stack.
    // Debug builds also zero the vacated slot.
    DEBUG_ONLY(add(tmp3_t, xthread, tmp2_top);)
    DEBUG_ONLY(sd(zr, Address(tmp3_t));)
    sw(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));

    // Check if recursive.
    // If the entry below the popped one is also obj, the pop alone completes
    // the unlock and the mark word is left untouched.
    add(tmp3_t, xthread, tmp2_top);
    ld(tmp3_t, Address(tmp3_t, -oopSize));
    beq(obj, tmp3_t, unlocked);

    // Not recursive.
    // Load Mark.
    ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));

    // Check header for monitor (0b10).
    test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value));
    bnez(tmp3_t, inflated);

    // Try to unlock. Transition lock bits 0b00 => 0b01
    assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
    ori(tmp3_t, tmp1_mark, markWord::unlocked_value);
    cmpxchg(/*addr*/ obj, /*expected*/ tmp1_mark, /*new*/ tmp3_t, Assembler::int64,
            /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ tmp3_t);
    beq(tmp1_mark, tmp3_t, unlocked);

    // Compare and exchange failed.
    // Restore lock-stack and handle the unlock in runtime.
    DEBUG_ONLY(add(tmp3_t, xthread, tmp2_top);)
    DEBUG_ONLY(sd(obj, Address(tmp3_t));)
    addw(tmp2_top, tmp2_top, oopSize);
    // The top field is accessed as a 32-bit value everywhere else in this
    // function (lwu on load, sw on the pop above), so write it back with sw.
    // A 64-bit sd here would also overwrite the four bytes following the field.
    sw(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
    j(slow_path);
  }

  { // Handle inflated monitor.
    bind(inflated_load_monitor);
    // This entry point has not loaded the mark word yet.
    ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
#ifdef ASSERT
    test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value));
    bnez(tmp3_t, inflated);
    stop("Fast Unlock not monitor");
#endif

    bind(inflated);

#ifdef ASSERT
    // Debug-only scan: step tmp2_top down one entry per iteration (looping back
    // to inflated) until it drops below the lock-stack base, and stop the VM
    // if any remaining entry is obj.
    Label check_done;
    subw(tmp2_top, tmp2_top, oopSize);
    mv(tmp3_t, in_bytes(JavaThread::lock_stack_base_offset()));
    blt(tmp2_top, tmp3_t, check_done);
    add(tmp3_t, xthread, tmp2_top);
    ld(tmp3_t, Address(tmp3_t));
    bne(obj, tmp3_t, inflated);
    stop("Fast Unlock lock on stack");
    bind(check_done);
#endif

    // mark contains the tagged ObjectMonitor*.
    const Register tmp1_monitor = tmp1_mark;
    const uintptr_t monitor_tag = markWord::monitor_value;

    // Untag the monitor.
    sub(tmp1_monitor, tmp1_mark, monitor_tag);

    // tmp2 no longer holds the lock-stack top from here on.
    const Register tmp2_recursions = tmp2;
    Label not_recursive;

    // Check if recursive.
    ld(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
    beqz(tmp2_recursions, not_recursive);

    // Recursive unlock.
    addi(tmp2_recursions, tmp2_recursions, -1);
    sd(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
    j(unlocked);

    bind(not_recursive);

    Label release;
    const Register tmp2_owner_addr = tmp2;

    // Compute owner address.
    la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset()));

    // Check if the entry lists are empty.
    // EntryList and cxq are OR-ed together; zero means both are null.
    ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset()));
    ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset()));
    orr(t0, t0, tmp3_t);
    beqz(t0, release);

    // The owner may be anonymous and we removed the last obj entry in
    // the lock-stack. This loses the information about the owner.
    // Write the thread to the owner field so the runtime knows the owner.
    sd(xthread, Address(tmp2_owner_addr));
    j(slow_path);

    bind(release);
    // Set owner to null.
    // The barrier orders all earlier loads and stores before the store that
    // clears the owner field.
    membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
    sd(zr, Address(tmp2_owner_addr));
  }

  bind(unlocked);
  // Success: clear flag and account for the released monitor on this thread.
  mv(flag, zr);
  decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp2, tmp3);

#ifdef ASSERT
  // Check that unlocked label is reached with flag == 0.
  Label flag_correct;
  beqz(flag, flag_correct);
  stop("Fast Lock Flag != 0");
#endif

  bind(slow_path);
#ifdef ASSERT
  // Check that slow_path label is reached with flag != 0.
  bnez(flag, flag_correct);
  stop("Fast Lock Flag == 0");
  bind(flag_correct);
#endif
  // C2 uses the value of flag (0 vs !0) to determine the continuation.
}
// short string // short string

View File

@ -44,9 +44,11 @@
public: public:
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
// See full description in macroAssembler_riscv.cpp.
void fast_lock(Register object, Register box, Register tmp1, Register tmp2, Register tmp3); void fast_lock(Register object, Register box, Register tmp1, Register tmp2, Register tmp3);
void fast_unlock(Register object, Register box, Register tmp1, Register tmp2); void fast_unlock(Register object, Register box, Register tmp1, Register tmp2);
// Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
void fast_lock_lightweight(Register object, Register tmp1, Register tmp2, Register tmp3);
void fast_unlock_lightweight(Register object, Register tmp1, Register tmp2, Register tmp3);
void string_compare(Register str1, Register str2, void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result, Register cnt1, Register cnt2, Register result,

View File

@ -763,7 +763,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg)
} }
if (LockingMode == LM_LIGHTWEIGHT) { if (LockingMode == LM_LIGHTWEIGHT) {
ld(tmp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case); lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case);
j(count); j(count);
} else if (LockingMode == LM_LEGACY) { } else if (LockingMode == LM_LEGACY) {
@ -860,24 +859,6 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg)
if (LockingMode == LM_LIGHTWEIGHT) { if (LockingMode == LM_LIGHTWEIGHT) {
Label slow_case; Label slow_case;
// Check for non-symmetric locking. This is allowed by the spec and the interpreter
// must handle it.
Register tmp1 = t0;
Register tmp2 = header_reg;
// First check for lock-stack underflow.
lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
mv(tmp2, (unsigned)LockStack::start_offset());
ble(tmp1, tmp2, slow_case);
// Then check if the top of the lock-stack matches the unlocked object.
subw(tmp1, tmp1, oopSize);
add(tmp1, xthread, tmp1);
ld(tmp1, Address(tmp1, 0));
bne(tmp1, obj_reg, slow_case);
ld(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
test_bit(t0, header_reg, exact_log2(markWord::monitor_value));
bnez(t0, slow_case);
lightweight_unlock(obj_reg, header_reg, swap_reg, tmp_reg, slow_case); lightweight_unlock(obj_reg, header_reg, swap_reg, tmp_reg, slow_case);
j(count); j(count);

View File

@ -49,6 +49,7 @@
#include "runtime/jniHandles.inline.hpp" #include "runtime/jniHandles.inline.hpp"
#include "runtime/sharedRuntime.hpp" #include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp" #include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/powerOfTwo.hpp" #include "utilities/powerOfTwo.hpp"
#ifdef COMPILER2 #ifdef COMPILER2
#include "opto/compile.hpp" #include "opto/compile.hpp"
@ -5042,98 +5043,124 @@ void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) {
} }
// Implements lightweight-locking. // Implements lightweight-locking.
// Branches to slow upon failure to lock the object.
// Falls through upon success.
// //
// - obj: the object to be locked // - obj: the object to be locked
// - hdr: the header, already loaded from obj, will be destroyed // - tmp1, tmp2, tmp3: temporary registers, will be destroyed
// - tmp1, tmp2: temporary registers, will be destroyed // - slow: branched to if locking fails
void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) { void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
assert_different_registers(obj, hdr, tmp1, tmp2, t0); assert_different_registers(obj, tmp1, tmp2, tmp3, t0);
// Check if we would have space on lock-stack for the object. Label push;
lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); const Register top = tmp1;
mv(tmp2, (unsigned)LockStack::end_offset()); const Register mark = tmp2;
bge(tmp1, tmp2, slow, /* is_far */ true); const Register t = tmp3;
// Load (object->mark() | 1) into hdr // Preload the markWord. It is important that this is the first
ori(hdr, hdr, markWord::unlocked_value); // instruction emitted as it is part of C1's null check semantics.
// Clear lock-bits, into tmp2 ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
xori(tmp2, hdr, markWord::unlocked_value);
// Try to swing header from unlocked to locked // Check if the lock-stack is full.
Label success; lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
cmpxchgptr(hdr, tmp2, obj, tmp1, success, &slow); mv(t, (unsigned)LockStack::end_offset());
bind(success); bge(top, t, slow, /* is_far */ true);
// After successful lock, push object on lock-stack // Check for recursion.
lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); add(t, xthread, top);
add(tmp2, xthread, tmp1); ld(t, Address(t, -oopSize));
sd(obj, Address(tmp2, 0)); beq(obj, t, push);
addw(tmp1, tmp1, oopSize);
sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); // Check header for monitor (0b10).
test_bit(t, mark, exact_log2(markWord::monitor_value));
bnez(t, slow, /* is_far */ true);
// Try to lock. Transition lock-bits 0b01 => 0b00
assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la");
ori(mark, mark, markWord::unlocked_value);
xori(t, mark, markWord::unlocked_value);
cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
/*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t);
bne(mark, t, slow, /* is_far */ true);
bind(push);
// After successful lock, push object on lock-stack.
add(t, xthread, top);
sd(obj, Address(t));
addw(top, top, oopSize);
sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
} }
// Implements lightweight-unlocking. // Implements lightweight-unlocking.
// Branches to slow upon failure.
// Falls through upon success.
// //
// - obj: the object to be unlocked // - obj: the object to be unlocked
// - hdr: the (pre-loaded) header of the object // - tmp1, tmp2, tmp3: temporary registers
// - tmp1, tmp2: temporary registers // - slow: branched to if unlocking fails
void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) { void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
assert_different_registers(obj, hdr, tmp1, tmp2, t0); assert_different_registers(obj, tmp1, tmp2, tmp3, t0);
#ifdef ASSERT #ifdef ASSERT
{ {
// The following checks rely on the fact that LockStack is only ever modified by
// its owning thread, even if the lock got inflated concurrently; removal of LockStack
// entries after inflation will happen delayed in that case.
// Check for lock-stack underflow. // Check for lock-stack underflow.
Label stack_ok; Label stack_ok;
lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
mv(tmp2, (unsigned)LockStack::start_offset()); mv(tmp2, (unsigned)LockStack::start_offset());
bgt(tmp1, tmp2, stack_ok); bge(tmp1, tmp2, stack_ok);
STOP("Lock-stack underflow"); STOP("Lock-stack underflow");
bind(stack_ok); bind(stack_ok);
} }
{
// Check if the top of the lock-stack matches the unlocked object.
Label tos_ok;
subw(tmp1, tmp1, oopSize);
add(tmp1, xthread, tmp1);
ld(tmp1, Address(tmp1, 0));
beq(tmp1, obj, tos_ok);
STOP("Top of lock-stack does not match the unlocked object");
bind(tos_ok);
}
{
// Check that hdr is fast-locked.
Label hdr_ok;
andi(tmp1, hdr, markWord::lock_mask_in_place);
beqz(tmp1, hdr_ok);
STOP("Header is not fast-locked");
bind(hdr_ok);
}
#endif #endif
// Load the new header (unlocked) into tmp1 Label unlocked, push_and_slow;
ori(tmp1, hdr, markWord::unlocked_value); const Register top = tmp1;
const Register mark = tmp2;
const Register t = tmp3;
// Try to swing header from locked to unlocked // Check if obj is top of lock-stack.
Label success; lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
cmpxchgptr(hdr, tmp1, obj, tmp2, success, &slow); subw(top, top, oopSize);
bind(success); add(t, xthread, top);
ld(t, Address(t));
bne(obj, t, slow, /* is_far */ true);
// Pop lock-stack.
DEBUG_ONLY(add(t, xthread, top);)
DEBUG_ONLY(sd(zr, Address(t));)
sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
// Check if recursive.
add(t, xthread, top);
ld(t, Address(t, -oopSize));
beq(obj, t, unlocked);
// Not recursive. Check header for monitor (0b10).
ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
test_bit(t, mark, exact_log2(markWord::monitor_value));
bnez(t, push_and_slow);
// After successful unlock, pop object from lock-stack
lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
subw(tmp1, tmp1, oopSize);
#ifdef ASSERT #ifdef ASSERT
add(tmp2, xthread, tmp1); // Check header not unlocked (0b01).
sd(zr, Address(tmp2, 0)); Label not_unlocked;
test_bit(t, mark, exact_log2(markWord::unlocked_value));
beqz(t, not_unlocked);
stop("lightweight_unlock already unlocked");
bind(not_unlocked);
#endif #endif
sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
// Try to unlock. Transition lock bits 0b00 => 0b01
assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
ori(t, mark, markWord::unlocked_value);
cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ t);
beq(mark, t, unlocked);
bind(push_and_slow);
// Restore lock-stack and handle the unlock in runtime.
DEBUG_ONLY(add(t, xthread, top);)
DEBUG_ONLY(sd(obj, Address(t));)
addw(top, top, oopSize);
sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
j(slow);
bind(unlocked);
} }

View File

@ -1519,8 +1519,8 @@ private:
void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release); void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release);
public: public:
void lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); void lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
void lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
}; };
#ifdef ASSERT #ifdef ASSERT

View File

@ -10469,10 +10469,11 @@ instruct tlsLoadP(javaThread_RegP dst)
// using t1 as the 'flag' register to bridge the BoolNode producers and consumers // using t1 as the 'flag' register to bridge the BoolNode producers and consumers
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
%{ %{
predicate(LockingMode != LM_LIGHTWEIGHT);
match(Set cr (FastLock object box)); match(Set cr (FastLock object box));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); ins_cost(10 * DEFAULT_COST);
format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2,$tmp3, #@cmpFastLock" %} format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2,$tmp3, #@cmpFastLock" %}
ins_encode %{ ins_encode %{
@ -10485,10 +10486,11 @@ instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iReg
// using t1 as the 'flag' register to bridge the BoolNode producers and consumers // using t1 as the 'flag' register to bridge the BoolNode producers and consumers
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2)
%{ %{
predicate(LockingMode != LM_LIGHTWEIGHT);
match(Set cr (FastUnlock object box)); match(Set cr (FastUnlock object box));
effect(TEMP tmp1, TEMP tmp2); effect(TEMP tmp1, TEMP tmp2);
ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); ins_cost(10 * DEFAULT_COST);
format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %} format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %}
ins_encode %{ ins_encode %{
@ -10498,6 +10500,38 @@ instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iR
ins_pipe(pipe_serial); ins_pipe(pipe_serial);
%} %}
// FastLock rule used only when LockingMode == LM_LIGHTWEIGHT (see predicate).
// Sets cr from (FastLock object box) by emitting fast_lock_lightweight.
// - object: the object operand passed as first argument to the helper
// - box:    typed iRegP_R10 and declared USE_KILL, so its value is consumed and
//           then clobbered (NOTE(review): iRegP_R10 presumably pins it to R10 - confirm)
// - tmp1, tmp2: scratch registers (TEMP)
instruct cmpFastLockLightweight(rFlagsReg cr, iRegP object, iRegP_R10 box, iRegPNoSp tmp1, iRegPNoSp tmp2)
%{
predicate(LockingMode == LM_LIGHTWEIGHT);
match(Set cr (FastLock object box));
effect(TEMP tmp1, TEMP tmp2, USE_KILL box);
ins_cost(10 * DEFAULT_COST);
format %{ "fastlock $object,$box\t! kills $box,$tmp1,$tmp2 #@cmpFastLockLightweight" %}
ins_encode %{
__ fast_lock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register);
%}
ins_pipe(pipe_serial);
%}
// FastUnlock rule used only when LockingMode == LM_LIGHTWEIGHT (see predicate).
// Sets cr from (FastUnlock object box) by emitting fast_unlock_lightweight.
// - object: the object operand passed as first argument to the helper
// - box:    typed iRegP_R10 and declared USE_KILL, so its value is consumed and
//           then clobbered (NOTE(review): iRegP_R10 presumably pins it to R10 - confirm)
// - tmp1, tmp2: scratch registers (TEMP)
instruct cmpFastUnlockLightweight(rFlagsReg cr, iRegP object, iRegP_R10 box, iRegPNoSp tmp1, iRegPNoSp tmp2)
%{
predicate(LockingMode == LM_LIGHTWEIGHT);
match(Set cr (FastUnlock object box));
effect(TEMP tmp1, TEMP tmp2, USE_KILL box);
ins_cost(10 * DEFAULT_COST);
format %{ "fastunlock $object,$box\t! kills $box,$tmp1,$tmp2, #@cmpFastUnlockLightweight" %}
ins_encode %{
__ fast_unlock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register);
%}
ins_pipe(pipe_serial);
%}
// Tail Call; Jump from runtime stub to Java code. // Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'. // Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller. // Target of jump will eventually return to caller.

View File

@ -1679,8 +1679,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ sd(swap_reg, Address(lock_reg, mark_word_offset)); __ sd(swap_reg, Address(lock_reg, mark_word_offset));
__ bnez(swap_reg, slow_path_lock); __ bnez(swap_reg, slow_path_lock);
} else { } else {
assert(LockingMode == LM_LIGHTWEIGHT, ""); assert(LockingMode == LM_LIGHTWEIGHT, "must be");
__ ld(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
__ lightweight_lock(obj_reg, swap_reg, tmp, lock_tmp, slow_path_lock); __ lightweight_lock(obj_reg, swap_reg, tmp, lock_tmp, slow_path_lock);
} }
@ -1806,9 +1805,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ decrement(Address(xthread, JavaThread::held_monitor_count_offset())); __ decrement(Address(xthread, JavaThread::held_monitor_count_offset()));
} else { } else {
assert(LockingMode == LM_LIGHTWEIGHT, ""); assert(LockingMode == LM_LIGHTWEIGHT, "");
__ ld(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
__ test_bit(t0, old_hdr, exact_log2(markWord::monitor_value));
__ bnez(t0, slow_path_unlock);
__ lightweight_unlock(obj_reg, old_hdr, swap_reg, lock_tmp, slow_path_unlock); __ lightweight_unlock(obj_reg, old_hdr, swap_reg, lock_tmp, slow_path_unlock);
__ decrement(Address(xthread, JavaThread::held_monitor_count_offset())); __ decrement(Address(xthread, JavaThread::held_monitor_count_offset()));
} }

View File

@ -217,6 +217,8 @@ class VM_Version : public Abstract_VM_Version {
constexpr static bool supports_stack_watermark_barrier() { return true; } constexpr static bool supports_stack_watermark_barrier() { return true; }
constexpr static bool supports_recursive_lightweight_locking() { return true; }
static bool supports_on_spin_wait() { return UseZihintpause; } static bool supports_on_spin_wait() { return UseZihintpause; }
// RISCV64 supports fast class initialization checks // RISCV64 supports fast class initialization checks