8315856: RISC-V: Use Zacas extension for cmpxchg

Reviewed-by: rehn, fyang
Ludovic Henry 2023-12-19 14:15:24 +00:00
parent 3bc5679cab
commit 6313223bcd
5 changed files with 184 additions and 28 deletions

src/hotspot/cpu/riscv/assembler_riscv.hpp

@@ -758,6 +758,8 @@ enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11};
   INSN(amomax_d , 0b0101111, 0b011, 0b10100);
   INSN(amominu_d, 0b0101111, 0b011, 0b11000);
   INSN(amomaxu_d, 0b0101111, 0b011, 0b11100);
+  INSN(amocas_w , 0b0101111, 0b010, 0b00101);
+  INSN(amocas_d , 0b0101111, 0b011, 0b00101);

 #undef INSN

 enum operand_size { int8, int16, int32, uint32, int64 };
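For orientation: amocas.w/amocas.d reuse the common AMO (R-type) layout, so the three arguments above are exactly the opcode, the width funct3, and the funct5. A stand-alone sketch of how those fields plus the aq/rl bits combine into an instruction word (an illustrative encoder, not HotSpot's emitter):

#include <cstdint>

// AMO format: funct5[31:27] aq[26] rl[25] rs2[24:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0]
constexpr uint32_t encode_amo(uint32_t funct5, bool aq, bool rl, uint32_t funct3,
                              uint32_t rd, uint32_t rs1, uint32_t rs2) {
  return (funct5 << 27) | (uint32_t(aq) << 26) | (uint32_t(rl) << 25) |
         (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | 0b0101111u;
}

// amocas.d.aqrl x10, x11, (x12): funct5 = 0b00101, funct3 = 0b011 (doubleword)
constexpr uint32_t amocas_d_aqrl = encode_amo(0b00101, true, true, 0b011, /*rd*/10, /*rs1*/12, /*rs2*/11);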

src/hotspot/cpu/riscv/globals_riscv.hpp

@@ -105,6 +105,7 @@ define_pd_global(intx, InlineSmallCode, 1000);
   product(bool, UseZba, false, "Use Zba instructions") \
   product(bool, UseZbb, false, "Use Zbb instructions") \
   product(bool, UseZbs, false, "Use Zbs instructions") \
+  product(bool, UseZacas, false, EXPERIMENTAL, "Use Zacas instructions") \
   product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \
   product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \
   product(bool, UseZicbop, false, EXPERIMENTAL, "Use Zicbop instructions") \
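Since UseZacas is declared EXPERIMENTAL, it stays locked unless experimental options are unlocked; enabling it by hand would look like:

  java -XX:+UnlockExperimentalVMOptions -XX:+UseZacas ...

(The vm_version change at the bottom of this diff additionally lets hardware feature detection flip the default when the hart reports Zacas.)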

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

@@ -2732,20 +2732,29 @@ void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Reg
   // oldv holds comparison value
   // newv holds value to write in exchange
   // addr identifies memory word to compare against/update
-  Label retry_load, nope;
-  bind(retry_load);
-  // Load reserved from the memory location
-  load_reserved(tmp, addr, int64, Assembler::aqrl);
-  // Fail and exit if it is not what we expect
-  bne(tmp, oldv, nope);
-  // If the store conditional succeeds, tmp will be zero
-  store_conditional(tmp, newv, addr, int64, Assembler::rl);
-  beqz(tmp, succeed);
-  // Retry only when the store conditional failed
-  j(retry_load);
-  bind(nope);
+  if (UseZacas) {
+    mv(tmp, oldv);
+    atomic_cas(tmp, newv, addr, Assembler::int64, Assembler::aq, Assembler::rl);
+    beq(tmp, oldv, succeed);
+  } else {
+    Label retry_load, nope;
+    bind(retry_load);
+    // Load reserved from the memory location
+    load_reserved(tmp, addr, int64, Assembler::aqrl);
+    // Fail and exit if it is not what we expect
+    bne(tmp, oldv, nope);
+    // If the store conditional succeeds, tmp will be zero
+    store_conditional(tmp, newv, addr, int64, Assembler::rl);
+    beqz(tmp, succeed);
+    // Retry only when the store conditional failed
+    j(retry_load);
+    bind(nope);
+  }

+  // neither amocas nor lr/sc have an implied barrier in the failing case
   membar(AnyAny);

   mv(oldv, tmp);
   if (fail != nullptr) {
     j(*fail);
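The Zacas fast path relies on amocas leaving the observed memory value in its destination register: tmp still equals oldv exactly when the exchange happened. A C++ model of the three emitted instructions (illustrative only, names invented here):

#include <atomic>
#include <cstdint>

// Model of the UseZacas path in cmpxchgptr: tmp starts as a copy of oldv;
// on failure the CAS overwrites tmp with what memory actually held, so
// tmp == oldv holds exactly when the swap succeeded.
bool cmpxchgptr_model(std::atomic<uint64_t>* addr, uint64_t oldv, uint64_t newv) {
  uint64_t tmp = oldv;                                   // mv(tmp, oldv)
  addr->compare_exchange_strong(tmp, newv,               // amocas.d with aq|rl
                                std::memory_order_acq_rel,
                                std::memory_order_acquire);
  return tmp == oldv;                                    // beq(tmp, oldv, succeed)
}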
@@ -2819,7 +2828,7 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte
   }
   sll(mask, mask, shift);
-  xori(not_mask, mask, -1);
+  notr(not_mask, mask);

   sll(expected, expected, shift);
   andr(expected, expected, mask);
@@ -2829,7 +2838,7 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte
 }

 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
-// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w,
+// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w or amocas.w,
 // which are forced to work with 4-byte aligned address.
 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
                                           Register new_val,
@@ -2844,14 +2853,29 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
   Label retry, fail, done;

   bind(retry);
-  lr_w(old, aligned_addr, acquire);
-  andr(tmp, old, mask);
-  bne(tmp, expected, fail);
-  andr(tmp, old, not_mask);
-  orr(tmp, tmp, new_val);
-  sc_w(tmp, tmp, aligned_addr, release);
-  bnez(tmp, retry);
+  if (UseZacas) {
+    lw(old, aligned_addr);
+
+    // if old & mask != expected
+    andr(tmp, old, mask);
+    bne(tmp, expected, fail);
+
+    andr(tmp, old, not_mask);
+    orr(tmp, tmp, new_val);
+
+    atomic_cas(old, tmp, aligned_addr, operand_size::int32, acquire, release);
+    bne(tmp, old, retry);
+  } else {
+    lr_w(old, aligned_addr, acquire);
+    andr(tmp, old, mask);
+    bne(tmp, expected, fail);
+    andr(tmp, old, not_mask);
+    orr(tmp, tmp, new_val);
+    sc_w(tmp, tmp, aligned_addr, release);
+    bnez(tmp, retry);
+  }

   if (result_as_bool) {
     mv(result, 1);
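Both branches implement the same word-masking trick: amocas.w and lr.w/sc.w only operate on aligned 32-bit words, so a byte or short CAS is emulated by swapping the whole containing word while preserving the neighboring bytes. A self-contained C++ sketch of the idea (simplified relative to the emitted code; helper name invented):

#include <atomic>
#include <cstdint>

// Emulate a one-byte CAS with a word-sized CAS: 'shift' is the byte's bit
// offset within its aligned word, mask selects it, not_mask its neighbors.
bool cas_byte_via_word(std::atomic<uint32_t>& aligned, unsigned shift,
                       uint8_t expected, uint8_t desired) {
  const uint32_t mask = 0xFFu << shift;
  const uint32_t not_mask = ~mask;
  for (;;) {
    uint32_t old_word = aligned.load();
    if (((old_word & mask) >> shift) != expected)
      return false;                                   // bne(tmp, expected, fail)
    uint32_t new_word = (old_word & not_mask)         // keep the other bytes
                        | (uint32_t(desired) << shift);
    if (aligned.compare_exchange_strong(old_word, new_word))
      return true;
    // another thread touched the word first: retry
  }
}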
@@ -2891,14 +2915,28 @@ void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
   Label fail, done;

-  lr_w(old, aligned_addr, acquire);
-  andr(tmp, old, mask);
-  bne(tmp, expected, fail);
-  andr(tmp, old, not_mask);
-  orr(tmp, tmp, new_val);
-  sc_w(tmp, tmp, aligned_addr, release);
-  bnez(tmp, fail);
+  if (UseZacas) {
+    lw(old, aligned_addr);
+
+    // if old & mask != expected
+    andr(tmp, old, mask);
+    bne(tmp, expected, fail);
+
+    andr(tmp, old, not_mask);
+    orr(tmp, tmp, new_val);
+
+    atomic_cas(tmp, new_val, addr, operand_size::int32, acquire, release);
+    bne(tmp, old, fail);
+  } else {
+    lr_w(old, aligned_addr, acquire);
+    andr(tmp, old, mask);
+    bne(tmp, expected, fail);
+    andr(tmp, old, not_mask);
+    orr(tmp, tmp, new_val);
+    sc_w(tmp, tmp, aligned_addr, release);
+    bnez(tmp, fail);
+  }

   // Success
   mv(result, 1);
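For context, the weak flavor makes a single attempt and reports failure rather than looping, which is why both branches above jump to fail instead of retrying; the C++ analogue is compare_exchange_weak, whose spurious failures mirror a lost LR/SC reservation (illustrative):

#include <atomic>
#include <cstdint>

// One-shot CAS attempt; the caller, not the stub, decides whether to retry.
bool weak_cas_once(std::atomic<uint32_t>& word, uint32_t& expected, uint32_t desired) {
  return word.compare_exchange_weak(expected, desired);
}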
@@ -2921,6 +2959,19 @@ void MacroAssembler::cmpxchg(Register addr, Register expected,
   assert_different_registers(expected, t0);
   assert_different_registers(new_val, t0);

+  if (UseZacas) {
+    if (result_as_bool) {
+      mv(t0, expected);
+      atomic_cas(t0, new_val, addr, size, acquire, release);
+      xorr(t0, t0, expected);
+      seqz(result, t0);
+    } else {
+      mv(result, expected);
+      atomic_cas(result, new_val, addr, size, acquire, release);
+    }
+    return;
+  }
+
   Label retry_load, done, ne_done;
   bind(retry_load);
   load_reserved(t0, addr, size, acquire);
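In the result_as_bool case above, the observed value comes back in t0, so xorr plus seqz derive the success flag without a branch. As a C++ model (illustrative only):

#include <atomic>
#include <cstdint>

// result = (observed == expected), computed branch-free like xorr + seqz.
uint64_t cas_bool_model(std::atomic<uint64_t>* addr, uint64_t expected, uint64_t newv) {
  uint64_t t0 = expected;                      // mv(t0, expected)
  addr->compare_exchange_strong(t0, newv);     // atomic_cas(t0, new_val, addr, ...)
  return (t0 ^ expected) == 0;                 // xorr(t0, t0, expected); seqz(result, t0)
}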
@@ -2952,6 +3003,11 @@ void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
                                   enum operand_size size,
                                   Assembler::Aqrl acquire, Assembler::Aqrl release,
                                   Register result) {
+  if (UseZacas) {
+    cmpxchg(addr, expected, new_val, size, acquire, release, result, true);
+    return;
+  }
+
   assert_different_registers(addr, t0);
   assert_different_registers(expected, t0);
   assert_different_registers(new_val, t0);
@@ -3018,6 +3074,89 @@ ATOMIC_XCHGU(xchgalwu, xchgalw)

 #undef ATOMIC_XCHGU

+#define ATOMIC_CAS(OP, AOP, ACQUIRE, RELEASE) \
+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
+  assert(UseZacas, "invariant"); \
+  prev = prev->is_valid() ? prev : zr; \
+  AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
+  return; \
+}
+
+ATOMIC_CAS(cas, amocas_d, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_CAS(casw, amocas_w, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_CAS(casl, amocas_d, Assembler::relaxed, Assembler::rl)
+ATOMIC_CAS(caslw, amocas_w, Assembler::relaxed, Assembler::rl)
+ATOMIC_CAS(casal, amocas_d, Assembler::aq, Assembler::rl)
+ATOMIC_CAS(casalw, amocas_w, Assembler::aq, Assembler::rl)
+
+#undef ATOMIC_CAS
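Expanding one instance by hand shows what each stamped-out method looks like; ATOMIC_CAS(casal, amocas_d, Assembler::aq, Assembler::rl) is equivalent to:

void MacroAssembler::atomic_casal(Register prev, Register newv, Register addr) {
  assert(UseZacas, "invariant");
  prev = prev->is_valid() ? prev : zr;  // route an unused old value to x0
  amocas_d(prev, addr, newv, (Assembler::Aqrl)(Assembler::aq | Assembler::rl));
  return;
}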
+#define ATOMIC_CASU(OP1, OP2) \
+void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \
+  atomic_##OP2(prev, newv, addr); \
+  zero_extend(prev, prev, 32); \
+  return; \
+}
+
+ATOMIC_CASU(caswu, casw)
+ATOMIC_CASU(caslwu, caslw)
+ATOMIC_CASU(casalwu, casalw)
+
+#undef ATOMIC_CASU
+
+void MacroAssembler::atomic_cas(
+    Register prev, Register newv, Register addr, enum operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) {
+  switch (size) {
+    case int64:
+      switch ((Assembler::Aqrl)(acquire | release)) {
+        case Assembler::relaxed:
+          atomic_cas(prev, newv, addr);
+          break;
+        case Assembler::rl:
+          atomic_casl(prev, newv, addr);
+          break;
+        case Assembler::aqrl:
+          atomic_casal(prev, newv, addr);
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+      break;
+    case int32:
+      switch ((Assembler::Aqrl)(acquire | release)) {
+        case Assembler::relaxed:
+          atomic_casw(prev, newv, addr);
+          break;
+        case Assembler::rl:
+          atomic_caslw(prev, newv, addr);
+          break;
+        case Assembler::aqrl:
+          atomic_casalw(prev, newv, addr);
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+      break;
+    case uint32:
+      switch ((Assembler::Aqrl)(acquire | release)) {
+        case Assembler::relaxed:
+          atomic_caswu(prev, newv, addr);
+          break;
+        case Assembler::rl:
+          atomic_caslwu(prev, newv, addr);
+          break;
+        case Assembler::aqrl:
+          atomic_casalwu(prev, newv, addr);
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
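Tracing the cmpxchgptr call from earlier in this diff through the dispatcher, by hand:

// atomic_cas(tmp, newv, addr, Assembler::int64, Assembler::aq, Assembler::rl)
//   case int64, (aq | rl) == aqrl  ->  atomic_casal(tmp, newv, addr)
//   ->  amocas_d(tmp, addr, newv, Assembler::aqrl)   // i.e. amocas.d.aqrl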
 void MacroAssembler::far_jump(const Address &entry, Register tmp) {
   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
   assert(CodeCache::find_blob(entry.target()) != nullptr,

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

@@ -1063,6 +1063,19 @@ public:
   void atomic_xchgwu(Register prev, Register newv, Register addr);
   void atomic_xchgalwu(Register prev, Register newv, Register addr);

+  void atomic_cas(Register prev, Register newv, Register addr);
+  void atomic_casw(Register prev, Register newv, Register addr);
+  void atomic_casl(Register prev, Register newv, Register addr);
+  void atomic_caslw(Register prev, Register newv, Register addr);
+  void atomic_casal(Register prev, Register newv, Register addr);
+  void atomic_casalw(Register prev, Register newv, Register addr);
+  void atomic_caswu(Register prev, Register newv, Register addr);
+  void atomic_caslwu(Register prev, Register newv, Register addr);
+  void atomic_casalwu(Register prev, Register newv, Register addr);
+
+  void atomic_cas(Register prev, Register newv, Register addr, enum operand_size size,
+                  Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed);
+
   // Emit a far call/jump. Only invalidates the tmp register which
   // is used to keep the entry address for jalr.
   // The address must be inside the code cache.

src/hotspot/cpu/riscv/vm_version_riscv.hpp

@@ -142,6 +142,7 @@ class VM_Version : public Abstract_VM_Version {
   decl(ext_Zic64b       , "Zic64b"       , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZic64b))      \
   decl(ext_Ztso         , "Ztso"         , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZtso))        \
   decl(ext_Zihintpause  , "Zihintpause"  , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZihintpause)) \
+  decl(ext_Zacas        , "Zacas"        , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZacas))       \
   decl(mvendorid        , "VendorId"     , RV_NO_FLAG_BIT, false, NO_UPDATE_DEFAULT)              \
   decl(marchid          , "ArchId"       , RV_NO_FLAG_BIT, false, NO_UPDATE_DEFAULT)              \
   decl(mimpid           , "ImpId"        , RV_NO_FLAG_BIT, false, NO_UPDATE_DEFAULT)              \