8135157: DMB elimination in AArch64 C2 synchronization implementation
Reduce memory barrier usage in C2 fast lock and unlock.
Co-authored-by: Wei Tang <wei.tang@linaro.org>
Reviewed-by: kvn
This commit is contained in:
parent
e88940fae6
commit
518c5cacbc
@ -3803,82 +3803,38 @@ encode %{
|
||||
|
||||
// 64-bit compare-and-swap encoding.
//
// Emits exactly one CAS sequence through the cmpxchg helper, using a
// plain exclusive load (ldxr), a 64-bit compare (cmp) and a
// store-release exclusive (stlxr).
//
// Only a bare base-register address is supported: the guarantee
// rejects any operand that carries an index register or a non-zero
// displacement, so no address arithmetic is emitted here.
//
// Previously this block emitted a hand-written ldxr/stlxr loop and
// then a second CAS via the helper. Once the first CAS had stored
// newval, the second one compared the updated memory against oldval
// again and left the flags reporting failure. A single CAS is emitted
// now.
enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
  MacroAssembler _masm(&cbuf);
  // Check the addressing mode before emitting any code.
  guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
  __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
             &Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
%}
|
||||
|
||||
// 32-bit compare-and-swap encoding.
//
// Emits exactly one CAS sequence through the cmpxchg helper, using a
// plain exclusive word load (ldxrw), a 32-bit compare (cmpw) and a
// store-release exclusive word store (stlxrw).
//
// Only a bare base-register address is supported: the guarantee
// rejects any operand that carries an index register or a non-zero
// displacement, so no address arithmetic is emitted here.
//
// Previously this block emitted a hand-written ldxrw/stlxrw loop and
// then a second CAS via the helper. Once the first CAS had stored
// newval, the second one compared the updated memory against oldval
// again and left the flags reporting failure. A single CAS is emitted
// now.
enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
  MacroAssembler _masm(&cbuf);
  // Check the addressing mode before emitting any code.
  guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
  __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
             &Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
%}
|
||||
|
||||
|
||||
// The only difference between aarch64_enc_cmpxchg and
|
||||
// aarch64_enc_cmpxchg_acq is that we use load-acquire in the
|
||||
// CompareAndSwap sequence to serve as a barrier on acquiring a
|
||||
// lock.
|
||||
enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
&Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
|
||||
%}
|
||||
|
||||
// Acquire flavour of the 32-bit compare-and-swap encoding.
//
// Uses a load-acquire exclusive word load (ldaxrw), a 32-bit compare
// (cmpw) and a store-release exclusive word store (stlxrw).
//
// Only a bare base-register address is accepted; an index register or
// a non-zero displacement trips the guarantee.
enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
  MacroAssembler _masm(&cbuf);
  guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");

  // Name the operands before handing them to the generic CAS emitter.
  Register base_reg     = $mem$$base$$Register;
  Register expected_reg = $oldval$$Register;
  Register desired_reg  = $newval$$Register;

  __ cmpxchg(base_reg, expected_reg, desired_reg,
             &Assembler::ldaxrw,      // load-acquire exclusive (word)
             &MacroAssembler::cmpw,   // 32-bit compare
             &Assembler::stlxrw);     // store-release exclusive (word)
%}
|
||||
|
||||
|
||||
// auxiliary used for CompareAndSwapX to set result register
|
||||
enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
@ -4398,13 +4354,10 @@ encode %{
|
||||
|
||||
// Compare object markOop with mark and if equal exchange scratch1
|
||||
// with object markOop.
|
||||
// Note that this is simply a CAS: it does not generate any
|
||||
// barriers. These are separately generated by
|
||||
// membar_acquire_lock().
|
||||
{
|
||||
Label retry_load;
|
||||
__ bind(retry_load);
|
||||
__ ldxr(tmp, oop);
|
||||
__ ldaxr(tmp, oop);
|
||||
__ cmp(tmp, disp_hdr);
|
||||
__ br(Assembler::NE, cas_failed);
|
||||
// use stlxr to ensure update is immediately visible
|
||||
@ -4454,7 +4407,7 @@ encode %{
|
||||
{
|
||||
Label retry_load, fail;
|
||||
__ bind(retry_load);
|
||||
__ ldxr(rscratch1, tmp);
|
||||
__ ldaxr(rscratch1, tmp);
|
||||
__ cmp(disp_hdr, rscratch1);
|
||||
__ br(Assembler::NE, fail);
|
||||
// use stlxr to ensure update is immediately visible
|
||||
@ -8017,10 +7970,10 @@ instruct membar_acquire_lock() %{
|
||||
match(MemBarAcquireLock);
|
||||
ins_cost(VOLATILE_REF_COST);
|
||||
|
||||
format %{ "membar_acquire_lock" %}
|
||||
format %{ "membar_acquire_lock (elided)" %}
|
||||
|
||||
ins_encode %{
|
||||
__ membar(Assembler::LoadLoad|Assembler::LoadStore);
|
||||
__ block_comment("membar_acquire_lock (elided)");
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_serial);
|
||||
@ -8080,10 +8033,10 @@ instruct membar_release_lock() %{
|
||||
match(MemBarReleaseLock);
|
||||
ins_cost(VOLATILE_REF_COST);
|
||||
|
||||
format %{ "membar_release_lock" %}
|
||||
format %{ "membar_release_lock (elided)" %}
|
||||
|
||||
ins_encode %{
|
||||
__ membar(Assembler::LoadStore|Assembler::StoreStore);
|
||||
__ block_comment("membar_release_lock (elided)");
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_serial);
|
||||
@ -8369,7 +8322,11 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla
|
||||
ins_pipe(pipe_serial);
|
||||
%}
|
||||
|
||||
// this has to be implemented as a CAS
|
||||
|
||||
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
|
||||
// when attempting to rebias a lock towards the current thread. We
|
||||
// must use the acquire form of cmpxchg in order to guarantee acquire
|
||||
// semantics in this case.
|
||||
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
|
||||
%{
|
||||
match(Set cr (StoreLConditional mem (Binary oldval newval)));
|
||||
@ -8381,12 +8338,14 @@ instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFl
|
||||
"cmpw rscratch1, zr\t# EQ on successful write"
|
||||
%}
|
||||
|
||||
ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval));
|
||||
ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
|
||||
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// this has to be implemented as a CAS
|
||||
// storeIConditional also has acquire semantics, for no better reason
|
||||
// than matching storeLConditional. At the time of writing this
|
||||
// comment storeIConditional was not used anywhere by AArch64.
|
||||
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
|
||||
%{
|
||||
match(Set cr (StoreIConditional mem (Binary oldval newval)));
|
||||
@ -8398,7 +8357,7 @@ instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFl
|
||||
"cmpw rscratch1, zr\t# EQ on successful write"
|
||||
%}
|
||||
|
||||
ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval));
|
||||
ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
|
||||
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
@ -917,6 +917,8 @@ public:
|
||||
|
||||
void cmpptr(Register src1, Address src2);
|
||||
|
||||
// Various forms of CAS
|
||||
|
||||
void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
|
||||
Label &suceed, Label *fail);
|
||||
|
||||
@ -938,6 +940,23 @@ public:
|
||||
str(rscratch2, adr);
|
||||
}
|
||||
|
||||
// A generic CAS; success or failure is in the EQ flag.
|
||||
template <typename T1, typename T2>
|
||||
void cmpxchg(Register addr, Register expected, Register new_val,
|
||||
T1 load_insn,
|
||||
void (MacroAssembler::*cmp_insn)(Register, Register),
|
||||
T2 store_insn,
|
||||
Register tmp = rscratch1) {
|
||||
Label retry_load, done;
|
||||
bind(retry_load);
|
||||
(this->*load_insn)(tmp, addr);
|
||||
(this->*cmp_insn)(tmp, expected);
|
||||
br(Assembler::NE, done);
|
||||
(this->*store_insn)(tmp, new_val, addr);
|
||||
cbnzw(tmp, retry_load);
|
||||
bind(done);
|
||||
}
|
||||
|
||||
// Calls
|
||||
|
||||
address trampoline_call(Address entry, CodeBuffer *cbuf = NULL);
|
||||
|
Loading…
Reference in New Issue
Block a user