8135157: DMB elimination in AArch64 C2 synchronization implementation

Reduce memory barrier usage in C2 fast lock and unlock.

Co-authored-by: Wei Tang <wei.tang@linaro.org>
Reviewed-by: kvn
This commit is contained in:
Andrew Haley 2015-09-08 14:08:58 +01:00
parent e88940fae6
commit 518c5cacbc
2 changed files with 61 additions and 83 deletions

View File

@ -3803,82 +3803,38 @@ encode %{
enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
MacroAssembler _masm(&cbuf);
Register old_reg = as_Register($oldval$$reg);
Register new_reg = as_Register($newval$$reg);
Register base = as_Register($mem$$base);
Register addr_reg;
int index = $mem$$index;
int scale = $mem$$scale;
int disp = $mem$$disp;
if (index == -1) {
if (disp != 0) {
__ lea(rscratch2, Address(base, disp));
addr_reg = rscratch2;
} else {
// TODO
// should we ever get anything other than this case?
addr_reg = base;
}
} else {
Register index_reg = as_Register(index);
if (disp == 0) {
__ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
addr_reg = rscratch2;
} else {
__ lea(rscratch2, Address(base, disp));
__ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
addr_reg = rscratch2;
}
}
Label retry_load, done;
__ bind(retry_load);
__ ldxr(rscratch1, addr_reg);
__ cmp(rscratch1, old_reg);
__ br(Assembler::NE, done);
__ stlxr(rscratch1, new_reg, addr_reg);
__ cbnzw(rscratch1, retry_load);
__ bind(done);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
&Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
%}
enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
MacroAssembler _masm(&cbuf);
Register old_reg = as_Register($oldval$$reg);
Register new_reg = as_Register($newval$$reg);
Register base = as_Register($mem$$base);
Register addr_reg;
int index = $mem$$index;
int scale = $mem$$scale;
int disp = $mem$$disp;
if (index == -1) {
if (disp != 0) {
__ lea(rscratch2, Address(base, disp));
addr_reg = rscratch2;
} else {
// TODO
// should we ever get anything other than this case?
addr_reg = base;
}
} else {
Register index_reg = as_Register(index);
if (disp == 0) {
__ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
addr_reg = rscratch2;
} else {
__ lea(rscratch2, Address(base, disp));
__ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
addr_reg = rscratch2;
}
}
Label retry_load, done;
__ bind(retry_load);
__ ldxrw(rscratch1, addr_reg);
__ cmpw(rscratch1, old_reg);
__ br(Assembler::NE, done);
__ stlxrw(rscratch1, new_reg, addr_reg);
__ cbnzw(rscratch1, retry_load);
__ bind(done);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
&Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
%}
// The only difference between aarch64_enc_cmpxchg and
// aarch64_enc_cmpxchg_acq is that we use load-acquire in the
// CompareAndSwap sequence to serve as a barrier on acquiring a
// lock.
enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
&Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
%}
enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
&Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
%}
// auxiliary used for CompareAndSwapX to set result register
enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
MacroAssembler _masm(&cbuf);
@ -4398,13 +4354,10 @@ encode %{
// Compare object markOop with mark and if equal exchange scratch1
// with object markOop.
// Note that this is simply a CAS: it does not generate any
// barriers. These are separately generated by
// membar_acquire_lock().
{
Label retry_load;
__ bind(retry_load);
__ ldxr(tmp, oop);
__ ldaxr(tmp, oop);
__ cmp(tmp, disp_hdr);
__ br(Assembler::NE, cas_failed);
// use stlxr to ensure update is immediately visible
@ -4454,7 +4407,7 @@ encode %{
{
Label retry_load, fail;
__ bind(retry_load);
__ ldxr(rscratch1, tmp);
__ ldaxr(rscratch1, tmp);
__ cmp(disp_hdr, rscratch1);
__ br(Assembler::NE, fail);
// use stlxr to ensure update is immediately visible
@ -8017,10 +7970,10 @@ instruct membar_acquire_lock() %{
match(MemBarAcquireLock);
ins_cost(VOLATILE_REF_COST);
format %{ "membar_acquire_lock" %}
format %{ "membar_acquire_lock (elided)" %}
ins_encode %{
__ membar(Assembler::LoadLoad|Assembler::LoadStore);
__ block_comment("membar_acquire_lock (elided)");
%}
ins_pipe(pipe_serial);
@ -8080,10 +8033,10 @@ instruct membar_release_lock() %{
match(MemBarReleaseLock);
ins_cost(VOLATILE_REF_COST);
format %{ "membar_release_lock" %}
format %{ "membar_release_lock (elided)" %}
ins_encode %{
__ membar(Assembler::LoadStore|Assembler::StoreStore);
__ block_comment("membar_release_lock (elided)");
%}
ins_pipe(pipe_serial);
@ -8369,7 +8322,11 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla
ins_pipe(pipe_serial);
%}
// this has to be implemented as a CAS
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread. We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
match(Set cr (StoreLConditional mem (Binary oldval newval)));
@ -8381,12 +8338,14 @@ instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFl
"cmpw rscratch1, zr\t# EQ on successful write"
%}
ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval));
ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
ins_pipe(pipe_slow);
%}
// this has to be implemented as a CAS
// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional. At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
match(Set cr (StoreIConditional mem (Binary oldval newval)));
@ -8398,7 +8357,7 @@ instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFl
"cmpw rscratch1, zr\t# EQ on successful write"
%}
ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval));
ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
ins_pipe(pipe_slow);
%}

View File

@ -917,6 +917,8 @@ public:
void cmpptr(Register src1, Address src2);
// Various forms of CAS
void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
Label &suceed, Label *fail);
@ -938,6 +940,23 @@ public:
str(rscratch2, adr);
}
// A generic CAS; success or failure is in the EQ flag.
template <typename T1, typename T2>
void cmpxchg(Register addr, Register expected, Register new_val,
T1 load_insn,
void (MacroAssembler::*cmp_insn)(Register, Register),
T2 store_insn,
Register tmp = rscratch1) {
Label retry_load, done;
bind(retry_load);
(this->*load_insn)(tmp, addr);
(this->*cmp_insn)(tmp, expected);
br(Assembler::NE, done);
(this->*store_insn)(tmp, new_val, addr);
cbnzw(tmp, retry_load);
bind(done);
}
// Calls
address trampoline_call(Address entry, CodeBuffer *cbuf = NULL);