8292640: C2: Remove unused scratch register usages on x86

Reviewed-by: kvn
Vladimir Ivanov 2022-08-23 20:25:56 +00:00
parent f3be6731d3
commit 926380d3b7
5 changed files with 377 additions and 383 deletions
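For orientation: across these files the patch drops the scratch-register parameter from helpers whose AddressLiteral operand is always RIP-reachable (callers now pass noreg or nothing at all), renames the remaining scratch parameters to rscratch, and guards them with a reachability assert. A minimal sketch of the helper shape the patch converges on, assuming reachable()/always_reachable() behave as their names suggest (illustrative, not a verbatim excerpt):

// Sketch only; the real definitions are in macroAssembler_x86.cpp below.
void MacroAssembler::andps(XMMRegister dst, AddressLiteral src, Register rscratch) {
  // Callers with an always-reachable constant (e.g. a StubRoutines table) may pass noreg.
  assert(rscratch != noreg || always_reachable(src), "missing");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));        // RIP-relative access, no scratch register touched
  } else {
    lea(rscratch, src);                            // far target: materialize the address first
    Assembler::andps(dst, Address(rscratch, 0));
  }
}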

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

@ -217,7 +217,7 @@ void C2_MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg,
if (RTMLockingCalculationDelay > 0) {
// Delay calculation
movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg);
movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()));
testptr(tmpReg, tmpReg);
jccb(Assembler::equal, L_done);
}
@ -966,45 +966,45 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
//-------------------------------------------------------------------------------------------
// Generic instructions support for use in .ad files C2 code generation
void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src) {
if (dst != src) {
movdqu(dst, src);
}
if (opcode == Op_AbsVD) {
andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), noreg);
} else {
assert((opcode == Op_NegVD),"opcode should be Op_NegD");
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), noreg);
}
}
void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len) {
if (opcode == Op_AbsVD) {
vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr);
vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, noreg);
} else {
assert((opcode == Op_NegVD),"opcode should be Op_NegD");
vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, scr);
vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, noreg);
}
}
void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src) {
if (dst != src) {
movdqu(dst, src);
}
if (opcode == Op_AbsVF) {
andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), scr);
andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), noreg);
} else {
assert((opcode == Op_NegVF),"opcode should be Op_NegF");
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scr);
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), noreg);
}
}
void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len) {
if (opcode == Op_AbsVF) {
vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, scr);
vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, noreg);
} else {
assert((opcode == Op_NegVF),"opcode should be Op_NegF");
vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, scr);
vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, noreg);
}
}
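The constants used above implement vector abs and neg purely as bit operations on the IEEE-754 sign bit: AND with a sign mask clears it, XOR with a sign-flip mask toggles it. A scalar sketch of the same trick, assuming the usual contents of those masks (illustrative only):

#include <cstdint>
#include <cstring>

double abs_or_neg(double x, bool is_abs) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits = is_abs ? (bits & 0x7FFFFFFFFFFFFFFFull)   // andpd with vector_double_sign_mask: clear the sign bit
                : (bits ^ 0x8000000000000000ull);  // xorpd with vector_double_sign_flip: flip the sign bit
  std::memcpy(&x, &bits, sizeof x);
  return x;
}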
@ -1177,9 +1177,7 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
}
// Float/Double signum
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst,
XMMRegister zero, XMMRegister one,
Register scratch) {
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one) {
assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity");
Label DONE_LABEL;
@ -1191,7 +1189,7 @@ void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst,
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
movflt(dst, one);
jcc(Assembler::above, DONE_LABEL);
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scratch);
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), noreg);
} else if (opcode == Op_SignumD) {
assert(UseSSE > 1, "required");
ucomisd(dst, zero);
@ -1199,7 +1197,7 @@ void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst,
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
movdbl(dst, one);
jcc(Assembler::above, DONE_LABEL);
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scratch);
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), noreg);
}
bind(DONE_LABEL);
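For context, signum_fp branches around the compare so that zero and NaN inputs fall through unchanged, while every other input becomes +/-1.0 by loading one and, for negative inputs, flipping its sign bit. A scalar sketch of the intended result, assuming Java Math.signum semantics (illustrative only):

float signum_sketch(float x) {
  if (x == 0.0f || x != x) return x;   // the equal/parity branches: +/-0.0 and NaN pass through
  return (x > 0.0f) ? 1.0f : -1.0f;    // load one; xorps with vector_float_sign_flip when x is negative
}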
@ -1458,7 +1456,7 @@ void C2_MacroAssembler::varshiftq(int opcode, XMMRegister dst, XMMRegister src,
}
// Variable shift src by shift using vtmp and scratch as TEMPs giving word result in dst
void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) {
void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp) {
assert(opcode == Op_LShiftVB ||
opcode == Op_RShiftVB ||
opcode == Op_URShiftVB, "%s", NodeClassNames[opcode]);
@ -1467,13 +1465,13 @@ void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src,
vextendbd(sign, dst, src, 1);
vpmovzxbd(vtmp, shift, 1);
varshiftd(opcode, dst, dst, vtmp, 1);
vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_int_to_byte_mask()), 1, scratch);
vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_int_to_byte_mask()), 1, noreg);
vextracti128_high(vtmp, dst);
vpackusdw(dst, dst, vtmp, 0);
}
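varshiftbw works around the lack of a variable byte-shift instruction by widening bytes to dwords, shifting there, masking the low byte back out, and repacking; evarshiftb below does the same through words on AVX-512. A scalar sketch of that widen-shift-narrow idea (illustrative C++, not the SIMD path):

#include <cstdint>

enum class ByteShift { Left, ArithRight, LogicalRight };   // mirrors Op_LShiftVB / Op_RShiftVB / Op_URShiftVB

uint8_t shift_byte_widened(ByteShift op, uint8_t v, unsigned s) {
  // vextendbd/vpmovzxbd widen the byte lane so varshiftd can do the per-lane shift;
  // the & 0xFF models vpand with vector_int_to_byte_mask before vpackusdw repacks to bytes.
  if (op == ByteShift::Left)         return uint8_t((uint32_t(v) << s) & 0xFF);
  if (op == ByteShift::LogicalRight) return uint8_t((uint32_t(v) >> s) & 0xFF);
  return uint8_t((int32_t(int8_t(v)) >> s) & 0xFF);        // arithmetic right shift needs the sign-extended value
}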
// Variable shift src by shift using vtmp and scratch as TEMPs giving byte result in dst
void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) {
void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp) {
assert(opcode == Op_LShiftVB ||
opcode == Op_RShiftVB ||
opcode == Op_URShiftVB, "%s", NodeClassNames[opcode]);
@ -1482,7 +1480,7 @@ void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src,
vextendbw(sign, dst, src, ext_vector_len);
vpmovzxbw(vtmp, shift, ext_vector_len);
varshiftw(opcode, dst, dst, vtmp, ext_vector_len);
vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_short_to_byte_mask()), ext_vector_len, scratch);
vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_short_to_byte_mask()), ext_vector_len, noreg);
if (vector_len == 0) {
vextracti128_high(vtmp, dst);
vpackuswb(dst, dst, vtmp, vector_len);
@ -1627,12 +1625,11 @@ void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int v
}
}
void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp,
Register tmp, bool novlbwdq, int vlen_enc) {
void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc) {
if (novlbwdq) {
vpmovsxbd(xtmp, src, vlen_enc);
evpcmpd(dst, k0, xtmp, ExternalAddress(StubRoutines::x86::vector_int_mask_cmp_bits()),
Assembler::eq, true, vlen_enc, tmp);
Assembler::eq, true, vlen_enc, noreg);
} else {
vpxor(xtmp, xtmp, xtmp, vlen_enc);
vpsubb(xtmp, xtmp, src, vlen_enc);
@ -1692,19 +1689,19 @@ void C2_MacroAssembler::load_constant_vector(BasicType bt, XMMRegister dst, Inte
}
}
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) {
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, int vlen_in_bytes) {
ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
if (vlen_in_bytes <= 4) {
movdl(dst, addr);
} else if (vlen_in_bytes == 8) {
movq(dst, addr);
} else if (vlen_in_bytes == 16) {
movdqu(dst, addr, scratch);
movdqu(dst, addr, noreg);
} else if (vlen_in_bytes == 32) {
vmovdqu(dst, addr, scratch);
vmovdqu(dst, addr, noreg);
} else {
assert(vlen_in_bytes == 64, "%d", vlen_in_bytes);
evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch);
evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, noreg);
}
}
@ -2336,7 +2333,7 @@ void C2_MacroAssembler::get_elem(BasicType typ, Register dst, XMMRegister src, i
}
}
void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp, XMMRegister vtmp) {
void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp) {
int esize = type2aelembytes(typ);
int elem_per_lane = 16/esize;
int eindex = elemindex % elem_per_lane;
@ -2365,12 +2362,11 @@ void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src
// Zero upper bits
if (typ == T_FLOAT) {
if (UseAVX == 0) {
assert((vtmp != xnoreg) && (tmp != noreg), "required.");
movdqu(vtmp, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), tmp);
assert(vtmp != xnoreg, "required.");
movdqu(vtmp, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), noreg);
pand(dst, vtmp);
} else {
assert((tmp != noreg), "required.");
vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), Assembler::AVX_128bit, tmp);
vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), Assembler::AVX_128bit, noreg);
}
}
}
@ -2399,23 +2395,25 @@ void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask
}
}
void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) {
void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(adr), "missing");
switch(typ) {
case T_BOOLEAN:
case T_BYTE:
evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, rscratch);
break;
case T_CHAR:
case T_SHORT:
evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, rscratch);
break;
case T_INT:
case T_FLOAT:
evpcmpd(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
evpcmpd(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, rscratch);
break;
case T_LONG:
case T_DOUBLE:
evpcmpq(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
evpcmpq(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, rscratch);
break;
default:
assert(false,"Should not reach here.");
@ -4364,7 +4362,7 @@ void C2_MacroAssembler::vector_cast_float_special_cases_avx(XMMRegister dst, XMM
Register scratch, AddressLiteral float_sign_flip,
int vec_enc) {
Label done;
vmovdqu(xtmp1, float_sign_flip, scratch, vec_enc);
vmovdqu(xtmp1, float_sign_flip, vec_enc, scratch);
vpcmpeqd(xtmp2, dst, xtmp1, vec_enc);
vptest(xtmp2, xtmp2, vec_enc);
jccb(Assembler::equal, done);
@ -4969,7 +4967,7 @@ void C2_MacroAssembler::vector_popcount_byte(XMMRegister dst, XMMRegister src, X
vpsrlw(dst, src, 4, vec_enc);
vpand(dst, dst, xtmp1, vec_enc);
vpand(xtmp1, src, xtmp1, vec_enc);
vmovdqu(xtmp2, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), rtmp, vec_enc);
vmovdqu(xtmp2, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), vec_enc, noreg);
vpshufb(xtmp1, xtmp2, xtmp1, vec_enc);
vpshufb(dst, xtmp2, dst, vec_enc);
vpaddb(dst, dst, xtmp1, vec_enc);
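vector_popcount_byte is the classic nibble-LUT popcount: split each byte into its low and high nibble, look both up in a 16-entry table of bit counts via vpshufb, and add the two results. A scalar sketch of the per-byte computation (illustrative only; the layout of vector_popcount_lut and the 0x0f nibble mask in xtmp1 are assumptions based on this hunk):

#include <cstdint>

static const uint8_t kNibblePopcnt[16] = {0,1,1,2, 1,2,2,3, 1,2,2,3, 2,3,3,4};

uint8_t popcount_byte(uint8_t b) {
  return uint8_t(kNibblePopcnt[b & 0x0F]    // vpand with the nibble mask selects the low nibble
               + kNibblePopcnt[b >> 4]);    // vpsrlw by 4 plus the same mask selects the high nibble
}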
@ -5074,7 +5072,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg
if (VM_Version::supports_avx512vlbw()) {
// Get the reverse bit sequence of lower nibble of each byte.
vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), rtmp, vec_enc);
vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), vec_enc, noreg);
vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc);
vpandq(dst, xtmp2, src, vec_enc);
vpshufb(dst, xtmp1, dst, vec_enc);
@ -5088,7 +5086,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg
// Perform logical OR operation b/w left shifted reverse bit sequence of lower nibble and
// right shifted reverse bit sequence of upper nibble to obtain the reverse bit sequence of each byte.
vporq(xtmp2, dst, xtmp2, vec_enc);
vector_reverse_byte(bt, dst, xtmp2, rtmp, vec_enc);
vector_reverse_byte(bt, dst, xtmp2, vec_enc);
} else if(vec_enc == Assembler::AVX_512bit) {
// Shift based bit reversal.
@ -5107,7 +5105,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg
evmovdqul(xtmp1, k0, dst, true, vec_enc);
vector_reverse_byte64(bt, dst, xtmp1, xtmp1, xtmp2, rtmp, vec_enc);
} else {
vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), rtmp, vec_enc);
vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), vec_enc, rtmp);
vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc);
// Get the reverse bit sequence of lower nibble of each byte.
@ -5123,7 +5121,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg
// Perform logical OR operation b/w left shifted reverse bit sequence of lower nibble and
// right shifted reverse bit sequence of upper nibble to obtain the reverse bit sequence of each byte.
vpor(xtmp2, dst, xtmp2, vec_enc);
vector_reverse_byte(bt, dst, xtmp2, rtmp, vec_enc);
vector_reverse_byte(bt, dst, xtmp2, vec_enc);
}
}
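The bit-reversal path uses the same nibble-LUT idea: reverse each 4-bit half through a 16-entry table, swap the two halves, and, for element types wider than a byte, let vector_reverse_byte reverse the byte order afterwards. A scalar sketch of the per-byte step (illustrative only; the contents of vector_reverse_bit_lut are assumed):

#include <cstdint>

static const uint8_t kRev4[16] = {0x0,0x8,0x4,0xC, 0x2,0xA,0x6,0xE, 0x1,0x9,0x5,0xD, 0x3,0xB,0x7,0xF};

uint8_t reverse_bits_in_byte(uint8_t b) {
  // The reversed low nibble becomes the high half of the result and vice versa,
  // which is what the shift-and-vpor sequence above stitches together.
  return uint8_t((kRev4[b & 0x0F] << 4) | kRev4[b >> 4]);
}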
@ -5134,7 +5132,7 @@ void C2_MacroAssembler::vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, X
assert(VM_Version::supports_gfni(), "");
vpbroadcastq(xtmp, mask, vec_enc, rtmp);
vgf2p8affineqb(xtmp, src, xtmp, 0, vec_enc);
vector_reverse_byte(bt, dst, xtmp, rtmp, vec_enc);
vector_reverse_byte(bt, dst, xtmp, vec_enc);
}
void C2_MacroAssembler::vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
@ -5177,7 +5175,7 @@ void C2_MacroAssembler::vector_reverse_byte64(BasicType bt, XMMRegister dst, XMM
}
}
void C2_MacroAssembler::vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, Register rtmp, int vec_enc) {
void C2_MacroAssembler::vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc) {
if (bt == T_BYTE) {
if (VM_Version::supports_avx512vl() || vec_enc == Assembler::AVX_512bit) {
evmovdquq(dst, k0, src, true, vec_enc);
@ -5190,14 +5188,14 @@ void C2_MacroAssembler::vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRe
// pre-computed shuffle indices.
switch(bt) {
case T_LONG:
vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_long()), rtmp, vec_enc);
vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_long()), vec_enc, noreg);
break;
case T_INT:
vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_int()), rtmp, vec_enc);
vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_int()), vec_enc, noreg);
break;
case T_CHAR:
case T_SHORT:
vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_short()), rtmp, vec_enc);
vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_short()), vec_enc, noreg);
break;
default:
fatal("Unsupported type %s", type2name(bt));

src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp

@ -70,10 +70,10 @@ public:
#endif
// Generic instructions support for use in .ad files C2 code generation
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr);
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src);
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
XMMRegister tmp = xnoreg);
@ -90,9 +90,7 @@ public:
KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
int vlen_enc);
void signum_fp(int opcode, XMMRegister dst,
XMMRegister zero, XMMRegister one,
Register scratch);
void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one);
void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
bool merge, BasicType bt, int vec_enc);
@ -121,8 +119,8 @@ public:
void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
void insert(BasicType typ, XMMRegister dst, Register val, int idx);
void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
@ -137,7 +135,7 @@ public:
void extract(BasicType typ, Register dst, XMMRegister src, int idx);
XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg);
void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);
// vector test
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
@ -150,17 +148,17 @@ public:
#endif
// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register rscratch = rscratch1);
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);
void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, Register tmp, bool novlbwdq, int vlen_enc);
void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);
void load_vector(XMMRegister dst, Address src, int vlen_in_bytes);
void load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = rscratch1);
void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
void load_iota_indices(XMMRegister dst, int vlen_in_bytes);
// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
@ -390,7 +388,7 @@ public:
void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp,
AddressLiteral mask, Register rtmp, int vec_enc);
void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, Register rtmp, int vec_enc);
void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc);
void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);

src/hotspot/cpu/x86/macroAssembler_x86.cpp

@ -310,9 +310,7 @@ void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}
void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
// scratch register is not used,
// it is defined to match parameters of 64-bit version of this method.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
if (src.is_lval()) {
mov_literal32(dst, (intptr_t)src.target(), src.rspec());
} else {
@ -662,15 +660,15 @@ void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
movq(dst, rscratch1);
}
void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
if (src.is_lval()) {
mov_literal64(dst, (intptr_t)src.target(), src.rspec());
} else {
if (reachable(src)) {
movq(dst, as_Address(src));
} else {
lea(scratch, src);
movq(dst, Address(scratch, 0));
lea(dst, src);
movq(dst, Address(dst, 0));
}
}
}
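Note the trick in the 64-bit movptr above: when the literal is not RIP-reachable, the destination register itself now serves as the temporary, since the subsequent load overwrites it anyway, so the extra scratch parameter could be dropped. A compact contrast (illustrative, not a verbatim excerpt; far_src stands in for some non-reachable AddressLiteral):

lea(rscratch1, far_src);             // before: a dedicated scratch register was consumed
movq(dst, Address(rscratch1, 0));

lea(dst, far_src);                   // after: dst doubles as the scratch and is then overwritten
movq(dst, Address(dst, 0));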
@ -2541,61 +2539,67 @@ void MacroAssembler::movptr(Address dst, Register src) {
}
void MacroAssembler::movdqu(Address dst, XMMRegister src) {
assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::movdqu(dst, src);
assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::movdqu(dst, src);
}
void MacroAssembler::movdqu(XMMRegister dst, Address src) {
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::movdqu(dst, src);
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::movdqu(dst, src);
}
void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::movdqu(dst, src);
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::movdqu(dst, src);
}
void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
movdqu(dst, as_Address(src));
} else {
lea(scratchReg, src);
movdqu(dst, Address(scratchReg, 0));
lea(rscratch, src);
movdqu(dst, Address(rscratch, 0));
}
}
void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::vmovdqu(dst, src);
assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::vmovdqu(dst, src);
}
void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::vmovdqu(dst, src);
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::vmovdqu(dst, src);
}
void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::vmovdqu(dst, src);
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::vmovdqu(dst, src);
}
void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
vmovdqu(dst, as_Address(src));
}
else {
lea(scratch_reg, src);
vmovdqu(dst, Address(scratch_reg, 0));
lea(rscratch, src);
vmovdqu(dst, Address(rscratch, 0));
}
}
void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len) {
void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (vector_len == AVX_512bit) {
evmovdquq(dst, src, AVX_512bit, scratch_reg);
evmovdquq(dst, src, AVX_512bit, rscratch);
} else if (vector_len == AVX_256bit) {
vmovdqu(dst, src, scratch_reg);
vmovdqu(dst, src, rscratch);
} else {
movdqu(dst, src, scratch_reg);
movdqu(dst, src, rscratch);
}
}
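The AddressLiteral form of vmovdqu now takes vector_len before the optional rscratch (previously the scratch register came first), which is why call sites in the vector_cast and popcount hunks swap those two arguments. Hypothetical call sites under the new signature (illustrative; xtmp and far_literal are placeholders):

vmovdqu(xtmp, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), Assembler::AVX_256bit, noreg);  // always-reachable stub constant
vmovdqu(xtmp, far_literal, Assembler::AVX_512bit, rscratch1);   // a far address still needs a real scratch register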
@ -2653,12 +2657,14 @@ void MacroAssembler::kmovql(KRegister dst, AddressLiteral src, Register scratch_
}
}
void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
kmovwl(dst, as_Address(src));
} else {
lea(scratch_reg, src);
kmovwl(dst, Address(scratch_reg, 0));
lea(rscratch, src);
kmovwl(dst, Address(rscratch, 0));
}
}
@ -2682,13 +2688,14 @@ void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral s
}
}
void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
int vector_len, Register scratch_reg) {
void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
lea(rscratch, src);
Assembler::evmovdqul(dst, mask, Address(rscratch, 0), merge, vector_len);
}
}
@ -3145,12 +3152,14 @@ void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
}
}
void MacroAssembler::roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg) {
void MacroAssembler::roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::roundsd(dst, as_Address(src), rmode);
} else {
lea(scratch_reg, src);
Assembler::roundsd(dst, Address(scratch_reg, 0), rmode);
lea(rscratch, src);
Assembler::roundsd(dst, Address(rscratch, 0), rmode);
}
}
@ -3181,14 +3190,16 @@ void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
}
}
void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
// Used in sign-bit flipping with aligned address.
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
if (reachable(src)) {
Assembler::xorpd(dst, as_Address(src));
} else {
lea(scratch_reg, src);
Assembler::xorpd(dst, Address(scratch_reg, 0));
lea(rscratch, src);
Assembler::xorpd(dst, Address(rscratch, 0));
}
}
@ -3209,14 +3220,16 @@ void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) {
}
}
void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
// Used in sign-bit flipping with aligned address.
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
if (reachable(src)) {
Assembler::xorps(dst, as_Address(src));
} else {
lea(scratch_reg, src);
Assembler::xorps(dst, Address(scratch_reg, 0));
lea(rscratch, src);
Assembler::xorps(dst, Address(rscratch, 0));
}
}
@ -3254,6 +3267,8 @@ void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src
void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
assert(UseAVX > 0, "requires some form of AVX");
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::vpaddb(dst, nds, as_Address(src), vector_len);
} else {
@ -3304,12 +3319,14 @@ void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int v
Assembler::vpaddw(dst, nds, src, vector_len);
}
void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::vpand(dst, nds, as_Address(src), vector_len);
} else {
lea(scratch_reg, src);
Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
lea(rscratch, src);
Assembler::vpand(dst, nds, Address(rscratch, 0), vector_len);
}
}

src/hotspot/cpu/x86/macroAssembler_x86.hpp

@ -1162,17 +1162,17 @@ public:
void divss(XMMRegister dst, AddressLiteral src);
// Move Unaligned Double Quadword
void movdqu(Address dst, XMMRegister src);
void movdqu(XMMRegister dst, Address src);
void movdqu(XMMRegister dst, XMMRegister src);
void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);
void movdqu(Address dst, XMMRegister src);
void movdqu(XMMRegister dst, XMMRegister src);
void movdqu(XMMRegister dst, Address src);
void movdqu(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); }
void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); }
void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); }
@ -1195,18 +1195,19 @@ public:
void vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
// AVX Unaligned forms
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len);
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
void vmovdqu(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void vmovdqu(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
// AVX512 Unaligned
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); }
void evmovdqub(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); }
void evmovdqub(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); }
void evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
if (dst->encoding() != src->encoding() || mask != k0) {
Assembler::evmovdqub(dst, mask, src, merge, vector_len);
@ -1240,9 +1241,9 @@ public:
Assembler::evmovdqul(dst, mask, src, merge, vector_len);
}
}
void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
if (dst->encoding() != src->encoding()) {
@ -1311,9 +1312,9 @@ public:
void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
void sqrtsd(XMMRegister dst, AddressLiteral src);
void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg);
void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register rscratch);
void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
@ -1336,14 +1337,14 @@ public:
void ucomisd(XMMRegister dst, AddressLiteral src);
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
void xorpd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
void xorpd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
void xorpd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
// Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
void xorps(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
void xorps(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
void xorps(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
// Shuffle Bytes
void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
@ -1362,8 +1363,8 @@ public:
void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);
void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@ -1373,9 +1374,9 @@ public:
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
using Assembler::vpbroadcastd;
void vpbroadcastd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
@ -1845,23 +1846,14 @@ public:
void mov_metadata(Register dst, Metadata* obj);
void mov_metadata(Address dst, Metadata* obj);
void movptr(ArrayAddress dst, Register src);
// can this do an lea?
void movptr(Register dst, ArrayAddress src);
void movptr(Register dst, Address src);
#ifdef _LP64
void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1);
#else
void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit
#endif
void movptr(Register dst, intptr_t src);
void movptr(Register dst, Register src);
void movptr(Address dst, intptr_t src);
void movptr(Address dst, Register src);
void movptr(Register dst, Register src);
void movptr(Register dst, Address src);
void movptr(Register dst, AddressLiteral src);
void movptr(Register dst, ArrayAddress src);
void movptr(Register dst, intptr_t src);
void movptr(Address dst, Register src);
void movptr(Address dst, intptr_t src);
void movptr(ArrayAddress dst, Register src);
void movptr(Register dst, RegisterOrConstant src) {
if (src.is_constant()) movptr(dst, src.as_constant());

File diff suppressed because it is too large.