From 926380d3b748fd591f45abc99c497abc62c52565 Mon Sep 17 00:00:00 2001 From: Vladimir Ivanov Date: Tue, 23 Aug 2022 20:25:56 +0000 Subject: [PATCH] 8292640: C2: Remove unused scratch register usages on x86 Reviewed-by: kvn --- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 98 ++-- src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp | 26 +- src/hotspot/cpu/x86/macroAssembler_x86.cpp | 111 +++-- src/hotspot/cpu/x86/macroAssembler_x86.hpp | 96 ++-- src/hotspot/cpu/x86/x86.ad | 429 +++++++++--------- 5 files changed, 377 insertions(+), 383 deletions(-) diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index de99f867b1d..0c1d0c17de6 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -217,7 +217,7 @@ void C2_MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg, if (RTMLockingCalculationDelay > 0) { // Delay calculation - movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg); + movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr())); testptr(tmpReg, tmpReg); jccb(Assembler::equal, L_done); } @@ -966,45 +966,45 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t //------------------------------------------------------------------------------------------- // Generic instructions support for use in .ad files C2 code generation -void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) { +void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src) { if (dst != src) { movdqu(dst, src); } if (opcode == Op_AbsVD) { - andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr); + andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), noreg); } else { assert((opcode == Op_NegVD),"opcode should be Op_NegD"); - xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr); + xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), noreg); } } -void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) { +void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len) { if (opcode == Op_AbsVD) { - vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr); + vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, noreg); } else { assert((opcode == Op_NegVD),"opcode should be Op_NegD"); - vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, scr); + vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, noreg); } } -void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr) { +void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src) { if (dst != src) { movdqu(dst, src); } if (opcode == Op_AbsVF) { - andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), scr); + andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), noreg); } else { assert((opcode == Op_NegVF),"opcode should be Op_NegF"); - xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scr); + xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), noreg); } } -void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) { +void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len) { if (opcode == Op_AbsVF) { - vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, scr); + vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, noreg); } else { assert((opcode == Op_NegVF),"opcode should be Op_NegF"); - vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, scr); + vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, noreg); } } @@ -1177,9 +1177,7 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt, } // Float/Double signum -void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, - XMMRegister zero, XMMRegister one, - Register scratch) { +void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one) { assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity"); Label DONE_LABEL; @@ -1191,7 +1189,7 @@ void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN movflt(dst, one); jcc(Assembler::above, DONE_LABEL); - xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scratch); + xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), noreg); } else if (opcode == Op_SignumD) { assert(UseSSE > 1, "required"); ucomisd(dst, zero); @@ -1199,7 +1197,7 @@ void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN movdbl(dst, one); jcc(Assembler::above, DONE_LABEL); - xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scratch); + xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), noreg); } bind(DONE_LABEL); @@ -1458,7 +1456,7 @@ void C2_MacroAssembler::varshiftq(int opcode, XMMRegister dst, XMMRegister src, } // Variable shift src by shift using vtmp and scratch as TEMPs giving word result in dst -void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) { +void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp) { assert(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB, "%s", NodeClassNames[opcode]); @@ -1467,13 +1465,13 @@ void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, vextendbd(sign, dst, src, 1); vpmovzxbd(vtmp, shift, 1); varshiftd(opcode, dst, dst, vtmp, 1); - vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_int_to_byte_mask()), 1, scratch); + vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_int_to_byte_mask()), 1, noreg); vextracti128_high(vtmp, dst); vpackusdw(dst, dst, vtmp, 0); } // Variable shift src by shift using vtmp and scratch as TEMPs giving byte result in dst -void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) { +void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp) { assert(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB, "%s", NodeClassNames[opcode]); @@ -1482,7 +1480,7 @@ void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, vextendbw(sign, dst, src, ext_vector_len); vpmovzxbw(vtmp, shift, ext_vector_len); varshiftw(opcode, dst, dst, vtmp, ext_vector_len); - vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_short_to_byte_mask()), ext_vector_len, scratch); + vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_short_to_byte_mask()), ext_vector_len, noreg); if (vector_len == 0) { vextracti128_high(vtmp, dst); vpackuswb(dst, dst, vtmp, vector_len); @@ -1627,12 +1625,11 @@ void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int v } } -void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, - Register tmp, bool novlbwdq, int vlen_enc) { +void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc) { if (novlbwdq) { vpmovsxbd(xtmp, src, vlen_enc); evpcmpd(dst, k0, xtmp, ExternalAddress(StubRoutines::x86::vector_int_mask_cmp_bits()), - Assembler::eq, true, vlen_enc, tmp); + Assembler::eq, true, vlen_enc, noreg); } else { vpxor(xtmp, xtmp, xtmp, vlen_enc); vpsubb(xtmp, xtmp, src, vlen_enc); @@ -1692,19 +1689,19 @@ void C2_MacroAssembler::load_constant_vector(BasicType bt, XMMRegister dst, Inte } } -void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) { +void C2_MacroAssembler::load_iota_indices(XMMRegister dst, int vlen_in_bytes) { ExternalAddress addr(StubRoutines::x86::vector_iota_indices()); if (vlen_in_bytes <= 4) { movdl(dst, addr); } else if (vlen_in_bytes == 8) { movq(dst, addr); } else if (vlen_in_bytes == 16) { - movdqu(dst, addr, scratch); + movdqu(dst, addr, noreg); } else if (vlen_in_bytes == 32) { - vmovdqu(dst, addr, scratch); + vmovdqu(dst, addr, noreg); } else { assert(vlen_in_bytes == 64, "%d", vlen_in_bytes); - evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch); + evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, noreg); } } @@ -2336,7 +2333,7 @@ void C2_MacroAssembler::get_elem(BasicType typ, Register dst, XMMRegister src, i } } -void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp, XMMRegister vtmp) { +void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp) { int esize = type2aelembytes(typ); int elem_per_lane = 16/esize; int eindex = elemindex % elem_per_lane; @@ -2365,12 +2362,11 @@ void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src // Zero upper bits if (typ == T_FLOAT) { if (UseAVX == 0) { - assert((vtmp != xnoreg) && (tmp != noreg), "required."); - movdqu(vtmp, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), tmp); + assert(vtmp != xnoreg, "required."); + movdqu(vtmp, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), noreg); pand(dst, vtmp); } else { - assert((tmp != noreg), "required."); - vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), Assembler::AVX_128bit, tmp); + vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), Assembler::AVX_128bit, noreg); } } } @@ -2399,23 +2395,25 @@ void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask } } -void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) { +void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register rscratch) { + assert(rscratch != noreg || always_reachable(adr), "missing"); + switch(typ) { case T_BOOLEAN: case T_BYTE: - evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch); + evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, rscratch); break; case T_CHAR: case T_SHORT: - evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch); + evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, rscratch); break; case T_INT: case T_FLOAT: - evpcmpd(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch); + evpcmpd(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, rscratch); break; case T_LONG: case T_DOUBLE: - evpcmpq(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch); + evpcmpq(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, rscratch); break; default: assert(false,"Should not reach here."); @@ -4364,7 +4362,7 @@ void C2_MacroAssembler::vector_cast_float_special_cases_avx(XMMRegister dst, XMM Register scratch, AddressLiteral float_sign_flip, int vec_enc) { Label done; - vmovdqu(xtmp1, float_sign_flip, scratch, vec_enc); + vmovdqu(xtmp1, float_sign_flip, vec_enc, scratch); vpcmpeqd(xtmp2, dst, xtmp1, vec_enc); vptest(xtmp2, xtmp2, vec_enc); jccb(Assembler::equal, done); @@ -4969,7 +4967,7 @@ void C2_MacroAssembler::vector_popcount_byte(XMMRegister dst, XMMRegister src, X vpsrlw(dst, src, 4, vec_enc); vpand(dst, dst, xtmp1, vec_enc); vpand(xtmp1, src, xtmp1, vec_enc); - vmovdqu(xtmp2, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), rtmp, vec_enc); + vmovdqu(xtmp2, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), vec_enc, noreg); vpshufb(xtmp1, xtmp2, xtmp1, vec_enc); vpshufb(dst, xtmp2, dst, vec_enc); vpaddb(dst, dst, xtmp1, vec_enc); @@ -5074,7 +5072,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg if (VM_Version::supports_avx512vlbw()) { // Get the reverse bit sequence of lower nibble of each byte. - vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), rtmp, vec_enc); + vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), vec_enc, noreg); vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc); vpandq(dst, xtmp2, src, vec_enc); vpshufb(dst, xtmp1, dst, vec_enc); @@ -5088,7 +5086,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg // Perform logical OR operation b/w left shifted reverse bit sequence of lower nibble and // right shifted reverse bit sequence of upper nibble to obtain the reverse bit sequence of each byte. vporq(xtmp2, dst, xtmp2, vec_enc); - vector_reverse_byte(bt, dst, xtmp2, rtmp, vec_enc); + vector_reverse_byte(bt, dst, xtmp2, vec_enc); } else if(vec_enc == Assembler::AVX_512bit) { // Shift based bit reversal. @@ -5107,7 +5105,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg evmovdqul(xtmp1, k0, dst, true, vec_enc); vector_reverse_byte64(bt, dst, xtmp1, xtmp1, xtmp2, rtmp, vec_enc); } else { - vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), rtmp, vec_enc); + vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), vec_enc, rtmp); vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc); // Get the reverse bit sequence of lower nibble of each byte. @@ -5123,7 +5121,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg // Perform logical OR operation b/w left shifted reverse bit sequence of lower nibble and // right shifted reverse bit sequence of upper nibble to obtain the reverse bit sequence of each byte. vpor(xtmp2, dst, xtmp2, vec_enc); - vector_reverse_byte(bt, dst, xtmp2, rtmp, vec_enc); + vector_reverse_byte(bt, dst, xtmp2, vec_enc); } } @@ -5134,7 +5132,7 @@ void C2_MacroAssembler::vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, X assert(VM_Version::supports_gfni(), ""); vpbroadcastq(xtmp, mask, vec_enc, rtmp); vgf2p8affineqb(xtmp, src, xtmp, 0, vec_enc); - vector_reverse_byte(bt, dst, xtmp, rtmp, vec_enc); + vector_reverse_byte(bt, dst, xtmp, vec_enc); } void C2_MacroAssembler::vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src, @@ -5177,7 +5175,7 @@ void C2_MacroAssembler::vector_reverse_byte64(BasicType bt, XMMRegister dst, XMM } } -void C2_MacroAssembler::vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, Register rtmp, int vec_enc) { +void C2_MacroAssembler::vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc) { if (bt == T_BYTE) { if (VM_Version::supports_avx512vl() || vec_enc == Assembler::AVX_512bit) { evmovdquq(dst, k0, src, true, vec_enc); @@ -5190,14 +5188,14 @@ void C2_MacroAssembler::vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRe // pre-computed shuffle indices. switch(bt) { case T_LONG: - vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_long()), rtmp, vec_enc); + vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_long()), vec_enc, noreg); break; case T_INT: - vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_int()), rtmp, vec_enc); + vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_int()), vec_enc, noreg); break; case T_CHAR: case T_SHORT: - vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_short()), rtmp, vec_enc); + vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_short()), vec_enc, noreg); break; default: fatal("Unsupported type %s", type2name(bt)); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 6a6dd060aca..5628429158a 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -70,10 +70,10 @@ public: #endif // Generic instructions support for use in .ad files C2 code generation - void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr); - void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); - void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr); - void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); + void vabsnegd(int opcode, XMMRegister dst, XMMRegister src); + void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len); + void vabsnegf(int opcode, XMMRegister dst, XMMRegister src); + void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len); void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, XMMRegister tmp = xnoreg); @@ -90,9 +90,7 @@ public: KRegister ktmp, XMMRegister atmp, XMMRegister btmp, int vlen_enc); - void signum_fp(int opcode, XMMRegister dst, - XMMRegister zero, XMMRegister one, - Register scratch); + void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one); void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask, bool merge, BasicType bt, int vec_enc); @@ -121,8 +119,8 @@ public: void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg); - void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch); - void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch); + void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp); + void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp); void insert(BasicType typ, XMMRegister dst, Register val, int idx); void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx); @@ -137,7 +135,7 @@ public: void extract(BasicType typ, Register dst, XMMRegister src, int idx); XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex); void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex); - void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg); + void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg); // vector test void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2, @@ -150,17 +148,17 @@ public: #endif // blend - void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1); + void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register rscratch = rscratch1); void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len); void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len); void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy); - void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, Register tmp, bool novlbwdq, int vlen_enc); + void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc); void load_vector(XMMRegister dst, Address src, int vlen_in_bytes); void load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = rscratch1); void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen); - void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes); + void load_iota_indices(XMMRegister dst, int vlen_in_bytes); // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles. @@ -390,7 +388,7 @@ public: void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp, AddressLiteral mask, Register rtmp, int vec_enc); - void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, Register rtmp, int vec_enc); + void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc); void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, Register rtmp, int vec_enc); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 3ce1a9cd7ec..285ee857211 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -310,9 +310,7 @@ void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); } -void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { - // scratch register is not used, - // it is defined to match parameters of 64-bit version of this method. +void MacroAssembler::movptr(Register dst, AddressLiteral src) { if (src.is_lval()) { mov_literal32(dst, (intptr_t)src.target(), src.rspec()); } else { @@ -662,15 +660,15 @@ void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { movq(dst, rscratch1); } -void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { +void MacroAssembler::movptr(Register dst, AddressLiteral src) { if (src.is_lval()) { mov_literal64(dst, (intptr_t)src.target(), src.rspec()); } else { if (reachable(src)) { movq(dst, as_Address(src)); } else { - lea(scratch, src); - movq(dst, Address(scratch, 0)); + lea(dst, src); + movq(dst, Address(dst, 0)); } } } @@ -2541,61 +2539,67 @@ void MacroAssembler::movptr(Address dst, Register src) { } void MacroAssembler::movdqu(Address dst, XMMRegister src) { - assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); - Assembler::movdqu(dst, src); + assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); + Assembler::movdqu(dst, src); } void MacroAssembler::movdqu(XMMRegister dst, Address src) { - assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); - Assembler::movdqu(dst, src); + assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); + Assembler::movdqu(dst, src); } void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) { - assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); - Assembler::movdqu(dst, src); + assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); + Assembler::movdqu(dst, src); } -void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) { +void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + if (reachable(src)) { movdqu(dst, as_Address(src)); } else { - lea(scratchReg, src); - movdqu(dst, Address(scratchReg, 0)); + lea(rscratch, src); + movdqu(dst, Address(rscratch, 0)); } } void MacroAssembler::vmovdqu(Address dst, XMMRegister src) { - assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); - Assembler::vmovdqu(dst, src); + assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); + Assembler::vmovdqu(dst, src); } void MacroAssembler::vmovdqu(XMMRegister dst, Address src) { - assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); - Assembler::vmovdqu(dst, src); + assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); + Assembler::vmovdqu(dst, src); } void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) { - assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); - Assembler::vmovdqu(dst, src); + assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); + Assembler::vmovdqu(dst, src); } -void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) { +void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + if (reachable(src)) { vmovdqu(dst, as_Address(src)); } else { - lea(scratch_reg, src); - vmovdqu(dst, Address(scratch_reg, 0)); + lea(rscratch, src); + vmovdqu(dst, Address(rscratch, 0)); } } -void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len) { +void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + if (vector_len == AVX_512bit) { - evmovdquq(dst, src, AVX_512bit, scratch_reg); + evmovdquq(dst, src, AVX_512bit, rscratch); } else if (vector_len == AVX_256bit) { - vmovdqu(dst, src, scratch_reg); + vmovdqu(dst, src, rscratch); } else { - movdqu(dst, src, scratch_reg); + movdqu(dst, src, rscratch); } } @@ -2653,12 +2657,14 @@ void MacroAssembler::kmovql(KRegister dst, AddressLiteral src, Register scratch_ } } -void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) { +void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + if (reachable(src)) { kmovwl(dst, as_Address(src)); } else { - lea(scratch_reg, src); - kmovwl(dst, Address(scratch_reg, 0)); + lea(rscratch, src); + kmovwl(dst, Address(rscratch, 0)); } } @@ -2682,13 +2688,14 @@ void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral s } } -void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, - int vector_len, Register scratch_reg) { +void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + if (reachable(src)) { Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len); } else { - lea(scratch_reg, src); - Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len); + lea(rscratch, src); + Assembler::evmovdqul(dst, mask, Address(rscratch, 0), merge, vector_len); } } @@ -3145,12 +3152,14 @@ void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { } } -void MacroAssembler::roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg) { +void MacroAssembler::roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + if (reachable(src)) { Assembler::roundsd(dst, as_Address(src), rmode); } else { - lea(scratch_reg, src); - Assembler::roundsd(dst, Address(scratch_reg, 0), rmode); + lea(rscratch, src); + Assembler::roundsd(dst, Address(rscratch, 0), rmode); } } @@ -3181,14 +3190,16 @@ void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { } } -void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) { +void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + // Used in sign-bit flipping with aligned address. assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { Assembler::xorpd(dst, as_Address(src)); } else { - lea(scratch_reg, src); - Assembler::xorpd(dst, Address(scratch_reg, 0)); + lea(rscratch, src); + Assembler::xorpd(dst, Address(rscratch, 0)); } } @@ -3209,14 +3220,16 @@ void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) { } } -void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg) { +void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + // Used in sign-bit flipping with aligned address. assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { Assembler::xorps(dst, as_Address(src)); } else { - lea(scratch_reg, src); - Assembler::xorps(dst, Address(scratch_reg, 0)); + lea(rscratch, src); + Assembler::xorps(dst, Address(rscratch, 0)); } } @@ -3254,6 +3267,8 @@ void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) { assert(UseAVX > 0, "requires some form of AVX"); + assert(rscratch != noreg || always_reachable(src), "missing"); + if (reachable(src)) { Assembler::vpaddb(dst, nds, as_Address(src), vector_len); } else { @@ -3304,12 +3319,14 @@ void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int v Assembler::vpaddw(dst, nds, src, vector_len); } -void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { +void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + if (reachable(src)) { Assembler::vpand(dst, nds, as_Address(src), vector_len); } else { - lea(scratch_reg, src); - Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len); + lea(rscratch, src); + Assembler::vpand(dst, nds, Address(rscratch, 0), vector_len); } } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 08c53d28e89..e76a242b73e 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -1162,17 +1162,17 @@ public: void divss(XMMRegister dst, AddressLiteral src); // Move Unaligned Double Quadword - void movdqu(Address dst, XMMRegister src); - void movdqu(XMMRegister dst, Address src); - void movdqu(XMMRegister dst, XMMRegister src); - void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1); + void movdqu(Address dst, XMMRegister src); + void movdqu(XMMRegister dst, XMMRegister src); + void movdqu(XMMRegister dst, Address src); + void movdqu(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1); - void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); } - void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); } - void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); } - void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); - void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); } - void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); } + void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); } + void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); } + void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); } + void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); } + void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); } + void kmovwl(KRegister dst, AddressLiteral src, Register rscratch = rscratch1); void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); } void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); } @@ -1195,18 +1195,19 @@ public: void vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1); // AVX Unaligned forms - void vmovdqu(Address dst, XMMRegister src); - void vmovdqu(XMMRegister dst, Address src); - void vmovdqu(XMMRegister dst, XMMRegister src); - void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); - void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len); + void vmovdqu(Address dst, XMMRegister src); + void vmovdqu(XMMRegister dst, Address src); + void vmovdqu(XMMRegister dst, XMMRegister src); + void vmovdqu(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1); + void vmovdqu(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1); // AVX512 Unaligned - void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len); - void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len); + void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len); + void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len); void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); } - void evmovdqub(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); } + void evmovdqub(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); } + void evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { if (dst->encoding() != src->encoding() || mask != k0) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); @@ -1240,9 +1241,9 @@ public: Assembler::evmovdqul(dst, mask, src, merge, vector_len); } } - void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } - void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } - void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); + void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } + void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } + void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch); void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { if (dst->encoding() != src->encoding()) { @@ -1311,9 +1312,9 @@ public: void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } void sqrtsd(XMMRegister dst, AddressLiteral src); - void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); } - void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); } - void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg); + void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); } + void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); } + void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register rscratch); void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } @@ -1336,14 +1337,14 @@ public: void ucomisd(XMMRegister dst, AddressLiteral src); // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values - void xorpd(XMMRegister dst, XMMRegister src); - void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } - void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); + void xorpd(XMMRegister dst, XMMRegister src); + void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } + void xorpd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1); // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values - void xorps(XMMRegister dst, XMMRegister src); - void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } - void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); + void xorps(XMMRegister dst, XMMRegister src); + void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } + void xorps(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1); // Shuffle Bytes void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } @@ -1362,8 +1363,8 @@ public: void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len); void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len); - void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); - void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch); void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); @@ -1373,9 +1374,9 @@ public: void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); } void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg); - void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } - void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } - void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); + void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } + void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } + void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = rscratch1); using Assembler::vpbroadcastd; void vpbroadcastd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1); @@ -1845,23 +1846,14 @@ public: void mov_metadata(Register dst, Metadata* obj); void mov_metadata(Address dst, Metadata* obj); - void movptr(ArrayAddress dst, Register src); - // can this do an lea? - void movptr(Register dst, ArrayAddress src); - - void movptr(Register dst, Address src); - -#ifdef _LP64 - void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1); -#else - void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit -#endif - - void movptr(Register dst, intptr_t src); - void movptr(Register dst, Register src); - void movptr(Address dst, intptr_t src); - - void movptr(Address dst, Register src); + void movptr(Register dst, Register src); + void movptr(Register dst, Address src); + void movptr(Register dst, AddressLiteral src); + void movptr(Register dst, ArrayAddress src); + void movptr(Register dst, intptr_t src); + void movptr(Address dst, Register src); + void movptr(Address dst, intptr_t src); + void movptr(ArrayAddress dst, Register src); void movptr(Register dst, RegisterOrConstant src) { if (src.is_constant()) movptr(dst, src.as_constant()); diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 3c2eee1bf88..bdbbd574356 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -3742,40 +3742,39 @@ instruct reinterpret(vec dst) %{ ins_pipe( pipe_slow ); %} -instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{ +instruct reinterpret_expand(vec dst, vec src) %{ predicate(UseAVX == 0 && (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst match(Set dst (VectorReinterpret src)); ins_cost(125); - effect(TEMP dst, TEMP scratch); - format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} + effect(TEMP dst); + format %{ "vector_reinterpret_expand $dst,$src" %} ins_encode %{ assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); if (src_vlen_in_bytes == 4) { - __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); } else { assert(src_vlen_in_bytes == 8, ""); - __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); } __ pand($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ +instruct vreinterpret_expand4(legVec dst, vec src) %{ predicate(UseAVX > 0 && !n->bottom_type()->isa_vectmask() && (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst match(Set dst (VectorReinterpret src)); ins_cost(125); - effect(TEMP scratch); - format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} + format %{ "vector_reinterpret_expand $dst,$src" %} ins_encode %{ - __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register); + __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); %} ins_pipe( pipe_slow ); %} @@ -3843,14 +3842,13 @@ instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ ins_pipe(pipe_slow); %} -instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ +instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ match(Set dst (RoundDoubleMode con rmode)); - effect(TEMP scratch_reg); format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} ins_cost(150); ins_encode %{ assert(UseSSE >= 4, "required"); - __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); + __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); %} ins_pipe(pipe_slow); %} @@ -4011,9 +4009,9 @@ instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE if (vlen_enc == Assembler::AVX_128bit) { - __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); + __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg); } else { - __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); + __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg); } __ lea($tmp$$Register, $mem$$Address); __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); @@ -4034,7 +4032,7 @@ instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE - __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register); + __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); __ lea($tmp$$Register, $mem$$Address); __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); %} @@ -4075,7 +4073,7 @@ instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE - __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register); + __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); __ lea($tmp$$Register, $mem$$Address); __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); %} @@ -5613,28 +5611,28 @@ instruct vsubD_mem(vec dst, vec src, memory mem) %{ // --------------------------------- MUL -------------------------------------- // Byte vector mul -instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ +instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp) %{ predicate(Matcher::vector_length(n) == 4 || Matcher::vector_length(n) == 8); match(Set dst (MulVB src1 src2)); - effect(TEMP dst, TEMP tmp, TEMP scratch); + effect(TEMP dst, TEMP tmp); format %{"vector_mulB $dst,$src1,$src2" %} ins_encode %{ assert(UseSSE > 3, "required"); __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); - __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); __ pand($dst$$XMMRegister, $tmp$$XMMRegister); __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ +instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2) %{ predicate(Matcher::vector_length(n) == 16 && UseAVX <= 1); match(Set dst (MulVB src1 src2)); - effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + effect(TEMP dst, TEMP tmp1, TEMP tmp2); format %{"vector_mulB $dst,$src1,$src2" %} ins_encode %{ assert(UseSSE > 3, "required"); @@ -5646,7 +5644,7 @@ instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scrat __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); - __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); @@ -5654,17 +5652,17 @@ instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scrat ins_pipe( pipe_slow ); %} -instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ +instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp) %{ predicate(Matcher::vector_length(n) == 16 && UseAVX > 1); match(Set dst (MulVB src1 src2)); - effect(TEMP dst, TEMP tmp, TEMP scratch); + effect(TEMP dst, TEMP tmp); format %{"vector_mulB $dst,$src1,$src2" %} ins_encode %{ int vlen_enc = Assembler::AVX_256bit; __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); - __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5672,10 +5670,10 @@ instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ ins_pipe( pipe_slow ); %} -instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ +instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2) %{ predicate(Matcher::vector_length(n) == 32); match(Set dst (MulVB src1 src2)); - effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + effect(TEMP dst, TEMP tmp1, TEMP tmp2); format %{"vector_mulB $dst,$src1,$src2" %} ins_encode %{ assert(UseAVX > 1, "required"); @@ -5688,7 +5686,7 @@ instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); - __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); @@ -5698,10 +5696,10 @@ instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI ins_pipe( pipe_slow ); %} -instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ +instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2) %{ predicate(Matcher::vector_length(n) == 64); match(Set dst (MulVB src1 src2)); - effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + effect(TEMP dst, TEMP tmp1, TEMP tmp2); format %{"vector_mulB $dst,$src1,$src2\n\t" %} ins_encode %{ assert(UseAVX > 2, "required"); @@ -5714,12 +5712,12 @@ instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); - __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); - __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); + __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); %} ins_pipe( pipe_slow ); @@ -6183,24 +6181,24 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktm // --------------------------------- Signum/CopySign --------------------------- -instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{ +instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ match(Set dst (SignumF dst (Binary zero one))); - effect(TEMP scratch, KILL cr); - format %{ "signumF $dst, $dst\t! using $scratch as TEMP" %} + effect(KILL cr); + format %{ "signumF $dst, $dst" %} ins_encode %{ int opcode = this->ideal_Opcode(); - __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register); + __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) %{ +instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ match(Set dst (SignumD dst (Binary zero one))); - effect(TEMP scratch, KILL cr); - format %{ "signumD $dst, $dst\t! using $scratch as TEMP" %} + effect(KILL cr); + format %{ "signumD $dst, $dst" %} ins_encode %{ int opcode = this->ideal_Opcode(); - __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register); + __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -6393,12 +6391,12 @@ instruct vshiftcnt(vec dst, rRegI cnt) %{ %} // Byte vector shift -instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ +instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); - effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); + effect(TEMP dst, USE src, USE shift, TEMP tmp); format %{"vector_byte_shift $dst,$src,$shift" %} ins_encode %{ assert(UseSSE > 3, "required"); @@ -6406,20 +6404,20 @@ instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ bool sign = (opcode != Op_URShiftVB); __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); - __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); __ pand($dst$$XMMRegister, $tmp$$XMMRegister); __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ +instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && UseAVX <= 1); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); - effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); + effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); format %{"vector_byte_shift $dst,$src,$shift" %} ins_encode %{ assert(UseSSE > 3, "required"); @@ -6430,7 +6428,7 @@ instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratc __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); - __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); @@ -6438,13 +6436,13 @@ instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratc ins_pipe( pipe_slow ); %} -instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ +instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && UseAVX > 1); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); - effect(TEMP dst, TEMP tmp, TEMP scratch); + effect(TEMP dst, TEMP tmp); format %{"vector_byte_shift $dst,$src,$shift" %} ins_encode %{ int opcode = this->ideal_Opcode(); @@ -6452,19 +6450,19 @@ instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ int vlen_enc = Assembler::AVX_256bit; __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); - __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); + __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} -instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ +instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); - effect(TEMP dst, TEMP tmp, TEMP scratch); + effect(TEMP dst, TEMP tmp); format %{"vector_byte_shift $dst,$src,$shift" %} ins_encode %{ assert(UseAVX > 1, "required"); @@ -6476,20 +6474,20 @@ instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); - __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); - __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); + __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); + __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ +instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); match(Set dst ( LShiftVB src shift)); match(Set dst (RShiftVB src shift)); match(Set dst (URShiftVB src shift)); - effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + effect(TEMP dst, TEMP tmp1, TEMP tmp2); format %{"vector_byte_shift $dst,$src,$shift" %} ins_encode %{ assert(UseAVX > 2, "required"); @@ -6501,12 +6499,12 @@ instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI sc __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); - __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); - __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); + __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); %} ins_pipe( pipe_slow ); @@ -6643,10 +6641,10 @@ instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ // -------------------ArithmeticRightShift ----------------------------------- // Long vector arithmetic right shift -instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ +instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); match(Set dst (RShiftVL src shift)); - effect(TEMP dst, TEMP tmp, TEMP scratch); + effect(TEMP dst, TEMP tmp); format %{ "vshiftq $dst,$src,$shift" %} ins_encode %{ uint vlen = Matcher::vector_length(this); @@ -6654,7 +6652,7 @@ instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) assert(UseSSE >= 2, "required"); __ movdqu($dst$$XMMRegister, $src$$XMMRegister); __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); - __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); + __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); @@ -6663,7 +6661,7 @@ instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) assert(UseAVX > 1, "required"); int vlen_enc = Assembler::AVX_256bit; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); - __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); + __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); @@ -6685,47 +6683,47 @@ instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ // ------------------- Variable Shift ----------------------------- // Byte variable shift -instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ +instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ predicate(Matcher::vector_length(n) <= 8 && n->as_ShiftV()->is_var_shift() && !VM_Version::supports_avx512bw()); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); - effect(TEMP dst, TEMP vtmp, TEMP scratch); - format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp); + format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} ins_encode %{ assert(UseAVX >= 2, "required"); int opcode = this->ideal_Opcode(); int vlen_enc = Assembler::AVX_128bit; - __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); + __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} -instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ +instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ predicate(Matcher::vector_length(n) == 16 && n->as_ShiftV()->is_var_shift() && !VM_Version::supports_avx512bw()); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); - effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); - format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); + format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ assert(UseAVX >= 2, "required"); int opcode = this->ideal_Opcode(); int vlen_enc = Assembler::AVX_128bit; // Shift lower half and get word result in dst - __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); + __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); // Shift upper half and get word result in vtmp1 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); - __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); + __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); // Merge and down convert the two word results to byte in dst __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); @@ -6733,34 +6731,34 @@ instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, r ins_pipe( pipe_slow ); %} -instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{ +instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ predicate(Matcher::vector_length(n) == 32 && n->as_ShiftV()->is_var_shift() && !VM_Version::supports_avx512bw()); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); - effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP scratch); - format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 and $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); + format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} ins_encode %{ assert(UseAVX >= 2, "required"); int opcode = this->ideal_Opcode(); int vlen_enc = Assembler::AVX_128bit; // Process lower 128 bits and get result in dst - __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); + __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); - __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); + __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); // Process higher 128 bits and get result in vtmp3 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); - __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister, $scratch$$Register); + __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); - __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); + __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); // Merge the two results in dst @@ -6769,57 +6767,57 @@ instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, v ins_pipe( pipe_slow ); %} -instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ +instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ predicate(Matcher::vector_length(n) <= 32 && n->as_ShiftV()->is_var_shift() && VM_Version::supports_avx512bw()); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); - effect(TEMP dst, TEMP vtmp, TEMP scratch); - format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp); + format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} ins_encode %{ assert(UseAVX > 2, "required"); int opcode = this->ideal_Opcode(); int vlen_enc = vector_length_encoding(this); - __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); + __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ +instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ predicate(Matcher::vector_length(n) == 64 && n->as_ShiftV()->is_var_shift() && VM_Version::supports_avx512bw()); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); - effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); - format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); + format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ assert(UseAVX > 2, "required"); int opcode = this->ideal_Opcode(); int vlen_enc = Assembler::AVX_256bit; - __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); + __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); - __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); + __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} // Short variable shift -instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ +instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ predicate(Matcher::vector_length(n) <= 8 && n->as_ShiftV()->is_var_shift() && !VM_Version::supports_avx512bw()); match(Set dst ( LShiftVS src shift)); match(Set dst ( RShiftVS src shift)); match(Set dst (URShiftVS src shift)); - effect(TEMP dst, TEMP vtmp, TEMP scratch); + effect(TEMP dst, TEMP vtmp); format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} ins_encode %{ assert(UseAVX >= 2, "required"); @@ -6830,21 +6828,21 @@ instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); - __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); + __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} -instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ +instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ predicate(Matcher::vector_length(n) == 16 && n->as_ShiftV()->is_var_shift() && !VM_Version::supports_avx512bw()); match(Set dst ( LShiftVS src shift)); match(Set dst ( RShiftVS src shift)); match(Set dst (URShiftVS src shift)); - effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); + effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} ins_encode %{ assert(UseAVX >= 2, "required"); @@ -6856,7 +6854,7 @@ instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, r __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); - __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); + __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); // Shift upper half, with result in dst using vtmp1 as TEMP __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); @@ -6864,7 +6862,7 @@ instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, r __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); - __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); + __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); // Merge lower and upper half result into dst __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); @@ -7076,34 +7074,33 @@ instruct vcastBtoX(vec dst, vec src) %{ ins_pipe( pipe_slow ); %} -instruct castStoX(vec dst, vec src, rRegP scratch) %{ +instruct castStoX(vec dst, vec src) %{ predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && Matcher::vector_length(n->in(1)) <= 8 && // src Matcher::vector_element_basic_type(n) == T_BYTE); - effect(TEMP scratch); match(Set dst (VectorCastS2X src)); - format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %} + format %{ "vector_cast_s2x $dst,$src" %} ins_encode %{ assert(UseAVX > 0, "required"); - __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register); + __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} -instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ +instruct vcastStoX(vec dst, vec src, vec vtmp) %{ predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && Matcher::vector_length(n->in(1)) == 16 && // src Matcher::vector_element_basic_type(n) == T_BYTE); - effect(TEMP dst, TEMP vtmp, TEMP scratch); + effect(TEMP dst, TEMP vtmp); match(Set dst (VectorCastS2X src)); - format %{ "vector_cast_s2x $dst,$src\t! using $vtmp, $scratch as TEMP" %} + format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %} ins_encode %{ assert(UseAVX > 0, "required"); int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); - __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); + __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); %} @@ -7149,13 +7146,12 @@ instruct vcastStoX_evex(vec dst, vec src) %{ ins_pipe( pipe_slow ); %} -instruct castItoX(vec dst, vec src, rRegP scratch) %{ +instruct castItoX(vec dst, vec src) %{ predicate(UseAVX <= 2 && (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src match(Set dst (VectorCastI2X src)); - format %{ "vector_cast_i2x $dst,$src\t! using $scratch as TEMP" %} - effect(TEMP scratch); + format %{ "vector_cast_i2x $dst,$src" %} ins_encode %{ assert(UseAVX > 0, "required"); @@ -7163,25 +7159,25 @@ instruct castItoX(vec dst, vec src, rRegP scratch) %{ int vlen_enc = vector_length_encoding(this, $src); if (to_elem_bt == T_BYTE) { - __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); + __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); } else { assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); - __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); + __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); } %} ins_pipe( pipe_slow ); %} -instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ +instruct vcastItoX(vec dst, vec src, vec vtmp) %{ predicate(UseAVX <= 2 && (Matcher::vector_length_in_bytes(n->in(1)) == 32) && (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src match(Set dst (VectorCastI2X src)); - format %{ "vector_cast_i2x $dst,$src\t! using $vtmp and $scratch as TEMP" %} - effect(TEMP dst, TEMP vtmp, TEMP scratch); + format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %} + effect(TEMP dst, TEMP vtmp); ins_encode %{ assert(UseAVX > 0, "required"); @@ -7189,13 +7185,13 @@ instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ int vlen_enc = vector_length_encoding(this, $src); if (to_elem_bt == T_BYTE) { - __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); + __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); } else { assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); - __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); + __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); } @@ -7243,12 +7239,11 @@ instruct vcastItoX_evex(vec dst, vec src) %{ ins_pipe( pipe_slow ); %} -instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ +instruct vcastLtoBS(vec dst, vec src) %{ predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && UseAVX <= 2); match(Set dst (VectorCastL2X src)); - effect(TEMP scratch); - format %{ "vector_cast_l2x $dst,$src\t! using $scratch as TEMP" %} + format %{ "vector_cast_l2x $dst,$src" %} ins_encode %{ assert(UseAVX > 0, "required"); @@ -7258,13 +7253,13 @@ instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ : ExternalAddress(vector_int_to_short_mask()); if (vlen <= 16) { __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); - __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); + __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); } else { assert(vlen <= 32, "required"); __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); - __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); + __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); } if (to_elem_bt == T_BYTE) { @@ -7346,60 +7341,60 @@ instruct vcastFtoD_reg(vec dst, vec src) %{ %} -instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{ +instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ // F2I conversion for < 64 byte vector using AVX instructions // AVX512 platforms that dont support avx512vl also use AVX instructions to support F2I predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) < 64 && Matcher::vector_element_basic_type(n) == T_INT); match(Set dst (VectorCastF2X src)); - effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr); - format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %} + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); + format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); __ vector_castF2I_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, - ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc); + ExternalAddress(vector_float_signflip()), noreg, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ +instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && Matcher::vector_element_basic_type(n) == T_INT); match(Set dst (VectorCastF2X src)); - effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); - format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); + format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); __ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, - ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc); + ExternalAddress(vector_float_signflip()), noreg, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ +instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ // F2X conversion for integral non T_INT target using AVX512 instructions // Platforms that dont support avx512vl can only support 64 byte vectors predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_INT); match(Set dst (VectorCastF2X src)); - effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); - format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); + format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} ins_encode %{ BasicType to_elem_bt = Matcher::vector_element_basic_type(this); if (to_elem_bt == T_LONG) { int vlen_enc = vector_length_encoding(this); __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, - ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc); + ExternalAddress(vector_double_signflip()), noreg, vlen_enc); } else { int vlen_enc = vector_length_encoding(this, $src); __ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, - ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc); + ExternalAddress(vector_float_signflip()), noreg, vlen_enc); if (to_elem_bt == T_SHORT) { __ evpmovdw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); } else { @@ -7422,17 +7417,17 @@ instruct vcastDtoF_reg(vec dst, vec src) %{ ins_pipe( pipe_slow ); %} -instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ +instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ predicate(is_integral_type(Matcher::vector_element_basic_type(n))); match(Set dst (VectorCastD2X src)); - effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); - format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); + format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this, $src); BasicType to_elem_bt = Matcher::vector_element_basic_type(this); __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, - ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc); + ExternalAddress(vector_double_signflip()), noreg, vlen_enc); %} ins_pipe( pipe_slow ); %} @@ -7524,23 +7519,23 @@ instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ ins_pipe( pipe_slow ); %} -instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ +instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 n->bottom_type()->isa_vectmask() == NULL && is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); - effect(TEMP scratch, TEMP ktmp); - format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} + effect(TEMP ktmp); + format %{ "vector_compare $dst,$src1,$src2,$cond" %} ins_encode %{ int vlen_enc = Assembler::AVX_512bit; Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); KRegister mask = k0; // The comparison itself is not being masked. if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); - __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); + __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); } else { __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); - __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); + __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); } %} ins_pipe( pipe_slow ); @@ -7633,13 +7628,13 @@ instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ ins_pipe( pipe_slow ); %} -instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ +instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ predicate((n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); - effect(TEMP scratch, TEMP ktmp); - format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} + effect(TEMP ktmp); + format %{ "vector_compare $dst,$src1,$src2,$cond" %} ins_encode %{ assert(UseAVX > 2, "required"); @@ -7653,12 +7648,12 @@ instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktm switch (src1_elem_bt) { case T_INT: { __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); - __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); + __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); break; } case T_LONG: { __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); - __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); + __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); break; } default: assert(false, "%s", type2name(src1_elem_bt)); @@ -7775,30 +7770,30 @@ instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ %} #endif -instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ +instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ predicate(Matcher::vector_length(n->in(1)) <= 4); match(Set dst (ExtractF src idx)); - effect(TEMP dst, TEMP tmp, TEMP vtmp); - format %{ "extractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} + effect(TEMP dst, TEMP vtmp); + format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} ins_encode %{ assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); - __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister); + __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ +instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || Matcher::vector_length(n->in(1)/*src*/) == 16); match(Set dst (ExtractF src idx)); - effect(TEMP tmp, TEMP vtmp); - format %{ "vextractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} + effect(TEMP vtmp); + format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} ins_encode %{ assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); - __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register); + __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); %} ins_pipe( pipe_slow ); %} @@ -7876,29 +7871,28 @@ instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ ins_pipe( pipe_slow ); %} -instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{ +instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ predicate(Matcher::vector_length_in_bytes(n) == 64 && n->in(2)->bottom_type()->isa_vectmask() == NULL); match(Set dst (VectorBlend (Binary src1 src2) mask)); - format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %} - effect(TEMP scratch, TEMP ktmp); + format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} + effect(TEMP ktmp); ins_encode %{ int vlen_enc = Assembler::AVX_512bit; BasicType elem_bt = Matcher::vector_element_basic_type(this); - __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register); + __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask, rRegP scratch) %{ +instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ predicate(n->in(2)->bottom_type()->isa_vectmask() && (!is_subword_type(Matcher::vector_element_basic_type(n)) || VM_Version::supports_avx512bw())); match(Set dst (VectorBlend (Binary src1 src2) mask)); - format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %} - effect(TEMP scratch); + format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); BasicType elem_bt = Matcher::vector_element_basic_type(this); @@ -7974,55 +7968,52 @@ instruct vabsL_reg(vec dst, vec src) %{ // --------------------------------- ABSNEG -------------------------------------- -instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ +instruct vabsnegF(vec dst, vec src) %{ predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F match(Set dst (AbsVF src)); match(Set dst (NegVF src)); - effect(TEMP scratch); format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} ins_cost(150); ins_encode %{ int opcode = this->ideal_Opcode(); int vlen = Matcher::vector_length(this); if (vlen == 2) { - __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); + __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); } else { assert(vlen == 8 || vlen == 16, "required"); int vlen_enc = vector_length_encoding(this); - __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); + __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); } %} ins_pipe( pipe_slow ); %} -instruct vabsneg4F(vec dst, rRegI scratch) %{ +instruct vabsneg4F(vec dst) %{ predicate(Matcher::vector_length(n) == 4); match(Set dst (AbsVF dst)); match(Set dst (NegVF dst)); - effect(TEMP scratch); format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} ins_cost(150); ins_encode %{ int opcode = this->ideal_Opcode(); - __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); + __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ +instruct vabsnegD(vec dst, vec src) %{ match(Set dst (AbsVD src)); match(Set dst (NegVD src)); - effect(TEMP scratch); format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} ins_encode %{ int opcode = this->ideal_Opcode(); uint vlen = Matcher::vector_length(this); if (vlen == 2) { assert(UseSSE >= 2, "required"); - __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); + __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); } else { int vlen_enc = vector_length_encoding(this); - __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); + __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); } %} ins_pipe( pipe_slow ); @@ -8214,14 +8205,14 @@ instruct loadMask(legVec dst, legVec src) %{ ins_pipe( pipe_slow ); %} -instruct loadMask64(kReg dst, vec src, vec xtmp, rRegI tmp) %{ +instruct loadMask64(kReg dst, vec src, vec xtmp) %{ predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); match(Set dst (VectorLoadMask src)); - effect(TEMP xtmp, TEMP tmp); - format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp and $tmp as TEMP" %} + effect(TEMP xtmp); + format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} ins_encode %{ __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, - $tmp$$Register, true, Assembler::AVX_512bit); + true, Assembler::AVX_512bit); %} ins_pipe( pipe_slow ); %} @@ -8234,7 +8225,7 @@ instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ ins_encode %{ int vlen_enc = vector_length_encoding(in(1)); __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, - noreg, false, vlen_enc); + false, vlen_enc); %} ins_pipe( pipe_slow ); %} @@ -8374,15 +8365,15 @@ instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ ins_pipe( pipe_slow ); %} -instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size, rRegI tmp) %{ +instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); match(Set dst (VectorStoreMask mask size)); - effect(TEMP_DEF dst, TEMP tmp); + effect(TEMP_DEF dst); format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} ins_encode %{ assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), - false, Assembler::AVX_512bit, $tmp$$Register); + false, Assembler::AVX_512bit, noreg); __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); %} ins_pipe( pipe_slow ); @@ -8426,30 +8417,29 @@ instruct vmaskcast(vec dst) %{ //-------------------------------- Load Iota Indices ---------------------------------- -instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{ +instruct loadIotaIndices(vec dst, immI_0 src) %{ predicate(Matcher::vector_element_basic_type(n) == T_BYTE); match(Set dst (VectorLoadConst src)); - effect(TEMP scratch); format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} ins_encode %{ int vlen_in_bytes = Matcher::vector_length_in_bytes(this); - __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes); + __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes); %} ins_pipe( pipe_slow ); %} #ifdef _LP64 -instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp, rRegP scratch) %{ +instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ match(Set dst (PopulateIndex src1 src2)); - effect(TEMP dst, TEMP vtmp, TEMP scratch); - format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp and $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp); + format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} ins_encode %{ assert($src2$$constant == 1, "required"); int vlen = Matcher::vector_length(this); int vlen_enc = vector_length_encoding(this); BasicType elem_bt = Matcher::vector_element_basic_type(this); __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); - __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen); + __ load_iota_indices($dst$$XMMRegister, vlen); if (elem_bt != T_BYTE) { __ vconvert_b2x(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); } @@ -8458,17 +8448,17 @@ instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp, rRegP s ins_pipe( pipe_slow ); %} -instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp, rRegP scratch) %{ +instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ match(Set dst (PopulateIndex src1 src2)); - effect(TEMP dst, TEMP vtmp, TEMP scratch); - format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp and $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp); + format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} ins_encode %{ assert($src2$$constant == 1, "required"); int vlen = Matcher::vector_length(this); int vlen_enc = vector_length_encoding(this); BasicType elem_bt = Matcher::vector_element_basic_type(this); __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); - __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen); + __ load_iota_indices($dst$$XMMRegister, vlen); if (elem_bt != T_BYTE) { __ vconvert_b2x(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); } @@ -8503,12 +8493,12 @@ instruct rearrangeB(vec dst, vec shuffle) %{ ins_pipe( pipe_slow ); %} -instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ +instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ predicate(Matcher::vector_element_basic_type(n) == T_BYTE && Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); match(Set dst (VectorRearrange src shuffle)); - effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); - format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2, $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); + format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ assert(UseAVX >= 2, "required"); // Swap src into vtmp1 @@ -8518,7 +8508,7 @@ instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVe // Shuffle original src to get entries from self 128 bit lane __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); // Create a blend mask by setting high bits for entries coming from other lane in shuffle - __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register); + __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); // Perform the blend __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); %} @@ -8555,12 +8545,12 @@ instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ // LoadShuffle/Rearrange for Short -instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{ +instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ predicate(Matcher::vector_element_basic_type(n) == T_SHORT && Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS match(Set dst (VectorLoadShuffle src)); - effect(TEMP dst, TEMP vtmp, TEMP scratch); - format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp); + format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %} ins_encode %{ // Create a byte shuffle mask from short shuffle mask // only byte shuffle instruction available on these platforms @@ -8577,7 +8567,7 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{ __ por($dst$$XMMRegister, $vtmp$$XMMRegister); // Add one to get alternate byte index - __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); + __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); } else { assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); @@ -8591,7 +8581,7 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{ __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); // Add one to get alternate byte index - __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, $scratch$$Register); + __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); } %} ins_pipe( pipe_slow ); @@ -8609,12 +8599,12 @@ instruct rearrangeS(vec dst, vec shuffle) %{ ins_pipe( pipe_slow ); %} -instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ +instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ predicate(Matcher::vector_element_basic_type(n) == T_SHORT && Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); match(Set dst (VectorRearrange src shuffle)); - effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); - format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2, $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); + format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ assert(UseAVX >= 2, "required"); // Swap src into vtmp1 @@ -8624,7 +8614,7 @@ instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVe // Shuffle original src to get entries from self 128 bit lane __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); // Create a blend mask by setting high bits for entries coming from other lane in shuffle - __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register); + __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); // Perform the blend __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); %} @@ -8663,12 +8653,12 @@ instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ // LoadShuffle/Rearrange for Integer and Float -instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{ +instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && Matcher::vector_length(n) == 4 && UseAVX < 2); match(Set dst (VectorLoadShuffle src)); - effect(TEMP dst, TEMP vtmp, TEMP scratch); - format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp); + format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %} ins_encode %{ assert(UseSSE >= 4, "required"); @@ -8687,7 +8677,7 @@ instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{ __ por($vtmp$$XMMRegister, $dst$$XMMRegister); // Add 3,2,1,0 to get alternate byte index - __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); %} ins_pipe( pipe_slow ); @@ -8734,12 +8724,12 @@ instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ // LoadShuffle/Rearrange for Long and Double -instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{ +instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); match(Set dst (VectorLoadShuffle src)); - effect(TEMP dst, TEMP vtmp, TEMP scratch); - format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %} + effect(TEMP dst, TEMP vtmp); + format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %} ins_encode %{ assert(UseAVX >= 2, "required"); @@ -8756,7 +8746,7 @@ instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{ __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); // Add one to get alternate double word index - __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, $scratch$$Register); + __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); %} ins_pipe( pipe_slow ); %} @@ -9391,30 +9381,30 @@ instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ ins_pipe( pipe_slow ); %} -instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp, rRegI rtmp) %{ +instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ predicate(VM_Version::supports_gfni()); match(Set dst (ReverseV src)); - effect(TEMP dst, TEMP xtmp, TEMP rtmp); - format %{ "vector_reverse_bit_gfni $dst, $src!\t using $rtmp and $xtmp as TEMP" %} + effect(TEMP dst, TEMP xtmp); + format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} ins_encode %{ int vec_enc = vector_length_encoding(this); BasicType bt = Matcher::vector_element_basic_type(this); InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1)); __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister, - addr, $rtmp$$Register, vec_enc); + addr, noreg, vec_enc); %} ins_pipe( pipe_slow ); %} -instruct vreverse_byte_reg(vec dst, vec src, rRegI rtmp) %{ +instruct vreverse_byte_reg(vec dst, vec src) %{ predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); match(Set dst (ReverseBytesV src)); - effect(TEMP dst, TEMP rtmp); - format %{ "vector_reverse_byte $dst, $src!\t using $rtmp as TEMP" %} + effect(TEMP dst); + format %{ "vector_reverse_byte $dst, $src" %} ins_encode %{ int vec_enc = vector_length_encoding(this); BasicType bt = Matcher::vector_element_basic_type(this); - __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, $rtmp$$Register, vec_enc); + __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); %} ins_pipe( pipe_slow ); %} @@ -10093,10 +10083,9 @@ instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ ins_pipe( pipe_slow ); %} -instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask, rRegP scratch) %{ +instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{ match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); - effect(TEMP scratch); - format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask\t! using $scratch as TEMP" %} + format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %} ins_encode %{ assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); int vlen_enc = vector_length_encoding(this, $src1);