8329035: New Data Destination instructions support

Reviewed-by: kvn, sviswanathan, jbhateja
This commit is contained in:
Steve Dohrmann 2024-09-12 16:06:16 +00:00
parent 7f1dae12e5
commit ab9b72c50a
2 changed files with 1530 additions and 18 deletions

File diff suppressed because it is too large Load Diff

@ -789,14 +789,26 @@ private:
// Declarations of the vex_prefix / evex_prefix helper family.
// NOTE(review): this listing shows removed and added diff lines without +/- markers,
// so the closing line of several parameter lists appears twice: first the old form,
// then the new form that appends defaulted parameters (no_flags, nds_is_ndd).
void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_v, bool evex_r, bool evex_b,
bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool no_flags = false);
// Address-operand helpers taking an ndd_enc register encoding. Both default no_flags to false.
// NOTE(review): the _ndd / _nf suffixes presumably select the "new data destination" and
// "no flags" encodings respectively - confirm against the definitions.
void evex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false);
void evex_prefix_nf(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false);
// Address-operand vex_prefix overload; the new form adds defaulted nds_is_ndd and no_flags.
void vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes);
InstructionAttr *attributes, bool nds_is_ndd = false, bool no_flags = false);
// Register-encoding form returning int; the new form adds defaulted nds_is_ndd and no_flags
// after the existing src_is_gpr parameter, so existing call sites keep compiling.
int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool src_is_gpr = false);
InstructionAttr *attributes, bool src_is_gpr = false, bool nds_is_ndd = false, bool no_flags = false);
// Register-encoding counterparts of evex_prefix_ndd / evex_prefix_nf; both return int.
int evex_prefix_and_encode_ndd(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false);
int evex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false);
// XMM register + Address form (unchanged context line of the diff).
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes);
@ -941,13 +953,20 @@ private:
// the product flag UseIncDec value.
// Decrement / increment declarations. Each existing one-operand form is followed by an
// e-prefixed form that names a separate destination register, takes the former operand
// as src (Register or Address), and adds a no_flags parameter.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void decl(Register dst);
void edecl(Register dst, Register src, bool no_flags);
void decl(Address dst);
void edecl(Register dst, Address src, bool no_flags);
void decq(Address dst);
void edecq(Register dst, Address src, bool no_flags);
void incl(Register dst);
void eincl(Register dst, Register src, bool no_flags);
void incl(Address dst);
void eincl(Register dst, Address src, bool no_flags);
void incq(Register dst);
void eincq(Register dst, Register src, bool no_flags);
void incq(Address dst);
void eincq(Register dst, Address src, bool no_flags);
// New cpus require use of movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
@ -1031,6 +1050,7 @@ private:
#endif
void vzeroupper_uncached();
// Register form of decq, with its e-prefixed counterpart taking a separate dst, a src,
// and a no_flags parameter (same shape as the Address form edecq).
void decq(Register dst);
void edecq(Register dst, Register src, bool no_flags);
void pusha();
void popa();
@ -1072,23 +1092,35 @@ private:
// Add declarations. Each addl/addq overload is paired with an eaddl/eaddq overload that
// takes an explicit destination register followed by the source operand(s) and a
// no_flags parameter. Where the original destination was an Address, the e-form takes
// that Address as a source and writes to the separate Register dst.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void addw(Address dst, Register src);
void addl(Address dst, int32_t imm32);
void eaddl(Register dst, Address src, int32_t imm32, bool no_flags);
void addl(Address dst, Register src);
void eaddl(Register dst, Address src1, Register src2, bool no_flags);
void addl(Register dst, int32_t imm32);
void eaddl(Register dst, Register src, int32_t imm32, bool no_flags);
void addl(Register dst, Address src);
void eaddl(Register dst, Register src1, Address src2, bool no_flags);
void addl(Register dst, Register src);
void eaddl(Register dst, Register src1, Register src2, bool no_flags);
void addq(Address dst, int32_t imm32);
void eaddq(Register dst, Address src, int32_t imm32, bool no_flags);
void addq(Address dst, Register src);
void eaddq(Register dst, Address src1, Register src2, bool no_flags);
void addq(Register dst, int32_t imm32);
void eaddq(Register dst, Register src, int32_t imm32, bool no_flags);
void addq(Register dst, Address src);
void eaddq(Register dst, Register src1, Address src2, bool no_flags);
void addq(Register dst, Register src);
void eaddq(Register dst, Register src1, Register src2, bool no_flags);
#ifdef _LP64
// Add Unsigned Integers with Carry Flag
// (the three-register e-form takes no no_flags parameter)
void adcxq(Register dst, Register src);
void eadcxq(Register dst, Register src1, Register src2);
// Add Unsigned Integers with Overflow Flag
// (the three-register e-form takes no no_flags parameter)
void adoxq(Register dst, Register src);
void eadoxq(Register dst, Register src1, Register src2);
#endif
void addr_nop_4();
@ -1122,16 +1154,25 @@ private:
// Bitwise-and declarations. andl/andq overloads are paired with eandl/eandq overloads
// that take a separate destination register, the source operand(s), and a no_flags
// parameter. In this hunk andb(Address, Register) and andl(Address, Register) have no
// e-prefixed counterpart, while andq(Address, Register) does.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void andb(Address dst, Register src);
void andl(Address dst, int32_t imm32);
void eandl(Register dst, Address src, int32_t imm32, bool no_flags);
void andl(Register dst, int32_t imm32);
void eandl(Register dst, Register src, int32_t imm32, bool no_flags);
void andl(Register dst, Address src);
void eandl(Register dst, Register src1, Address src2, bool no_flags);
void andl(Register dst, Register src);
void eandl(Register dst, Register src1, Register src2, bool no_flags);
void andl(Address dst, Register src);
void andq(Address dst, int32_t imm32);
void eandq(Register dst, Address src, int32_t imm32, bool no_flags);
void andq(Register dst, int32_t imm32);
void eandq(Register dst, Register src, int32_t imm32, bool no_flags);
void andq(Register dst, Address src);
void eandq(Register dst, Register src1, Address src2, bool no_flags);
void andq(Register dst, Register src);
void eandq(Register dst, Register src1, Register src2, bool no_flags);
void andq(Address dst, Register src);
void eandq(Register dst, Address src1, Register src2, bool no_flags);
// BMI instructions
void andnl(Register dst, Register src1, Register src2);
@ -1182,10 +1223,14 @@ private:
void clwb(Address adr);
// Conditional-move declarations. The ecmovl/ecmovq forms keep the Condition argument and
// take a separate destination register plus two sources (src2 may be a Register or an
// Address). Unlike most other e-prefixed declarations they have no no_flags parameter.
void cmovl(Condition cc, Register dst, Register src);
void ecmovl(Condition cc, Register dst, Register src1, Register src2);
void cmovl(Condition cc, Register dst, Address src);
void ecmovl(Condition cc, Register dst, Register src1, Address src2);
void cmovq(Condition cc, Register dst, Register src);
void ecmovq(Condition cc, Register dst, Register src1, Register src2);
void cmovq(Condition cc, Register dst, Address src);
void ecmovq(Condition cc, Register dst, Register src1, Address src2);
void cmpb(Address dst, int imm8);
@ -1488,25 +1533,41 @@ private:
void hlt();
// Division declarations. The e-prefixed single-operand forms keep the same operand list
// and only append no_flags (no separate destination register is named).
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void idivl(Register src);
void eidivl(Register src, bool no_flags);
void divl(Register src); // Unsigned division
void edivl(Register src, bool no_flags); // Unsigned division
#ifdef _LP64
void idivq(Register src);
void eidivq(Register src, bool no_flags);
void divq(Register src); // Unsigned division
void edivq(Register src, bool no_flags); // Unsigned division
#endif
// Signed-multiply declarations. The two-operand imull forms gain a three-operand e-form
// (dst, src1, src2); the immediate ("value") forms keep their operand list and add no_flags.
void imull(Register src);
void eimull(Register src, bool no_flags);
void imull(Register dst, Register src);
void eimull(Register dst, Register src1, Register src2, bool no_flags);
void imull(Register dst, Register src, int value);
void eimull(Register dst, Register src, int value, bool no_flags);
void imull(Register dst, Address src, int value);
void eimull(Register dst, Address src, int value, bool no_flags);
void imull(Register dst, Address src);
void eimull(Register dst, Register src1, Address src2, bool no_flags);
#ifdef _LP64
// Note that eimulq is declared both in a two-operand form (dst, src, no_flags) and in a
// three-operand form (dst, src1, src2, no_flags), for Register and for Address sources.
void imulq(Register dst, Register src);
void eimulq(Register dst, Register src, bool no_flags);
void eimulq(Register dst, Register src1, Register src2, bool no_flags);
void imulq(Register dst, Register src, int value);
void eimulq(Register dst, Register src, int value, bool no_flags);
void imulq(Register dst, Address src, int value);
void eimulq(Register dst, Address src, int value, bool no_flags);
void imulq(Register dst, Address src);
void eimulq(Register dst, Address src, bool no_flags);
void eimulq(Register dst, Register src1, Address src2, bool no_flags);
void imulq(Register dst);
void eimulq(Register dst, bool no_flags);
#endif
// jcc is the generic conditional branch generator to run-
@ -1565,11 +1626,15 @@ private:
void size_prefix();
// lzcnt declarations. The e-prefixed forms keep the (dst, src) operand list of the
// existing forms and only append a no_flags parameter.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void lzcntl(Register dst, Register src);
void elzcntl(Register dst, Register src, bool no_flags);
void lzcntl(Register dst, Address src);
void elzcntl(Register dst, Address src, bool no_flags);
#ifdef _LP64
void lzcntq(Register dst, Register src);
void elzcntq(Register dst, Register src, bool no_flags);
void lzcntq(Register dst, Address src);
void elzcntq(Register dst, Address src, bool no_flags);
#endif
enum Membar_mask_bits {
@ -1785,11 +1850,15 @@ private:
// Unsigned multiply with RAX destination register
// The e-prefixed forms keep the single source operand and only append no_flags.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void mull(Address src);
void emull(Address src, bool no_flags);
void mull(Register src);
void emull(Register src, bool no_flags);
#ifdef _LP64
void mulq(Address src);
void emulq(Address src, bool no_flags);
void mulq(Register src);
void emulq(Register src, bool no_flags);
// mulxq names two destination registers and has no e-prefixed counterpart in this hunk.
void mulxq(Register dst1, Register dst2, Register src);
#endif
@ -1802,19 +1871,25 @@ private:
void mulss(XMMRegister dst, XMMRegister src);
// Negate declarations. The e-prefixed forms name a separate destination register, take
// the former operand as src (Register or Address), and add a no_flags parameter.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void negl(Register dst);
void enegl(Register dst, Register src, bool no_flags);
void negl(Address dst);
void enegl(Register dst, Address src, bool no_flags);
#ifdef _LP64
void negq(Register dst);
void enegq(Register dst, Register src, bool no_flags);
void negq(Address dst);
void enegq(Register dst, Address src, bool no_flags);
#endif
void nop(uint i = 1);
// Bitwise-not declarations. The e-prefixed forms take (dst, src) and, unlike the negate
// forms, have no no_flags parameter.
void notl(Register dst);
void enotl(Register dst, Register src);
#ifdef _LP64
void notq(Register dst);
void enotq(Register dst, Register src);
void btsq(Address dst, int imm8);
void btrq(Address dst, int imm8);
@ -1822,21 +1897,37 @@ private:
#endif
void btq(Register dst, Register src);
// Bitwise-or declarations. Each or* overload is paired with an eor* overload that takes
// an explicit destination register followed by the source operand(s) and a no_flags
// parameter. Where the original destination was an Address, the e-form takes that
// Address as a source and writes to the separate Register dst.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void orw(Register dst, Register src);
void eorw(Register dst, Register src1, Register src2, bool no_flags);
void orl(Address dst, int32_t imm32);
void eorl(Register dst, Address src, int32_t imm32, bool no_flags);
void orl(Register dst, int32_t imm32);
void eorl(Register dst, Register src, int32_t imm32, bool no_flags);
void orl(Register dst, Address src);
void eorl(Register dst, Register src1, Address src2, bool no_flags);
void orl(Register dst, Register src);
void eorl(Register dst, Register src1, Register src2, bool no_flags);
void orl(Address dst, Register src);
void eorl(Register dst, Address src1, Register src2, bool no_flags);
void orb(Address dst, int imm8);
void eorb(Register dst, Address src, int imm8, bool no_flags);
void orb(Address dst, Register src);
void eorb(Register dst, Address src1, Register src2, bool no_flags);
void orq(Address dst, int32_t imm32);
void eorq(Register dst, Address src, int32_t imm32, bool no_flags);
void orq(Address dst, Register src);
void eorq(Register dst, Address src1, Register src2, bool no_flags);
void orq(Register dst, int32_t imm32);
void eorq(Register dst, Register src, int32_t imm32, bool no_flags);
// NOTE(review): the _imm32 suffix presumably forces a 4-byte immediate encoding - confirm in the definitions.
void orq_imm32(Register dst, int32_t imm32);
void eorq_imm32(Register dst, Register src, int32_t imm32, bool no_flags);
void orq(Register dst, Address src);
void eorq(Register dst, Register src1, Address src2, bool no_flags);
void orq(Register dst, Register src);
void eorq(Register dst, Register src1, Register src2, bool no_flags);
// Pack with signed saturation
void packsswb(XMMRegister dst, XMMRegister src);
@ -2022,7 +2113,9 @@ private:
#endif
// popcntl declarations. The e-prefixed forms keep the (dst, src) operand list and only
// append a no_flags parameter.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void popcntl(Register dst, Address src);
void epopcntl(Register dst, Address src, bool no_flags);
void popcntl(Register dst, Register src);
void epopcntl(Register dst, Register src, bool no_flags);
// Masked vector forms taking a KRegister mask, a merge switch, and a vector length.
void evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpopcntw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
@ -2031,7 +2124,9 @@ private:
#ifdef _LP64
// popcntq declarations, compiled only when _LP64 is defined. The e-prefixed forms keep
// the (dst, src) operand list and only append a no_flags parameter.
void popcntq(Register dst, Address src);
void epopcntq(Register dst, Address src, bool no_flags);
void popcntq(Register dst, Register src);
void epopcntq(Register dst, Register src, bool no_flags);
#endif
// Prefetches (SSE, SSE2, 3DNOW only)
@ -2131,10 +2226,13 @@ private:
void pushq(Address src);
// rcl / rcr declarations with an immediate count. The e-prefixed forms add a separate
// source register (dst, src, imm8) and take no no_flags parameter.
void rcll(Register dst, int imm8);
void ercll(Register dst, Register src, int imm8);
void rclq(Register dst, int imm8);
void erclq(Register dst, Register src, int imm8);
void rcrq(Register dst, int imm8);
void ercrq(Register dst, Register src, int imm8);
void rcpps(XMMRegister dst, XMMRegister src);
@ -2145,18 +2243,26 @@ private:
void ret(int imm16);
// rol / ror declarations. Each existing form (with or without an imm8 count) is paired
// with an e-prefixed form that names a separate source register and adds no_flags.
// NOTE(review): the forms without imm8 presumably take the count from CL, and no_flags
// presumably suppresses the status-flag update - confirm both in the definitions.
void roll(Register dst);
void eroll(Register dst, Register src, bool no_flags);
void roll(Register dst, int imm8);
void eroll(Register dst, Register src, int imm8, bool no_flags);
void rorl(Register dst);
void erorl(Register dst, Register src, bool no_flags);
void rorl(Register dst, int imm8);
void erorl(Register dst, Register src, int imm8, bool no_flags);
#ifdef _LP64
void rolq(Register dst);
void erolq(Register dst, Register src, bool no_flags);
void rolq(Register dst, int imm8);
void erolq(Register dst, Register src, int imm8, bool no_flags);
void rorq(Register dst);
void erorq(Register dst, Register src, bool no_flags);
void rorq(Register dst, int imm8);
void erorq(Register dst, Register src, int imm8, bool no_flags);
// rorx forms already take separate dst and src operands; no e-prefixed counterpart here.
void rorxl(Register dst, Register src, int imm8);
void rorxl(Register dst, Address src, int imm8);
void rorxq(Register dst, Register src, int imm8);
@ -2166,25 +2272,41 @@ private:
void sahf();
// sal / sar declarations. Each existing form (Register or Address destination, with or
// without an imm8 count) is paired with an e-prefixed form that writes to a separate
// Register dst, takes the former destination as src, and adds a no_flags parameter.
// NOTE(review): the forms without imm8 presumably take the count from CL, and no_flags
// presumably suppresses the status-flag update - confirm both in the definitions.
void sall(Register dst, int imm8);
void esall(Register dst, Register src, int imm8, bool no_flags);
void sall(Register dst);
void esall(Register dst, Register src, bool no_flags);
void sall(Address dst, int imm8);
void esall(Register dst, Address src, int imm8, bool no_flags);
void sall(Address dst);
void esall(Register dst, Address src, bool no_flags);
void sarl(Address dst, int imm8);
void esarl(Register dst, Address src, int imm8, bool no_flags);
void sarl(Address dst);
void esarl(Register dst, Address src, bool no_flags);
void sarl(Register dst, int imm8);
void esarl(Register dst, Register src, int imm8, bool no_flags);
void sarl(Register dst);
void esarl(Register dst, Register src, bool no_flags);
#ifdef _LP64
void salq(Register dst, int imm8);
void esalq(Register dst, Register src, int imm8, bool no_flags);
void salq(Register dst);
void esalq(Register dst, Register src, bool no_flags);
void salq(Address dst, int imm8);
void esalq(Register dst, Address src, int imm8, bool no_flags);
void salq(Address dst);
void esalq(Register dst, Address src, bool no_flags);
void sarq(Address dst, int imm8);
void esarq(Register dst, Address src, int imm8, bool no_flags);
void sarq(Address dst);
void esarq(Register dst, Address src, bool no_flags);
void sarq(Register dst, int imm8);
void esarq(Register dst, Register src, int imm8, bool no_flags);
void sarq(Register dst);
void esarq(Register dst, Register src, bool no_flags);
#endif
void sbbl(Address dst, int32_t imm32);
@ -2216,29 +2338,47 @@ private:
void sha256msg2(XMMRegister dst, XMMRegister src);
// shld / shrd declarations. The e-prefixed forms take a separate destination register
// plus two source registers (and the imm8 count where present), followed by no_flags.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void shldl(Register dst, Register src);
void eshldl(Register dst, Register src1, Register src2, bool no_flags);
void shldl(Register dst, Register src, int8_t imm8);
void eshldl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
void shrdl(Register dst, Register src);
void eshrdl(Register dst, Register src1, Register src2, bool no_flags);
void shrdl(Register dst, Register src, int8_t imm8);
void eshrdl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
#ifdef _LP64
void shldq(Register dst, Register src, int8_t imm8);
void eshldq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
void shrdq(Register dst, Register src, int8_t imm8);
void eshrdq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
#endif
// shl / shr declarations. Each existing form is paired with an e-prefixed form that
// writes to a separate Register dst, takes the former destination (Register or Address)
// as src, and adds no_flags.
void shll(Register dst, int imm8);
void eshll(Register dst, Register src, int imm8, bool no_flags);
void shll(Register dst);
void eshll(Register dst, Register src, bool no_flags);
void shlq(Register dst, int imm8);
void eshlq(Register dst, Register src, int imm8, bool no_flags);
void shlq(Register dst);
void eshlq(Register dst, Register src, bool no_flags);
void shrl(Register dst, int imm8);
void eshrl(Register dst, Register src, int imm8, bool no_flags);
void shrl(Register dst);
void eshrl(Register dst, Register src, bool no_flags);
void shrl(Address dst);
void eshrl(Register dst, Address src, bool no_flags);
void shrl(Address dst, int imm8);
void eshrl(Register dst, Address src, int imm8, bool no_flags);
void shrq(Register dst, int imm8);
void eshrq(Register dst, Register src, int imm8, bool no_flags);
void shrq(Register dst);
void eshrq(Register dst, Register src, bool no_flags);
void shrq(Address dst);
void eshrq(Register dst, Address src, bool no_flags);
void shrq(Address dst, int imm8);
void eshrq(Register dst, Address src, int imm8, bool no_flags);
void smovl(); // QQQ generic?
@ -2258,20 +2398,32 @@ private:
void stmxcsr( Address dst );
// Subtract declarations. Each subl/subq overload is paired with an esubl/esubq overload
// that takes an explicit destination register followed by the source operand(s) and a
// no_flags parameter. Where the original destination was an Address, the e-form takes
// that Address as a source and writes to the separate Register dst.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void subl(Address dst, int32_t imm32);
void esubl(Register dst, Address src, int32_t imm32, bool no_flags);
void subl(Address dst, Register src);
void esubl(Register dst, Address src1, Register src2, bool no_flags);
void subl(Register dst, int32_t imm32);
void esubl(Register dst, Register src, int32_t imm32, bool no_flags);
void subl(Register dst, Address src);
void esubl(Register dst, Register src1, Address src2, bool no_flags);
void subl(Register dst, Register src);
void esubl(Register dst, Register src1, Register src2, bool no_flags);
void subq(Address dst, int32_t imm32);
void esubq(Register dst, Address src, int32_t imm32, bool no_flags);
void subq(Address dst, Register src);
void esubq(Register dst, Address src1, Register src2, bool no_flags);
void subq(Register dst, int32_t imm32);
void esubq(Register dst, Register src, int32_t imm32, bool no_flags);
void subq(Register dst, Address src);
void esubq(Register dst, Register src1, Address src2, bool no_flags);
void subq(Register dst, Register src);
void esubq(Register dst, Register src1, Register src2, bool no_flags);
// Force generation of a 4 byte immediate value even if it fits into 8bit
void subl_imm32(Register dst, int32_t imm32);
void esubl_imm32(Register dst, Register src, int32_t imm32, bool no_flags);
void subq_imm32(Register dst, int32_t imm32);
void esubq_imm32(Register dst, Register src, int32_t imm32, bool no_flags);
// Subtract Scalar Double-Precision Floating-Point Values
void subsd(XMMRegister dst, Address src);
@ -2296,9 +2448,13 @@ private:
// BMI - count trailing zeros
// The e-prefixed forms keep the (dst, src) operand list and only append a no_flags parameter.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void tzcntl(Register dst, Register src);
void etzcntl(Register dst, Register src, bool no_flags);
void tzcntl(Register dst, Address src);
void etzcntl(Register dst, Address src, bool no_flags);
void tzcntq(Register dst, Register src);
void etzcntq(Register dst, Register src, bool no_flags);
void tzcntq(Register dst, Address src);
void etzcntq(Register dst, Address src, bool no_flags);
// Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void ucomisd(XMMRegister dst, Address src);
@ -2331,20 +2487,33 @@ private:
void xgetbv();
// Exclusive-or declarations. Each xor* overload is paired with an exor* overload that
// takes an explicit destination register followed by the source operand(s) and a
// no_flags parameter. Where the original destination was an Address, the e-form takes
// that Address as a source and writes to the separate Register dst.
// NOTE(review): no_flags presumably suppresses the status-flag update - confirm in the definitions.
void xorl(Register dst, int32_t imm32);
void exorl(Register dst, Register src, int32_t imm32, bool no_flags);
void xorl(Address dst, int32_t imm32);
void exorl(Register dst, Address src, int32_t imm32, bool no_flags);
void xorl(Register dst, Address src);
void exorl(Register dst, Register src1, Address src2, bool no_flags);
void xorl(Register dst, Register src);
void exorl(Register dst, Register src1, Register src2, bool no_flags);
void xorl(Address dst, Register src);
void exorl(Register dst, Address src1, Register src2, bool no_flags);
void xorb(Address dst, Register src);
void exorb(Register dst, Address src1, Register src2, bool no_flags);
void xorb(Register dst, Address src);
void exorb(Register dst, Register src1, Address src2, bool no_flags);
void xorw(Register dst, Address src);
void exorw(Register dst, Register src1, Address src2, bool no_flags);
void xorq(Register dst, Address src);
void exorq(Register dst, Register src1, Address src2, bool no_flags);
void xorq(Address dst, int32_t imm32);
void exorq(Register dst, Address src, int32_t imm32, bool no_flags);
void xorq(Register dst, Register src);
void exorq(Register dst, Register src1, Register src2, bool no_flags);
void xorq(Register dst, int32_t imm32);
void exorq(Register dst, Register src, int32_t imm32, bool no_flags);
void xorq(Address dst, Register src);
void exorq(Register dst, Address src1, Register src2, bool no_flags);
// AVX 3-operands scalar instructions (encoded with VEX prefix)