From 7a895552c8eb9ae19f8d6eb8c35a0393445305fa Mon Sep 17 00:00:00 2001 From: Scott Gibbons Date: Tue, 23 Apr 2024 23:35:59 +0000 Subject: [PATCH] 8330844: Add aliases for conditional jumps and additional instruction forms for x86 Reviewed-by: kvn, epeter, sviswanathan --- src/hotspot/cpu/x86/assembler_x86.cpp | 71 ++++++++++++++++++++++ src/hotspot/cpu/x86/assembler_x86.hpp | 9 +++ src/hotspot/cpu/x86/macroAssembler_x86.cpp | 10 +++ src/hotspot/cpu/x86/macroAssembler_x86.hpp | 70 +++++++++++++++++++++ 4 files changed, 160 insertions(+) diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index e4885fa946d..f5a6cb845ad 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -1758,6 +1758,22 @@ void Assembler::cmovl(Condition cc, Register dst, Address src) { emit_operand(dst, src, 0); } +void Assembler::cmpb(Address dst, Register reg) { + assert(reg->has_byte_register(), "must have byte register"); + InstructionMark im(this); + prefix(dst, reg, true); + emit_int8((unsigned char)0x38); + emit_operand(reg, dst, 0); +} + +void Assembler::cmpb(Register reg, Address dst) { + assert(reg->has_byte_register(), "must have byte register"); + InstructionMark im(this); + prefix(dst, reg, true); + emit_int8((unsigned char)0x3a); + emit_operand(reg, dst, 0); +} + void Assembler::cmpb(Address dst, int imm8) { InstructionMark im(this); prefix(dst); @@ -1789,6 +1805,13 @@ void Assembler::cmpl(Register dst, Address src) { emit_operand(dst, src, 0); } +void Assembler::cmpl(Address dst, Register reg) { + InstructionMark im(this); + prefix(dst, reg); + emit_int8(0x39); + emit_operand(reg, dst, 0); +} + void Assembler::cmpl_imm32(Address dst, int32_t imm32) { InstructionMark im(this); prefix(dst); @@ -1804,6 +1827,14 @@ void Assembler::cmpw(Address dst, int imm16) { emit_int16(imm16); } +void Assembler::cmpw(Address dst, Register reg) { + InstructionMark im(this); + emit_int8(0x66); + prefix(dst, reg); + 
emit_int8((unsigned char)0x39); + emit_operand(reg, dst, 0); +} + // The 32-bit cmpxchg compares the value at adr with the contents of rax, // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,. // The ZF is set if the compared values were equal, and cleared otherwise. @@ -4425,6 +4456,16 @@ void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int emit_int16(0x74, (0xC0 | encode)); } +void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); + assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x74); + emit_operand(dst, src2, 0); +} + // In this context, kdst is written the mask used to process the equal components void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx512bw(), ""); @@ -4531,6 +4572,17 @@ void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int emit_int16(0x75, (0xC0 | encode)); } +// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst +void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(vector_len == AVX_128bit ? 
VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); + assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x75); + emit_operand(dst, src, 0); +} + // In this context, kdst is written the mask used to process the equal components void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx512bw(), ""); @@ -7648,6 +7700,14 @@ void Assembler::vpminsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int v emit_int16(0x38, (0xC0 | encode)); } +void Assembler::vpminub(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + (vector_len == AVX_256bit ? 
VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0xDA, (0xC0 | encode)); +} + void Assembler::pminsw(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse2(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); @@ -12273,6 +12333,13 @@ void Assembler::bzhiq(Register dst, Register src1, Register src2) { emit_int16((unsigned char)0xF5, (0xC0 | encode)); } +void Assembler::bzhil(Register dst, Register src1, Register src2) { + assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported"); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); + emit_int16((unsigned char)0xF5, (0xC0 | encode)); +} + void Assembler::pextl(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); @@ -13167,6 +13234,10 @@ void Assembler::cdqq() { emit_int16(REX_W, (unsigned char)0x99); } +void Assembler::cdqe() { + emit_int16(REX_W, (unsigned char)0x98); +} + void Assembler::clflush(Address adr) { assert(VM_Version::supports_clflush(), "should do"); prefix(adr); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 64e8cf99bfc..a8747a4308a 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ 
b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -1094,6 +1094,7 @@ private: void cdql(); void cdqq(); + void cdqe(); void cld(); @@ -1109,12 +1110,15 @@ private: void cmpb(Address dst, int imm8); + void cmpb(Address dst, Register reg); + void cmpb(Register reg, Address dst); void cmpl(Address dst, int32_t imm32); void cmpl(Register dst, int32_t imm32); void cmpl(Register dst, Register src); void cmpl(Register dst, Address src); void cmpl_imm32(Address dst, int32_t imm32); + void cmpl(Address dst, Register reg); void cmpq(Address dst, int32_t imm32); void cmpq(Address dst, Register src); @@ -1123,6 +1127,7 @@ private: void cmpq(Register dst, Address src); void cmpw(Address dst, int imm16); + void cmpw(Address dst, Register reg); void cmpxchg8 (Address adr); @@ -1802,6 +1807,7 @@ private: void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len); void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpcmpeqb(XMMRegister dst, XMMRegister src1, Address src2, int vector_len); void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len); void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len); @@ -1816,6 +1822,7 @@ private: void evpcmpuq(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len); void pcmpeqw(XMMRegister dst, XMMRegister src); + void vpcmpeqw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len); @@ -2306,6 +2313,7 @@ private: void shrxq(Register dst, Address src1, Register src2); void bzhiq(Register dst, Register src1, Register src2); + void bzhil(Register dst, Register 
src1, Register src2); void pextl(Register dst, Register src1, Register src2); void pdepl(Register dst, Register src1, Register src2); @@ -2581,6 +2589,7 @@ private: // Minimum of packed integers void pminsb(XMMRegister dst, XMMRegister src); void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void vpminub(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); void pminsw(XMMRegister dst, XMMRegister src); void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); void pminsd(XMMRegister dst, XMMRegister src); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 6c8ca583b10..d20f34e27b8 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -3570,11 +3570,21 @@ void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, Assembler::vpcmpeqb(dst, nds, src, vector_len); } +void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) { + assert(((dst->encoding() < 16 && src1->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15"); + Assembler::vpcmpeqb(dst, src1, src2, vector_len); +} + void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15"); Assembler::vpcmpeqw(dst, nds, src, vector_len); } +void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15"); + Assembler::vpcmpeqw(dst, nds, src, vector_len); +} + void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) { assert(rscratch != noreg 
|| always_reachable(src), "missing"); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 738ce8a55c3..e045572a5cd 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -955,6 +955,74 @@ public: // contained in the location described by entry (not the address of entry) void jump(ArrayAddress entry, Register rscratch); + // Adding more natural conditional jump instructions + void ALWAYSINLINE jo(Label& L, bool maybe_short = true) { jcc(Assembler::overflow, L, maybe_short); } + void ALWAYSINLINE jno(Label& L, bool maybe_short = true) { jcc(Assembler::noOverflow, L, maybe_short); } + void ALWAYSINLINE js(Label& L, bool maybe_short = true) { jcc(Assembler::negative, L, maybe_short); } + void ALWAYSINLINE jns(Label& L, bool maybe_short = true) { jcc(Assembler::positive, L, maybe_short); } + void ALWAYSINLINE je(Label& L, bool maybe_short = true) { jcc(Assembler::equal, L, maybe_short); } + void ALWAYSINLINE jz(Label& L, bool maybe_short = true) { jcc(Assembler::zero, L, maybe_short); } + void ALWAYSINLINE jne(Label& L, bool maybe_short = true) { jcc(Assembler::notEqual, L, maybe_short); } + void ALWAYSINLINE jnz(Label& L, bool maybe_short = true) { jcc(Assembler::notZero, L, maybe_short); } + void ALWAYSINLINE jb(Label& L, bool maybe_short = true) { jcc(Assembler::below, L, maybe_short); } + void ALWAYSINLINE jnae(Label& L, bool maybe_short = true) { jcc(Assembler::below, L, maybe_short); } + void ALWAYSINLINE jc(Label& L, bool maybe_short = true) { jcc(Assembler::carrySet, L, maybe_short); } + void ALWAYSINLINE jnb(Label& L, bool maybe_short = true) { jcc(Assembler::aboveEqual, L, maybe_short); } + void ALWAYSINLINE jae(Label& L, bool maybe_short = true) { jcc(Assembler::aboveEqual, L, maybe_short); } + void ALWAYSINLINE jnc(Label& L, bool maybe_short = true) { jcc(Assembler::carryClear, L, maybe_short); } + void ALWAYSINLINE jbe(Label& L, bool maybe_short = 
true) { jcc(Assembler::belowEqual, L, maybe_short); } + void ALWAYSINLINE jna(Label& L, bool maybe_short = true) { jcc(Assembler::belowEqual, L, maybe_short); } + void ALWAYSINLINE ja(Label& L, bool maybe_short = true) { jcc(Assembler::above, L, maybe_short); } + void ALWAYSINLINE jnbe(Label& L, bool maybe_short = true) { jcc(Assembler::above, L, maybe_short); } + void ALWAYSINLINE jl(Label& L, bool maybe_short = true) { jcc(Assembler::less, L, maybe_short); } + void ALWAYSINLINE jnge(Label& L, bool maybe_short = true) { jcc(Assembler::less, L, maybe_short); } + void ALWAYSINLINE jge(Label& L, bool maybe_short = true) { jcc(Assembler::greaterEqual, L, maybe_short); } + void ALWAYSINLINE jnl(Label& L, bool maybe_short = true) { jcc(Assembler::greaterEqual, L, maybe_short); } + void ALWAYSINLINE jle(Label& L, bool maybe_short = true) { jcc(Assembler::lessEqual, L, maybe_short); } + void ALWAYSINLINE jng(Label& L, bool maybe_short = true) { jcc(Assembler::lessEqual, L, maybe_short); } + void ALWAYSINLINE jg(Label& L, bool maybe_short = true) { jcc(Assembler::greater, L, maybe_short); } + void ALWAYSINLINE jnle(Label& L, bool maybe_short = true) { jcc(Assembler::greater, L, maybe_short); } + void ALWAYSINLINE jp(Label& L, bool maybe_short = true) { jcc(Assembler::parity, L, maybe_short); } + void ALWAYSINLINE jpe(Label& L, bool maybe_short = true) { jcc(Assembler::parity, L, maybe_short); } + void ALWAYSINLINE jnp(Label& L, bool maybe_short = true) { jcc(Assembler::noParity, L, maybe_short); } + void ALWAYSINLINE jpo(Label& L, bool maybe_short = true) { jcc(Assembler::noParity, L, maybe_short); } + // * No condition for this * void ALWAYSINLINE jcxz(Label& L, bool maybe_short = true) { jcc(Assembler::cxz, L, maybe_short); } + // * No condition for this * void ALWAYSINLINE jecxz(Label& L, bool maybe_short = true) { jcc(Assembler::cxz, L, maybe_short); } + + // Short versions of the above + void ALWAYSINLINE jo_b(Label& L) { jccb(Assembler::overflow, L); } + void 
ALWAYSINLINE jno_b(Label& L) { jccb(Assembler::noOverflow, L); } + void ALWAYSINLINE js_b(Label& L) { jccb(Assembler::negative, L); } + void ALWAYSINLINE jns_b(Label& L) { jccb(Assembler::positive, L); } + void ALWAYSINLINE je_b(Label& L) { jccb(Assembler::equal, L); } + void ALWAYSINLINE jz_b(Label& L) { jccb(Assembler::zero, L); } + void ALWAYSINLINE jne_b(Label& L) { jccb(Assembler::notEqual, L); } + void ALWAYSINLINE jnz_b(Label& L) { jccb(Assembler::notZero, L); } + void ALWAYSINLINE jb_b(Label& L) { jccb(Assembler::below, L); } + void ALWAYSINLINE jnae_b(Label& L) { jccb(Assembler::below, L); } + void ALWAYSINLINE jc_b(Label& L) { jccb(Assembler::carrySet, L); } + void ALWAYSINLINE jnb_b(Label& L) { jccb(Assembler::aboveEqual, L); } + void ALWAYSINLINE jae_b(Label& L) { jccb(Assembler::aboveEqual, L); } + void ALWAYSINLINE jnc_b(Label& L) { jccb(Assembler::carryClear, L); } + void ALWAYSINLINE jbe_b(Label& L) { jccb(Assembler::belowEqual, L); } + void ALWAYSINLINE jna_b(Label& L) { jccb(Assembler::belowEqual, L); } + void ALWAYSINLINE ja_b(Label& L) { jccb(Assembler::above, L); } + void ALWAYSINLINE jnbe_b(Label& L) { jccb(Assembler::above, L); } + void ALWAYSINLINE jl_b(Label& L) { jccb(Assembler::less, L); } + void ALWAYSINLINE jnge_b(Label& L) { jccb(Assembler::less, L); } + void ALWAYSINLINE jge_b(Label& L) { jccb(Assembler::greaterEqual, L); } + void ALWAYSINLINE jnl_b(Label& L) { jccb(Assembler::greaterEqual, L); } + void ALWAYSINLINE jle_b(Label& L) { jccb(Assembler::lessEqual, L); } + void ALWAYSINLINE jng_b(Label& L) { jccb(Assembler::lessEqual, L); } + void ALWAYSINLINE jg_b(Label& L) { jccb(Assembler::greater, L); } + void ALWAYSINLINE jnle_b(Label& L) { jccb(Assembler::greater, L); } + void ALWAYSINLINE jp_b(Label& L) { jccb(Assembler::parity, L); } + void ALWAYSINLINE jpe_b(Label& L) { jccb(Assembler::parity, L); } + void ALWAYSINLINE jnp_b(Label& L) { jccb(Assembler::noParity, L); } + void ALWAYSINLINE jpo_b(Label& L) { jccb(Assembler::noParity, 
L); } + // * No condition for this * void ALWAYSINLINE jcxz_b(Label& L) { jccb(Assembler::cxz, L); } + // * No condition for this * void ALWAYSINLINE jecxz_b(Label& L) { jccb(Assembler::cxz, L); } + // Floating void push_f(XMMRegister r); @@ -1389,7 +1457,9 @@ public: void vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpcmpeqb(XMMRegister dst, XMMRegister src1, Address src2, int vector_len); + void vpcmpeqw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);