8154975: Update for vectorizedMismatch with AVX512
Reviewed-by: kvn
This commit is contained in:
parent
15893e995b
commit
8cb0a98032
@ -2323,6 +2323,15 @@ void Assembler::kortestql(KRegister src1, KRegister src2) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// This instruction produces ZF or CF flags
|
||||
void Assembler::ktestql(KRegister src1, KRegister src2) {
|
||||
assert(VM_Version::supports_avx512bw(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0x99);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::movb(Address dst, int imm8) {
|
||||
InstructionMark im(this);
|
||||
prefix(dst);
|
||||
@ -2491,6 +2500,19 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
|
||||
emit_operand(src, dst);
|
||||
}
|
||||
|
||||
// Masked overload of the evmovdqub register-from-memory form.
// The caller-supplied opmask register 'mask' is recorded in the instruction
// attributes so that it is encoded into the EVEX prefix; opcode 0x6F is then
// emitted with 'dst' as the register operand and 'src' as the memory operand.
// Requires AVX512VL+BW and an open vector-masking context (stub code only).
void Assembler::evmovdqub(KRegister mask, XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(is_vector_masking(), "");    // For stub code use only
  InstructionMark im(this);

  InstructionAttr attributes(vector_len,
                             /* vex_w */ false,
                             /* legacy_mode */ _legacy_mode_bw,
                             /* no_mask_reg */ false,
                             /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();

  const int dst_enc = dst->encoding();
  vex_prefix(src, 0, dst_enc, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
|
||||
|
||||
void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
@ -3285,6 +3307,19 @@ void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vect
|
||||
emit_operand(as_Register(dst_enc), src);
|
||||
}
|
||||
|
||||
// Masked overload of evpcmpeqb with a memory source.
// 'mask' is stored in the instruction attributes so it is encoded into the
// EVEX prefix; opcode 0x74 is then emitted with 'kdst' as the register operand
// (passed to emit_operand via as_Register) and 'src' as the memory operand.
// Requires AVX512VL+BW and an open vector-masking context (stub code only).
void Assembler::evpcmpeqb(KRegister mask, KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(is_vector_masking(), "");    // For stub code use only
  InstructionMark im(this);

  InstructionAttr attributes(vector_len,
                             /* vex_w */ false,
                             /* legacy_mode */ false,
                             /* no_reg_mask */ false,
                             /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();

  const int kdst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), kdst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_operand(as_Register(kdst_enc), src);
}
|
||||
|
||||
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst.
|
||||
void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
|
||||
assert(VM_Version::supports_sse2(), "");
|
||||
@ -6938,7 +6973,7 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
|
||||
emit_int8(byte3);
|
||||
|
||||
// P2: byte 4 as zL'Lbv'aaa
|
||||
int byte4 = (_attributes->is_no_reg_mask()) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
|
||||
int byte4 = (_attributes->is_no_reg_mask()) ? 0 : _attributes->get_embedded_opmask_register_specifier(); // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
|
||||
// EVEX.v' for extending EVEX.vvvv or VIDX
|
||||
byte4 |= (evex_v ? 0: EVEX_V);
|
||||
// third EVEX.b for broadcast actions
|
||||
|
@ -606,6 +606,7 @@ private:
|
||||
bool _legacy_mode_vl;
|
||||
bool _legacy_mode_vlbw;
|
||||
bool _is_managed;
|
||||
bool _vector_masking; // For stub code use only
|
||||
|
||||
class InstructionAttr *_attributes;
|
||||
|
||||
@ -813,6 +814,7 @@ private:
|
||||
_legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
|
||||
_legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
|
||||
_is_managed = false;
|
||||
_vector_masking = false;
|
||||
_attributes = NULL;
|
||||
}
|
||||
|
||||
@ -823,6 +825,12 @@ private:
|
||||
void clear_managed(void) { _is_managed = false; }
|
||||
bool is_managed(void) { return _is_managed; }
|
||||
|
||||
// Following functions are for stub code use only
|
||||
void set_vector_masking(void) { _vector_masking = true; }
|
||||
void clear_vector_masking(void) { _vector_masking = false; }
|
||||
bool is_vector_masking(void) { return _vector_masking; }
|
||||
|
||||
|
||||
void lea(Register dst, Address src);
|
||||
|
||||
void mov(Register dst, Register src);
|
||||
@ -1354,6 +1362,8 @@ private:
|
||||
void kortestdl(KRegister dst, KRegister src);
|
||||
void kortestql(KRegister dst, KRegister src);
|
||||
|
||||
void ktestql(KRegister dst, KRegister src);
|
||||
|
||||
void movdl(XMMRegister dst, Register src);
|
||||
void movdl(Register dst, XMMRegister src);
|
||||
void movdl(XMMRegister dst, Address src);
|
||||
@ -1381,6 +1391,7 @@ private:
|
||||
void evmovdqub(Address dst, XMMRegister src, int vector_len);
|
||||
void evmovdqub(XMMRegister dst, Address src, int vector_len);
|
||||
void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void evmovdqub(KRegister mask, XMMRegister dst, Address src, int vector_len);
|
||||
void evmovdquw(Address dst, XMMRegister src, int vector_len);
|
||||
void evmovdquw(XMMRegister dst, Address src, int vector_len);
|
||||
void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
@ -1534,6 +1545,7 @@ private:
|
||||
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
|
||||
void evpcmpeqb(KRegister mask, KRegister kdst, XMMRegister nds, Address src, int vector_len);
|
||||
|
||||
void pcmpeqw(XMMRegister dst, XMMRegister src);
|
||||
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
@ -2098,7 +2110,8 @@ public:
|
||||
_evex_encoding(0),
|
||||
_is_clear_context(false),
|
||||
_is_extended_context(false),
|
||||
_current_assembler(NULL) {
|
||||
_current_assembler(NULL),
|
||||
_embedded_opmask_register_specifier(1) { // hard code k1, it will be initialized for now
|
||||
if (UseAVX < 3) _legacy_mode = true;
|
||||
}
|
||||
|
||||
@ -2122,6 +2135,7 @@ private:
|
||||
int _evex_encoding;
|
||||
bool _is_clear_context;
|
||||
bool _is_extended_context;
|
||||
int _embedded_opmask_register_specifier;
|
||||
|
||||
Assembler *_current_assembler;
|
||||
|
||||
@ -2139,6 +2153,7 @@ public:
|
||||
int get_evex_encoding(void) const { return _evex_encoding; }
|
||||
bool is_clear_context(void) const { return _is_clear_context; }
|
||||
bool is_extended_context(void) const { return _is_extended_context; }
|
||||
int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }
|
||||
|
||||
// Set the vector len manually
|
||||
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
|
||||
@ -2172,6 +2187,11 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Set embedded opmask register specifier.
|
||||
void set_embedded_opmask_register_specifier(KRegister mask) {
|
||||
_embedded_opmask_register_specifier = (*mask).encoding() & 0x7;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif // CPU_X86_VM_ASSEMBLER_X86_HPP
|
||||
|
@ -9425,6 +9425,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
|
||||
void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
|
||||
Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2){
|
||||
assert(UseSSE42Intrinsics, "SSE4.2 must be enabled.");
|
||||
Label VECTOR64_LOOP, VECTOR64_TAIL, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
|
||||
Label VECTOR32_LOOP, VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP;
|
||||
Label VECTOR16_TAIL, VECTOR8_TAIL, VECTOR4_TAIL;
|
||||
Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL;
|
||||
@ -9437,6 +9438,57 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
|
||||
shlq(length);
|
||||
xorq(result, result);
|
||||
|
||||
if ((UseAVX > 2) &&
|
||||
VM_Version::supports_avx512vlbw()) {
|
||||
set_vector_masking(); // opening of the stub context for programming mask registers
|
||||
cmpq(length, 64);
|
||||
jcc(Assembler::less, VECTOR32_TAIL);
|
||||
movq(tmp1, length);
|
||||
andq(tmp1, 0x3F); // tail count
|
||||
andq(length, ~(0x3F)); //vector count
|
||||
|
||||
bind(VECTOR64_LOOP);
|
||||
// AVX512 code to compare 64 byte vectors.
|
||||
evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
|
||||
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
|
||||
kortestql(k7, k7);
|
||||
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
|
||||
addq(result, 64);
|
||||
subq(length, 64);
|
||||
jccb(Assembler::notZero, VECTOR64_LOOP);
|
||||
|
||||
//bind(VECTOR64_TAIL);
|
||||
testq(tmp1, tmp1);
|
||||
jcc(Assembler::zero, SAME_TILL_END);
|
||||
|
||||
bind(VECTOR64_TAIL);
|
||||
// AVX512 code to compare up to 63 byte vectors.
|
||||
// Save k1
|
||||
kmovql(k3, k1);
|
||||
mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
|
||||
shlxq(tmp2, tmp2, tmp1);
|
||||
notq(tmp2);
|
||||
kmovql(k1, tmp2);
|
||||
|
||||
evmovdqub(k1, rymm0, Address(obja, result), Assembler::AVX_512bit);
|
||||
evpcmpeqb(k1, k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
|
||||
|
||||
ktestql(k7, k1);
|
||||
// Restore k1
|
||||
kmovql(k1, k3);
|
||||
jcc(Assembler::below, SAME_TILL_END); // not mismatch
|
||||
|
||||
bind(VECTOR64_NOT_EQUAL);
|
||||
kmovql(tmp1, k7);
|
||||
notq(tmp1);
|
||||
tzcntq(tmp1, tmp1);
|
||||
addq(result, tmp1);
|
||||
shrq(result);
|
||||
jmp(DONE);
|
||||
bind(VECTOR32_TAIL);
|
||||
clear_vector_masking(); // closing of the stub context for programming mask registers
|
||||
}
|
||||
|
||||
cmpq(length, 8);
|
||||
jcc(Assembler::equal, VECTOR8_LOOP);
|
||||
jcc(Assembler::less, VECTOR4_TAIL);
|
||||
@ -9593,7 +9645,6 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
|
||||
bind(DONE);
|
||||
}
|
||||
|
||||
|
||||
//Helper functions for square_to_len()
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user