8154975: Update for vectorizedMismatch with AVX512

Reviewed-by: kvn
This commit is contained in:
Vivek Deshpande 2016-04-27 13:37:07 -07:00
parent 15893e995b
commit 8cb0a98032
3 changed files with 112 additions and 6 deletions

View File

@ -2323,6 +2323,15 @@ void Assembler::kortestql(KRegister src1, KRegister src2) {
emit_int8((unsigned char)(0xC0 | encode));
}
// Emits KTESTQ for two opmask registers.
// Encoding, as built below: a VEX prefix (no SIMD prefix, 0F opcode map, W bit set),
// the opcode byte 0x99, and a final byte 0xC0 | encode that selects the
// register-register form with both k-register numbers folded in by
// vex_prefix_and_encode().
// Requires AVX512BW; this is asserted rather than checked at runtime.
// This instruction produces ZF or CF flags
void Assembler::ktestql(KRegister src1, KRegister src2) {
assert(VM_Version::supports_avx512bw(), "");
// Opmask instructions take no embedded mask and are not vector-length scaled.
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x99);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::movb(Address dst, int imm8) {
InstructionMark im(this);
prefix(dst);
@ -2491,6 +2500,19 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
emit_operand(src, dst);
}
// Masked overload of evmovdqub: moves bytes from the memory operand 'src' into
// 'dst', with 'mask' recorded as the embedded opmask register of the EVEX prefix.
//   mask       - opmask register placed in the prefix (only its low 3 encoding bits are kept)
//   dst        - destination vector register
//   src        - memory source
//   vector_len - vector length selector forwarded to InstructionAttr
// Asserts AVX512VL+BW support and that vector masking has been enabled with
// set_vector_masking(); the latter restricts this overload to stub code.
void Assembler::evmovdqub(KRegister mask, XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
assert(is_vector_masking(), ""); // For stub code use only
InstructionMark im(this);
// no_mask_reg is false so that evex_prefix() emits the opmask specifier set below.
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
// Force the EVEX form; a VEX encoding has no opmask field.
attributes.set_is_evex_instruction();
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
// Opcode byte, followed by the ModRM/SIB/displacement bytes for (dst, src).
emit_int8(0x6F);
emit_operand(dst, src);
}
void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@ -3285,6 +3307,19 @@ void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vect
emit_operand(as_Register(dst_enc), src);
}
// Masked overload of evpcmpeqb: byte-equality compare of 'nds' against the
// memory operand 'src', with the result written to opmask register 'kdst' and
// 'mask' recorded as the embedded opmask register of the EVEX prefix.
//   mask       - opmask register placed in the prefix (only its low 3 encoding bits are kept)
//   kdst       - opmask register receiving the compare result
//   nds        - first vector source
//   src        - memory source
//   vector_len - vector length selector forwarded to InstructionAttr
// Asserts AVX512VL+BW support and that vector masking has been enabled with
// set_vector_masking(); the latter restricts this overload to stub code.
// NOTE(review): legacy_mode and uses_vl are hard-coded to false here, whereas the
// masked evmovdqub passes _legacy_mode_bw and true - confirm this is intentional.
void Assembler::evpcmpeqb(KRegister mask, KRegister kdst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
assert(is_vector_masking(), ""); // For stub code use only
InstructionMark im(this);
// no_reg_mask is false so that evex_prefix() emits the opmask specifier set below.
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
// Force the EVEX form; a VEX encoding has no opmask field.
attributes.set_is_evex_instruction();
vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x74);
// The k-register number is wrapped with as_Register so it can be passed to
// emit_operand as the register half of the operand bytes.
emit_operand(as_Register(kdst->encoding()), src);
}
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse2(), "");
@ -6938,7 +6973,7 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
emit_int8(byte3);
// P2: byte 4 as zL'Lbv'aaa
int byte4 = (_attributes->is_no_reg_mask()) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
int byte4 = (_attributes->is_no_reg_mask()) ? 0 : _attributes->get_embedded_opmask_register_specifier(); // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
// EVEX.v` for extending EVEX.vvvv or VIDX
byte4 |= (evex_v ? 0: EVEX_V);
// third EVEX.b for broadcast actions

View File

@ -606,6 +606,7 @@ private:
bool _legacy_mode_vl;
bool _legacy_mode_vlbw;
bool _is_managed;
bool _vector_masking; // For stub code use only
class InstructionAttr *_attributes;
@ -813,6 +814,7 @@ private:
_legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
_legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
_is_managed = false;
_vector_masking = false;
_attributes = NULL;
}
@ -823,6 +825,12 @@ private:
void clear_managed(void) { _is_managed = false; }
bool is_managed(void) { return _is_managed; }
// Following functions are for stub code use only
// Turn on the vector-masking flag; the explicitly masked EVEX emitters assert
// that this flag is set before they encode anything.
void set_vector_masking(void) {
  _vector_masking = true;
}
// Turn the vector-masking flag back off once the masked code sequence is done.
void clear_vector_masking(void) {
  _vector_masking = false;
}
// Report whether the vector-masking flag is currently set.
bool is_vector_masking(void) {
  return _vector_masking;
}
void lea(Register dst, Address src);
void mov(Register dst, Register src);
@ -1354,6 +1362,8 @@ private:
void kortestdl(KRegister dst, KRegister src);
void kortestql(KRegister dst, KRegister src);
void ktestql(KRegister dst, KRegister src);
void movdl(XMMRegister dst, Register src);
void movdl(Register dst, XMMRegister src);
void movdl(XMMRegister dst, Address src);
@ -1381,6 +1391,7 @@ private:
void evmovdqub(Address dst, XMMRegister src, int vector_len);
void evmovdqub(XMMRegister dst, Address src, int vector_len);
void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdqub(KRegister mask, XMMRegister dst, Address src, int vector_len);
void evmovdquw(Address dst, XMMRegister src, int vector_len);
void evmovdquw(XMMRegister dst, Address src, int vector_len);
void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len);
@ -1534,6 +1545,7 @@ private:
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpeqb(KRegister mask, KRegister kdst, XMMRegister nds, Address src, int vector_len);
void pcmpeqw(XMMRegister dst, XMMRegister src);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@ -2098,7 +2110,8 @@ public:
_evex_encoding(0),
_is_clear_context(false),
_is_extended_context(false),
_current_assembler(NULL) {
_current_assembler(NULL),
_embedded_opmask_register_specifier(1) { // hard code k1, it will be initialized for now
if (UseAVX < 3) _legacy_mode = true;
}
@ -2122,6 +2135,7 @@ private:
int _evex_encoding;
bool _is_clear_context;
bool _is_extended_context;
int _embedded_opmask_register_specifier;
Assembler *_current_assembler;
@ -2139,6 +2153,7 @@ public:
int get_evex_encoding(void) const { return _evex_encoding; }
bool is_clear_context(void) const { return _is_clear_context; }
bool is_extended_context(void) const { return _is_extended_context; }
int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }
// Set the vector len manually
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
@ -2172,6 +2187,11 @@ public:
}
}
// Record which opmask register this instruction embeds in its EVEX prefix.
// Only the low three bits of the register encoding are kept, matching the
// 3-bit "aaa" field that evex_prefix() writes into byte 4.
void set_embedded_opmask_register_specifier(KRegister mask) {
  const int aaa_bits = mask->encoding() & 0x7;
  _embedded_opmask_register_specifier = aaa_bits;
}
};
#endif // CPU_X86_VM_ASSEMBLER_X86_HPP

View File

@ -9425,6 +9425,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2){
assert(UseSSE42Intrinsics, "SSE4.2 must be enabled.");
Label VECTOR64_LOOP, VECTOR64_TAIL, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
Label VECTOR32_LOOP, VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP;
Label VECTOR16_TAIL, VECTOR8_TAIL, VECTOR4_TAIL;
Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL;
@ -9437,11 +9438,62 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
shlq(length);
xorq(result, result);
if ((UseAVX > 2) &&
VM_Version::supports_avx512vlbw()) {
set_vector_masking(); // opening of the stub context for programming mask registers
cmpq(length, 64);
jcc(Assembler::less, VECTOR32_TAIL);
movq(tmp1, length);
andq(tmp1, 0x3F); // tail count
andq(length, ~(0x3F)); //vector count
bind(VECTOR64_LOOP);
// AVX512 code to compare 64 byte vectors.
evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
kortestql(k7, k7);
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
addq(result, 64);
subq(length, 64);
jccb(Assembler::notZero, VECTOR64_LOOP);
//bind(VECTOR64_TAIL);
testq(tmp1, tmp1);
jcc(Assembler::zero, SAME_TILL_END);
bind(VECTOR64_TAIL);
// AVX512 code to compare the remaining tail of up to 63 bytes.
// Save k1
kmovql(k3, k1);
mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
shlxq(tmp2, tmp2, tmp1);
notq(tmp2);
kmovql(k1, tmp2);
evmovdqub(k1, rymm0, Address(obja, result), Assembler::AVX_512bit);
evpcmpeqb(k1, k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
ktestql(k7, k1);
// Restore k1
kmovql(k1, k3);
jcc(Assembler::below, SAME_TILL_END); // not mismatch
bind(VECTOR64_NOT_EQUAL);
kmovql(tmp1, k7);
notq(tmp1);
tzcntq(tmp1, tmp1);
addq(result, tmp1);
shrq(result);
jmp(DONE);
bind(VECTOR32_TAIL);
clear_vector_masking(); // closing of the stub context for programming mask registers
}
cmpq(length, 8);
jcc(Assembler::equal, VECTOR8_LOOP);
jcc(Assembler::less, VECTOR4_TAIL);
if (UseAVX >= 2){
if (UseAVX >= 2) {
cmpq(length, 16);
jcc(Assembler::equal, VECTOR16_LOOP);
@ -9549,7 +9601,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
jmpb(SAME_TILL_END);
if (UseAVX >= 2){
if (UseAVX >= 2) {
bind(VECTOR32_NOT_EQUAL);
vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit);
vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit);
@ -9562,7 +9614,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
}
bind(VECTOR16_NOT_EQUAL);
if (UseAVX >= 2){
if (UseAVX >= 2) {
vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit);
vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit);
pxor(rymm0, rymm2);
@ -9593,7 +9645,6 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
bind(DONE);
}
//Helper functions for square_to_len()
/**