8321648: Integral gather optimized mask computation.

Reviewed-by: thartmann, sviswanathan
This commit is contained in:
Jatin Bhateja 2023-12-19 07:51:52 +00:00
parent 59073fa3eb
commit 76637c53c5
3 changed files with 17 additions and 20 deletions

@ -2846,6 +2846,13 @@ void Assembler::kxorbl(KRegister dst, KRegister src1, KRegister src2) {
emit_int16(0x47, (0xC0 | encode));
}
// Emits the word-sized opmask XNOR instruction: a VEX prefix in the 0F opcode
// map with no SIMD prefix, then opcode byte 0x46 and a ModRM byte with the
// top two bits set (0xC0 | encoded register fields).
// NOTE(review): presumably KXNORW, i.e. dst = ~(src1 ^ src2) on 16-bit masks,
// so passing the same register three times yields an all-ones mask — confirm
// against the Intel SDM.
void Assembler::kxnorwl(KRegister dst, KRegister src1, KRegister src2) {
  assert(VM_Version::supports_evex(), "");

  // Flags after AVX_256bit, in order:
  //   rex_w = false, legacy_mode = true, no_mask_reg = true, uses_vl = false
  InstructionAttr attrs(AVX_256bit, false, true, true, false);

  const int dst_enc  = dst->encoding();
  const int src1_enc = src1->encoding();
  const int src2_enc = src2->encoding();

  int reg_bits = vex_prefix_and_encode(dst_enc, src1_enc, src2_enc,
                                       VEX_SIMD_NONE, VEX_OPCODE_0F, &attrs);

  // Opcode followed by the register-direct ModRM byte.
  emit_int16(0x46, (0xC0 | reg_bits));
}
void Assembler::kxorwl(KRegister dst, KRegister src1, KRegister src2) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@ -10771,7 +10778,7 @@ void Assembler::vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int v
assert(src.isxmmindex(),"expected to be xmm index");
assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0x90);
emit_operand(dst, src, 0);
@ -10784,7 +10791,7 @@ void Assembler::vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int v
assert(src.isxmmindex(),"expected to be xmm index");
assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0x90);
emit_operand(dst, src, 0);
@ -10797,7 +10804,7 @@ void Assembler::vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int v
assert(src.isxmmindex(),"expected to be xmm index");
assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0x92);
emit_operand(dst, src, 0);
@ -10810,7 +10817,7 @@ void Assembler::vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int v
assert(src.isxmmindex(),"expected to be xmm index");
assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0x92);
emit_operand(dst, src, 0);

@ -1524,6 +1524,8 @@ private:
void kordl(KRegister dst, KRegister src1, KRegister src2);
void korql(KRegister dst, KRegister src1, KRegister src2);
void kxnorwl(KRegister dst, KRegister src1, KRegister src2);
void kxorbl(KRegister dst, KRegister src1, KRegister src2);
void kxorwl(KRegister dst, KRegister src1, KRegister src2);
void kxordl(KRegister dst, KRegister src1, KRegister src2);

@ -4053,39 +4053,26 @@ instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
effect(TEMP dst, TEMP tmp, TEMP mask);
format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
ins_encode %{
assert(UseAVX >= 2, "sanity");
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = Matcher::vector_element_basic_type(this);
assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity");
assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
if (vlen_enc == Assembler::AVX_128bit) {
__ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg);
} else {
__ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg);
}
__ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
__ lea($tmp$$Register, $mem$$Address);
__ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
match(Set dst (LoadVectorGather mem idx));
effect(TEMP dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
ins_encode %{
assert(UseAVX > 2, "sanity");
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = Matcher::vector_element_basic_type(this);
assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
__ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
__ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
__ lea($tmp$$Register, $mem$$Address);
__ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
%}
@ -4093,6 +4080,7 @@ instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
%}
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}