8304258: x86: Improve the code generation of VectorRearrange with int and float

Reviewed-by: kvn, jbhateja, sviswanathan
Author: Quan Anh Mai
Date:   2023-03-25 05:30:16 +00:00
parent 765a94258d
commit 38e17148fa
5 changed files with 44 additions and 12 deletions

src/hotspot/cpu/x86/assembler_x86.cpp

@@ -4209,7 +4209,8 @@ void Assembler::vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
}
void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
assert((vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
(vector_len == AVX_512bit && VM_Version::supports_evex()), "");
// VEX.NDS.256.66.0F38.W0 36 /r
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -4217,7 +4218,8 @@ void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
}
void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
assert((vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
(vector_len == AVX_512bit && VM_Version::supports_evex()), "");
// VEX.NDS.256.66.0F38.W0 36 /r
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -4226,6 +4228,15 @@ void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
emit_operand(dst, src, 0);
}
void Assembler::vpermps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert((vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
(vector_len == AVX_512bit && VM_Version::supports_evex()), "");
// VEX.NDS.XXX.66.0F38.W0 16 /r
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x16, (0xC0 | encode));
}
void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
assert(VM_Version::supports_avx2(), "");
InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -4247,6 +4258,13 @@ void Assembler::vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
emit_int24(0x04, (0xC0 | encode), imm8);
}
void Assembler::vpermilps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x0C, (0xC0 | encode));
}
void Assembler::vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(),/* legacy_mode */ false,/* no_mask_reg */ true, /* uses_vl */ false);
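
As a side note on the three permute forms used above: vpermilps is an AVX1 in-lane permute (each index can only select among the four elements of its own 128-bit lane), while vpermd and vpermps are AVX2/AVX-512 cross-lane permutes over dword indices, for integer and float data respectively. The standalone sketch below is illustrative only and not part of the patch; it assumes an AVX2-capable machine and uses the standard <immintrin.h> intrinsics that map to these instructions (compile with -mavx2).

#include <immintrin.h>
#include <cstdio>

int main() {
  __m256  src = _mm256_setr_ps(0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f);
  __m256i idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);   // indices that reverse the vector

  // vpermps: cross-lane, each index selects from all eight elements.
  __m256 cross  = _mm256_permutevar8x32_ps(src, idx);
  // vpermilps: in-lane, each index is taken modulo 4 within its own 128-bit lane.
  __m256 inlane = _mm256_permutevar_ps(src, idx);
  // vpermd is the integer twin of vpermps: _mm256_permutevar8x32_epi32(src, idx).

  float c[8], l[8];
  _mm256_storeu_ps(c, cross);
  _mm256_storeu_ps(l, inlane);
  printf("vpermps  : %g %g %g %g %g %g %g %g\n", c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]);
  printf("vpermilps: %g %g %g %g %g %g %g %g\n", l[0], l[1], l[2], l[3], l[4], l[5], l[6], l[7]);
  // Prints 7 6 5 4 3 2 1 0 for vpermps, but 3 2 1 0 7 6 5 4 for vpermilps.
  return 0;
}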

src/hotspot/cpu/x86/assembler_x86.hpp

@@ -1762,9 +1762,11 @@ private:
void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermilps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

@@ -6095,3 +6095,14 @@ void C2_MacroAssembler::rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src,
evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
}
void C2_MacroAssembler::vector_rearrange_int_float(BasicType bt, XMMRegister dst,
XMMRegister shuffle, XMMRegister src, int vlen_enc) {
if (vlen_enc == AVX_128bit) {
vpermilps(dst, src, shuffle, vlen_enc);
} else if (bt == T_INT) {
vpermd(dst, shuffle, src, vlen_enc);
} else {
assert(bt == T_FLOAT, "");
vpermps(dst, shuffle, src, vlen_enc);
}
}
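
Independent of which instruction gets selected, the helper implements the VectorRearrange contract on 32-bit lanes: every destination lane is the source lane named by the corresponding shuffle index. A minimal scalar sketch of that contract (illustrative only; rearrange_ref is a made-up name and the indices are assumed to be in range):

#include <cstdint>
#include <cstddef>

// Scalar model of a 32-bit-lane VectorRearrange: dst[i] = src[shuffle[i]].
// lanes is 4 for 128-bit, 8 for 256-bit and 16 for 512-bit vectors.
template <typename Lane32>
void rearrange_ref(Lane32* dst, const Lane32* src, const int32_t* shuffle, size_t lanes) {
  for (size_t i = 0; i < lanes; i++) {
    dst[i] = src[shuffle[i]];   // shuffle[i] assumed to be in [0, lanes)
  }
}

At 128 bits all four elements live in a single lane, so the in-lane vpermilps (AVX1) already reaches every element; at 256 and 512 bits the cross-lane vpermd/vpermps forms are required, and picking vpermps for T_FLOAT keeps the data in the floating-point domain.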

src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp

@@ -485,4 +485,7 @@ public:
void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc);
void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
XMMRegister src, int vlen_enc);
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP

src/hotspot/cpu/x86/x86.ad

@@ -8591,7 +8591,7 @@ instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
Matcher::vector_length(n) == 4 && UseAVX < 2);
Matcher::vector_length(n) == 4 && UseAVX == 0);
match(Set dst (VectorLoadShuffle src));
effect(TEMP dst, TEMP vtmp);
format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
@@ -8620,8 +8620,8 @@ instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
%}
instruct rearrangeI(vec dst, vec shuffle) %{
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
Matcher::vector_length(n) == 4 && UseAVX < 2);
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
UseAVX == 0);
match(Set dst (VectorRearrange dst shuffle));
format %{ "vector_rearrange $dst, $shuffle, $dst" %}
ins_encode %{
@@ -8633,11 +8633,11 @@ instruct rearrangeI(vec dst, vec shuffle) %{
instruct loadShuffleI_avx(vec dst, vec src) %{
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
UseAVX >= 2);
UseAVX > 0);
match(Set dst (VectorLoadShuffle src));
format %{ "vector_load_shuffle $dst, $src" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
__ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
@@ -8645,15 +8645,13 @@ instruct loadShuffleI_avx(vec dst, vec src) %{
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
UseAVX >= 2);
UseAVX > 0);
match(Set dst (VectorRearrange src shuffle));
format %{ "vector_rearrange $dst, $shuffle, $src" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
if (vlen_enc == Assembler::AVX_128bit) {
vlen_enc = Assembler::AVX_256bit;
}
__ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
BasicType bt = Matcher::vector_element_basic_type(this);
__ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
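
Net effect of the matcher changes: the AVX rules now apply from UseAVX > 0 instead of requiring AVX2, and a 128-bit rearrange no longer has to be widened to a 256-bit vpermd. A hedged intrinsics sketch of the old and new lowerings for a four-element float rearrange (illustrative only; the helper names are invented here and the shuffle indices are assumed to be in [0, 4)):

#include <immintrin.h>

// Old lowering (sketch): promote the 128-bit operands to 256 bits and use the
// cross-lane integer permute vpermd (AVX2); only the low half of the result is used.
static inline __m128 rearrange4f_old(__m128 src, __m128i shuffle) {
  __m256i wide_src = _mm256_castsi128_si256(_mm_castps_si128(src));
  __m256i wide_idx = _mm256_castsi128_si256(shuffle);
  __m256i wide_dst = _mm256_permutevar8x32_epi32(wide_src, wide_idx);   // vpermd
  return _mm_castsi128_ps(_mm256_castsi256_si128(wide_dst));
}

// New lowering (sketch): a 128-bit vector is a single lane, so the in-lane
// vpermilps already reaches every element and only needs AVX1.
static inline __m128 rearrange4f_new(__m128 src, __m128i shuffle) {
  return _mm_permutevar_ps(src, shuffle);                               // vpermilps
}

Besides lowering the ISA requirement for the 128-bit case from AVX2 to AVX, the new sequence keeps float data on vpermilps/vpermps rather than vpermd, so it stays in the floating-point domain, which can avoid a bypass delay on some microarchitectures.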