8304258: x86: Improve the code generation of VectorRearrange with int and float
Reviewed-by: kvn, jbhateja, sviswanathan
This commit is contained in:
parent
765a94258d
commit
38e17148fa
@ -4209,7 +4209,8 @@ void Assembler::vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
|
||||
}
|
||||
|
||||
void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
|
||||
assert((vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
|
||||
(vector_len == AVX_512bit && VM_Version::supports_evex()), "");
|
||||
// VEX.NDS.256.66.0F38.W0 36 /r
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
@ -4217,7 +4218,8 @@ void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
|
||||
}
|
||||
|
||||
void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
|
||||
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
|
||||
assert((vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
|
||||
(vector_len == AVX_512bit && VM_Version::supports_evex()), "");
|
||||
// VEX.NDS.256.66.0F38.W0 36 /r
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
@ -4226,6 +4228,15 @@ void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
emit_operand(dst, src, 0);
|
||||
}
|
||||
|
||||
// Emits the register-register form of VPERMPS (66 0F38 W0, opcode 0x16).
//   dst        - destination register
//   nds        - first source operand (passed as the VEX "nds" register)
//   src        - second source operand (encoded in ModRM.rm)
//   vector_len - AVX_256bit (asserts AVX2) or AVX_512bit (asserts EVEX);
//                any other length trips the assert in debug builds.
void Assembler::vpermps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_512bit ? VM_Version::supports_evex()
                                  : (vector_len == AVX_256bit && VM_Version::supports_avx2()), "");
  // VEX.NDS.XXX.66.0F38.W0 16 /r
  InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  const int modrm_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                               VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  // 0xC0 sets ModRM.mod = 11b, i.e. a register-direct operand.
  emit_int16(0x16, 0xC0 | modrm_bits);
}
|
||||
|
||||
void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
@ -4247,6 +4258,13 @@ void Assembler::vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector
|
||||
emit_int24(0x04, (0xC0 | encode), imm8);
|
||||
}
|
||||
|
||||
// Emits the three-register form of VPERMILPS (66 0F38 W0, opcode 0x0C).
//   dst        - destination register
//   nds        - first source operand (passed as the VEX "nds" register)
//   src        - second source operand (encoded in ModRM.rm)
//   vector_len - lengths up to AVX_256bit assert AVX support, longer ones assert EVEX.
void Assembler::vpermilps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len > AVX_256bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  const int modrm_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                               VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  // 0xC0 sets ModRM.mod = 11b, i.e. a register-direct operand.
  emit_int16(0x0C, 0xC0 | modrm_bits);
}
|
||||
|
||||
void Assembler::vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
|
||||
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(),/* legacy_mode */ false,/* no_mask_reg */ true, /* uses_vl */ false);
|
||||
|
@ -1762,9 +1762,11 @@ private:
|
||||
void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
// Register form of VPERMPS (66 0F38 W0, opcode 0x16); asserts AVX2 for 256-bit and EVEX for 512-bit vectors.
void vpermps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
|
||||
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
|
||||
void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
|
||||
// Three-register form of VPERMILPS (66 0F38 W0, opcode 0x0C); asserts AVX up to 256-bit vectors and EVEX beyond.
void vpermilps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
|
||||
void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
|
||||
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
@ -6095,3 +6095,14 @@ void C2_MacroAssembler::rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XM
|
||||
evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc);
|
||||
}
|
||||
|
||||
// Emits the rearrange (permute) instruction for T_INT / T_FLOAT vectors.
//   bt       - element type; only consulted for vectors wider than 128 bits,
//              where anything other than T_INT must be T_FLOAT (asserted)
//   dst      - destination register
//   shuffle  - register holding the shuffle indices
//   src      - register holding the elements to rearrange
//   vlen_enc - vector length encoding
// Note the operand order: vpermilps is given (src, shuffle) while
// vpermd / vpermps are given (shuffle, src).
void C2_MacroAssembler::vector_rearrange_int_float(BasicType bt, XMMRegister dst,
                                                   XMMRegister shuffle, XMMRegister src, int vlen_enc) {
  // 128-bit vectors use vpermilps regardless of the element type.
  if (vlen_enc == AVX_128bit) {
    vpermilps(dst, src, shuffle, vlen_enc);
    return;
  }

  // Wider vectors pick the permute matching the element type.
  if (bt == T_INT) {
    vpermd(dst, shuffle, src, vlen_enc);
    return;
  }

  assert(bt == T_FLOAT, "");
  vpermps(dst, shuffle, src, vlen_enc);
}
|
||||
|
@ -485,4 +485,7 @@ public:
|
||||
void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc);
|
||||
|
||||
// Rearranges int/float vector elements of src by shuffle into dst: emits vpermilps for
// 128-bit vectors, otherwise vpermd for T_INT and vpermps for T_FLOAT.
void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
|
||||
XMMRegister src, int vlen_enc);
|
||||
|
||||
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
|
||||
|
@ -8591,7 +8591,7 @@ instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
|
||||
|
||||
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
|
||||
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
|
||||
Matcher::vector_length(n) == 4 && UseAVX < 2);
|
||||
Matcher::vector_length(n) == 4 && UseAVX == 0);
|
||||
match(Set dst (VectorLoadShuffle src));
|
||||
effect(TEMP dst, TEMP vtmp);
|
||||
format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
|
||||
@ -8620,8 +8620,8 @@ instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
|
||||
%}
|
||||
|
||||
instruct rearrangeI(vec dst, vec shuffle) %{
|
||||
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
|
||||
Matcher::vector_length(n) == 4 && UseAVX < 2);
|
||||
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
|
||||
UseAVX == 0);
|
||||
match(Set dst (VectorRearrange dst shuffle));
|
||||
format %{ "vector_rearrange $dst, $shuffle, $dst" %}
|
||||
ins_encode %{
|
||||
@ -8633,11 +8633,11 @@ instruct rearrangeI(vec dst, vec shuffle) %{
|
||||
|
||||
instruct loadShuffleI_avx(vec dst, vec src) %{
|
||||
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
|
||||
UseAVX >= 2);
|
||||
UseAVX > 0);
|
||||
match(Set dst (VectorLoadShuffle src));
|
||||
format %{ "vector_load_shuffle $dst, $src" %}
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
__ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
@ -8645,15 +8645,13 @@ instruct loadShuffleI_avx(vec dst, vec src) %{
|
||||
|
||||
// VectorRearrange for T_INT / T_FLOAT vectors whenever AVX is enabled (UseAVX > 0).
// The instruction selection (vpermilps for 128-bit vectors, vpermd / vpermps for
// wider ones) is delegated to C2_MacroAssembler::vector_rearrange_int_float, so the
// vector length encoding is passed through unchanged rather than being widened to
// 256 bits first.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
Loading…
x
Reference in New Issue
Block a user