8238680: C2: Remove redundant AD instructions for Replicate nodes
Reviewed-by: thartmann, sviswanathan
This commit is contained in:
parent
590f5996c6
commit
0d84fe9872
src/hotspot/cpu/x86
@ -7159,7 +7159,7 @@ void Assembler::evbroadcasti64x2(XMMRegister dst, Address src, int vector_len) {
|
||||
// scalar single/double precision replicate
|
||||
|
||||
// duplicate single precision data from src into programmed locations in dest : requires AVX512VL
|
||||
void Assembler::vpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
void Assembler::vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
@ -7167,7 +7167,7 @@ void Assembler::vpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len)
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::vpbroadcastss(XMMRegister dst, Address src, int vector_len) {
|
||||
void Assembler::vbroadcastss(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
@ -7180,7 +7180,7 @@ void Assembler::vpbroadcastss(XMMRegister dst, Address src, int vector_len) {
|
||||
}
|
||||
|
||||
// duplicate double precision data from src into programmed locations in dest : requires AVX512VL
|
||||
void Assembler::vpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
void Assembler::vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
@ -7189,7 +7189,7 @@ void Assembler::vpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len)
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::vpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
|
||||
void Assembler::vbroadcastsd(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
|
@ -2217,10 +2217,10 @@ private:
|
||||
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
|
||||
|
||||
// scalar single/double precision replicate
|
||||
void vpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vpbroadcastss(XMMRegister dst, Address src, int vector_len);
|
||||
void vpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vpbroadcastsd(XMMRegister dst, Address src, int vector_len);
|
||||
void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vbroadcastss(XMMRegister dst, Address src, int vector_len);
|
||||
void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
|
||||
|
||||
// gpr sourced byte/word/dword/qword replicate
|
||||
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
|
||||
|
@ -3377,20 +3377,18 @@ instruct ReplS_zero(vec dst, immI0 zero) %{
|
||||
// ====================ReplicateI=======================================
|
||||
|
||||
instruct ReplI_reg(vec dst, rRegI src) %{
|
||||
predicate((n->as_Vector()->length() <= 8) ||
|
||||
(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl()));
|
||||
match(Set dst (ReplicateI src));
|
||||
format %{ "replicateI $dst,$src" %}
|
||||
ins_encode %{
|
||||
uint vlen = vector_length(this);
|
||||
if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
__ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
|
||||
} else {
|
||||
__ movdl($dst$$XMMRegister, $src$$Register);
|
||||
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
||||
if (vlen >= 8) {
|
||||
assert(vlen == 8, "sanity"); // vlen == 16 && !AVX512VL is covered by ReplI_reg_leg
|
||||
assert(vlen == 8, "sanity");
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
}
|
||||
}
|
||||
@ -3398,33 +3396,19 @@ instruct ReplI_reg(vec dst, rRegI src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplI_reg_leg(legVec dst, rRegI src) %{
|
||||
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateI src));
|
||||
format %{ "replicateI $dst,$src" %}
|
||||
ins_encode %{
|
||||
__ movdl($dst$$XMMRegister, $src$$Register);
|
||||
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplI_mem(vec dst, memory mem) %{
|
||||
predicate((n->as_Vector()->length() <= 8 && VM_Version::supports_avx()) ||
|
||||
(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl()));
|
||||
predicate(VM_Version::supports_avx()); // use VEX-encoded pshufd to relax 16-byte alignment restriction on the source
|
||||
match(Set dst (ReplicateI (LoadI mem)));
|
||||
format %{ "replicateI $dst,$mem" %}
|
||||
ins_encode %{
|
||||
uint vlen = vector_length(this);
|
||||
if (vlen <= 4) {
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
} else if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
int vector_len = vector_length_encoding(this);
|
||||
__ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
} else {
|
||||
assert(vlen == 8, "sanity"); // vlen == 16 && !AVX512VL is covered by ReplI_mem_leg
|
||||
assert(vlen == 8, "sanity");
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
}
|
||||
@ -3432,21 +3416,7 @@ instruct ReplI_mem(vec dst, memory mem) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplI_mem_leg(legVec dst, memory mem) %{
|
||||
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateI (LoadI mem)));
|
||||
format %{ "replicateI $dst,$mem" %}
|
||||
ins_encode %{
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplI_imm(vec dst, immI con) %{
|
||||
predicate((n->as_Vector()->length() <= 8) ||
|
||||
(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl()));
|
||||
match(Set dst (ReplicateI con));
|
||||
format %{ "replicateI $dst,$con" %}
|
||||
ins_encode %{
|
||||
@ -3454,7 +3424,7 @@ instruct ReplI_imm(vec dst, immI con) %{
|
||||
InternalAddress constaddr = $constantaddress(replicate8_imm($con$$constant, 4));
|
||||
if (vlen == 2) {
|
||||
__ movq($dst$$XMMRegister, constaddr);
|
||||
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
} else if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
int vector_len = vector_length_encoding(this);
|
||||
__ movq($dst$$XMMRegister, constaddr);
|
||||
__ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
|
||||
@ -3470,19 +3440,6 @@ instruct ReplI_imm(vec dst, immI con) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplI_imm_leg(legVec dst, immI con) %{
|
||||
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateI con));
|
||||
format %{ "replicateI $dst,$con" %}
|
||||
ins_encode %{
|
||||
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Replicate integer (4 byte) scalar zero to be vector
|
||||
instruct ReplI_zero(vec dst, immI0 zero) %{
|
||||
match(Set dst (ReplicateI zero));
|
||||
@ -3504,8 +3461,6 @@ instruct ReplI_zero(vec dst, immI0 zero) %{
|
||||
#ifdef _LP64
|
||||
// Replicate long (8 byte) scalar to be vector
|
||||
instruct ReplL_reg(vec dst, rRegL src) %{
|
||||
predicate((n->as_Vector()->length() <= 4) ||
|
||||
(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()));
|
||||
match(Set dst (ReplicateL src));
|
||||
format %{ "replicateL $dst,$src" %}
|
||||
ins_encode %{
|
||||
@ -3513,11 +3468,11 @@ instruct ReplL_reg(vec dst, rRegL src) %{
|
||||
if (vlen == 2) {
|
||||
__ movdq($dst$$XMMRegister, $src$$Register);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
__ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
|
||||
} else {
|
||||
assert(vlen == 4, "sanity"); // vlen == 8 && !AVX512VL is covered by ReplL_reg_leg
|
||||
assert(vlen == 4, "sanity");
|
||||
__ movdq($dst$$XMMRegister, $src$$Register);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
@ -3525,19 +3480,6 @@ instruct ReplL_reg(vec dst, rRegL src) %{
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplL_reg_leg(legVec dst, rRegL src) %{
|
||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateL src));
|
||||
format %{ "replicateL $dst,$src" %}
|
||||
ins_encode %{
|
||||
__ movdq($dst$$XMMRegister, $src$$Register);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
#else // _LP64
|
||||
// Replicate long (8 byte) scalar to be vector
|
||||
instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
|
||||
@ -3595,8 +3537,6 @@ instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
|
||||
#endif // _LP64
|
||||
|
||||
instruct ReplL_mem(vec dst, memory mem) %{
|
||||
predicate((n->as_Vector()->length() <= 4) ||
|
||||
(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()));
|
||||
match(Set dst (ReplicateL (LoadL mem)));
|
||||
format %{ "replicateL $dst,$mem" %}
|
||||
ins_encode %{
|
||||
@ -3604,11 +3544,11 @@ instruct ReplL_mem(vec dst, memory mem) %{
|
||||
if (vlen == 2) {
|
||||
__ movq($dst$$XMMRegister, $mem$$Address);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
__ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
|
||||
} else {
|
||||
assert(vlen == 4, "sanity"); // vlen == 8 && !AVX512VL is covered by ReplL_mem_leg
|
||||
assert(vlen == 4, "sanity");
|
||||
__ movq($dst$$XMMRegister, $mem$$Address);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
@ -3617,23 +3557,8 @@ instruct ReplL_mem(vec dst, memory mem) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplL_mem_leg(legVec dst, memory mem) %{
|
||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateL (LoadL mem)));
|
||||
format %{ "replicateL $dst,$mem" %}
|
||||
ins_encode %{
|
||||
__ movq($dst$$XMMRegister, $mem$$Address);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
|
||||
instruct ReplL_imm(vec dst, immL con) %{
|
||||
predicate((n->as_Vector()->length() <= 4) ||
|
||||
(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()));
|
||||
match(Set dst (ReplicateL con));
|
||||
format %{ "replicateL $dst,$con" %}
|
||||
ins_encode %{
|
||||
@ -3642,12 +3567,12 @@ instruct ReplL_imm(vec dst, immL con) %{
|
||||
if (vlen == 2) {
|
||||
__ movq($dst$$XMMRegister, const_addr);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
__ movq($dst$$XMMRegister, const_addr);
|
||||
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
} else {
|
||||
assert(vlen == 4, "sanity"); // vlen == 8 && !AVX512VL is covered by ReplL_imm_leg
|
||||
assert(vlen == 4, "sanity");
|
||||
__ movq($dst$$XMMRegister, const_addr);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
@ -3656,19 +3581,6 @@ instruct ReplL_imm(vec dst, immL con) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplL_imm_leg(legVec dst, immL con) %{
|
||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateL con));
|
||||
format %{ "replicateL $dst,$con" %}
|
||||
ins_encode %{
|
||||
__ movq($dst$$XMMRegister, $constantaddress($con));
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplL_zero(vec dst, immL0 zero) %{
|
||||
match(Set dst (ReplicateL zero));
|
||||
format %{ "replicateL $dst,$zero" %}
|
||||
@ -3687,19 +3599,17 @@ instruct ReplL_zero(vec dst, immL0 zero) %{
|
||||
// ====================ReplicateF=======================================
|
||||
|
||||
instruct ReplF_reg(vec dst, vlRegF src) %{
|
||||
predicate((n->as_Vector()->length() <= 8) ||
|
||||
(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl()));
|
||||
match(Set dst (ReplicateF src));
|
||||
format %{ "replicateF $dst,$src" %}
|
||||
ins_encode %{
|
||||
uint vlen = vector_length(this);
|
||||
if (vlen <= 4) {
|
||||
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
|
||||
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
} else if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
int vector_len = vector_length_encoding(this);
|
||||
__ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
|
||||
__ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
|
||||
} else {
|
||||
assert(vlen == 8, "sanity"); // vlen == 16 && !AVX512VL is covered by ReplF_reg_leg
|
||||
assert(vlen == 8, "sanity");
|
||||
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
}
|
||||
@ -3707,32 +3617,19 @@ instruct ReplF_reg(vec dst, vlRegF src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplF_reg_leg(legVec dst, vlRegF src) %{
|
||||
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateF src));
|
||||
format %{ "replicateF $dst,$src" %}
|
||||
ins_encode %{
|
||||
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplF_mem(vec dst, memory mem) %{
|
||||
predicate((n->as_Vector()->length() <= 8 && VM_Version::supports_avx()) ||
|
||||
(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl()));
|
||||
predicate(VM_Version::supports_avx()); // use VEX-encoded pshufd to relax 16-byte alignment restriction on the source
|
||||
match(Set dst (ReplicateF (LoadF mem)));
|
||||
format %{ "replicateF $dst,$mem" %}
|
||||
ins_encode %{
|
||||
uint vlen = vector_length(this);
|
||||
if (vlen <= 4) {
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
} else if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
int vector_len = vector_length_encoding(this);
|
||||
__ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
__ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
} else {
|
||||
assert(vlen == 8, "sanity"); // vlen == 16 && !AVX512VL is covered by ReplF_mem_leg
|
||||
assert(vlen == 8, "sanity");
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
}
|
||||
@ -3740,18 +3637,6 @@ instruct ReplF_mem(vec dst, memory mem) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplF_mem_leg(legVec dst, memory mem) %{
|
||||
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateF (LoadF mem)));
|
||||
format %{ "replicateF $dst,$mem" %}
|
||||
ins_encode %{
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplF_zero(vec dst, immF0 zero) %{
|
||||
match(Set dst (ReplicateF zero));
|
||||
format %{ "replicateF $dst,$zero" %}
|
||||
@ -3771,19 +3656,17 @@ instruct ReplF_zero(vec dst, immF0 zero) %{
|
||||
|
||||
// Replicate double (8 bytes) scalar to be vector
|
||||
instruct ReplD_reg(vec dst, vlRegD src) %{
|
||||
predicate((n->as_Vector()->length() <= 4) ||
|
||||
(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()));
|
||||
match(Set dst (ReplicateD src));
|
||||
format %{ "replicateD $dst,$src" %}
|
||||
ins_encode %{
|
||||
uint vlen = vector_length(this);
|
||||
if (vlen == 2) {
|
||||
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
|
||||
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
int vector_len = vector_length_encoding(this);
|
||||
__ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
|
||||
__ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
|
||||
} else {
|
||||
assert(vlen == 4, "sanity"); // vlen == 8 && !AVX512VL is covered by ReplD_reg_leg
|
||||
assert(vlen == 4, "sanity");
|
||||
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
}
|
||||
@ -3791,32 +3674,19 @@ instruct ReplD_reg(vec dst, vlRegD src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplD_reg_leg(legVec dst, vlRegD src) %{
|
||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateD src));
|
||||
format %{ "replicateD $dst,$src" %}
|
||||
ins_encode %{
|
||||
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplD_mem(vec dst, memory mem) %{
|
||||
predicate((n->as_Vector()->length() <= 4 && VM_Version::supports_avx()) ||
|
||||
(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()));
|
||||
predicate(VM_Version::supports_avx()); // use VEX-encoded pshufd to relax 16-byte alignment restriction on the source
|
||||
match(Set dst (ReplicateD (LoadD mem)));
|
||||
format %{ "replicateD $dst,$mem" %}
|
||||
ins_encode %{
|
||||
uint vlen = vector_length(this);
|
||||
if (vlen == 2) {
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
|
||||
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
||||
int vector_len = vector_length_encoding(this);
|
||||
__ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
__ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
} else {
|
||||
assert(vlen == 4, "sanity"); // vlen == 8 && !AVX512VL is covered by ReplD_mem_leg
|
||||
assert(vlen == 4, "sanity");
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
}
|
||||
@ -3824,18 +3694,6 @@ instruct ReplD_mem(vec dst, memory mem) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplD_mem_leg(legVec dst, memory mem) %{
|
||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateD (LoadD mem)));
|
||||
format %{ "replicateD $dst,$mem" %}
|
||||
ins_encode %{
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct ReplD_zero(vec dst, immD0 zero) %{
|
||||
match(Set dst (ReplicateD zero));
|
||||
format %{ "replicateD $dst,$zero" %}
|
||||
|
Loading…
x
Reference in New Issue
Block a user