8283232: x86: Improve vector broadcast operations

Reviewed-by: kvn, jbhateja
This commit is contained in:
Quan Anh Mai 2022-08-04 16:27:45 +00:00 committed by Jatin Bhateja
parent 966ab219b4
commit 92d2982f3f
14 changed files with 467 additions and 195 deletions

View File

@ -896,6 +896,8 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
tail_size = 1;
break;
case 0x10: // movups
case 0x11: // movups
case 0x12: // movlps
case 0x28: // movaps
case 0x2E: // ucomiss
@ -2561,10 +2563,22 @@ void Assembler::movddup(XMMRegister dst, XMMRegister src) {
emit_int16(0x12, 0xC0 | encode);
}
// Memory-source form of movddup: emits opcode 0x12 under the F2 prefix in the
// 0F opcode map, with dst as the register operand and src as the memory operand.
// The memory operand is described to the encoder as an EVEX_DUP tuple with a
// 64-bit input size.
void Assembler::movddup(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse3(), ""));
  InstructionMark mark(this);
  InstructionAttr attrs(AVX_128bit,
                        /* rex_w */ VM_Version::supports_evex(),
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_DUP, /* input_size_in_bits */ EVEX_64bit);
  attrs.set_rex_vex_w_reverted();
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attrs);
  emit_int8(0x12);
  emit_operand(dst, src);
}
void Assembler::vmovddup(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_DUP, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x12);
@ -3505,6 +3519,46 @@ void Assembler::movswl(Register dst, Register src) { // movsxw
emit_int24(0x0F, (unsigned char)0xBF, (0xC0 | encode));
}
// movups, load form: no SIMD prefix, 0F opcode map, opcode 0x10, reading the
// vector at src into dst. Always encoded with a 128-bit vector length.
void Assembler::movups(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark mark(this);
  InstructionAttr attrs(AVX_128bit,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attrs);
  emit_int8(0x10);  // load opcode
  emit_operand(dst, src);
}
// Vector-length-parameterised movups load (opcode 0x10, no SIMD prefix, 0F map).
// A 512-bit vector_len requires EVEX support; any other length requires AVX.
void Assembler::vmovups(XMMRegister dst, Address src, int vector_len) {
  assert(vector_len == AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attrs);
  emit_int8(0x10);  // load opcode
  emit_operand(dst, src);
}
// movups, store form: no SIMD prefix, 0F opcode map, opcode 0x11, writing src
// to the memory operand dst. Always encoded with a 128-bit vector length.
void Assembler::movups(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark mark(this);
  InstructionAttr attrs(AVX_128bit,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_32bit);
  // The register is the "reg" operand and the address the "r/m" operand, so
  // src is passed first to both the prefix and the operand emitters.
  simd_prefix(src, xnoreg, dst, VEX_SIMD_NONE, VEX_OPCODE_0F, &attrs);
  emit_int8(0x11);  // store opcode
  emit_operand(src, dst);
}
// Vector-length-parameterised movups store (opcode 0x11, no SIMD prefix, 0F map).
// A 512-bit vector_len requires EVEX support; any other length requires AVX.
void Assembler::vmovups(Address dst, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_32bit);
  // The register is the "reg" operand and the address the "r/m" operand, so
  // src is passed first to both the prefix and the operand emitters.
  simd_prefix(src, xnoreg, dst, VEX_SIMD_NONE, VEX_OPCODE_0F, &attrs);
  emit_int8(0x11);  // store opcode
  emit_operand(src, dst);
}
void Assembler::movw(Address dst, int imm16) {
InstructionMark im(this);
@ -5156,7 +5210,7 @@ void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, i
emit_int24(0x43, (0xC0 | encode), imm8 & 0xFF);
}
void Assembler::pshufpd(XMMRegister dst, XMMRegister src, int imm8) {
void Assembler::shufpd(XMMRegister dst, XMMRegister src, int imm8) {
assert(isByte(imm8), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@ -5164,14 +5218,14 @@ void Assembler::pshufpd(XMMRegister dst, XMMRegister src, int imm8) {
emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
}
void Assembler::vpshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
// Three-operand, register-only shuffle of packed doubles: emits opcode 0xC6
// under the 66 prefix in the 0F opcode map, a register-form ModRM byte, and
// then the immediate selector.
void Assembler::vshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
// vex_w is set only when EVEX is supported and is additionally marked as
// "reverted". NOTE(review): presumably this lets the encoder drop W when it
// falls back to a VEX encoding — confirm against InstructionAttr.
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
// Only the low 8 bits of imm8 are emitted; no range check is performed here.
emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
}
void Assembler::pshufps(XMMRegister dst, XMMRegister src, int imm8) {
void Assembler::shufps(XMMRegister dst, XMMRegister src, int imm8) {
assert(isByte(imm8), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@ -5179,7 +5233,7 @@ void Assembler::pshufps(XMMRegister dst, XMMRegister src, int imm8) {
emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
}
void Assembler::vpshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
void Assembler::vshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);

View File

@ -1492,6 +1492,7 @@ private:
void movb(Register dst, Address src);
void movddup(XMMRegister dst, XMMRegister src);
void movddup(XMMRegister dst, Address src);
void vmovddup(XMMRegister dst, Address src, int vector_len);
void kandbl(KRegister dst, KRegister src1, KRegister src2);
@ -1663,6 +1664,11 @@ private:
void movswq(Register dst, Register src);
#endif
void movups(XMMRegister dst, Address src);
void vmovups(XMMRegister dst, Address src, int vector_len);
void movups(Address dst, XMMRegister src);
void vmovups(Address dst, XMMRegister src, int vector_len);
void movw(Address dst, int imm16);
void movw(Register dst, Address src);
void movw(Address dst, Register src);
@ -1942,10 +1948,10 @@ private:
void pshuflw(XMMRegister dst, Address src, int mode);
//shuffle floats and doubles
void pshufps(XMMRegister, XMMRegister, int);
void pshufpd(XMMRegister, XMMRegister, int);
void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
void shufps(XMMRegister, XMMRegister, int);
void shufpd(XMMRegister, XMMRegister, int);
void vshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
void vshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
// Shuffle packed values at 128 bit granularity
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);

View File

@ -1647,7 +1647,7 @@ void C2_MacroAssembler::load_vector(XMMRegister dst, Address src, int vlen_in_by
case 8: movq(dst, src); break;
case 16: movdqu(dst, src); break;
case 32: vmovdqu(dst, src); break;
case 64: evmovdquq(dst, src, Assembler::AVX_512bit); break;
case 64: evmovdqul(dst, src, Assembler::AVX_512bit); break;
default: ShouldNotReachHere();
}
}
@ -1661,6 +1661,38 @@ void C2_MacroAssembler::load_vector(XMMRegister dst, AddressLiteral src, int vle
}
}
// Fills dst with a replicated constant read from src, choosing the broadcast
// instruction from the element type bt and the available CPU features.
//   bt   - element type of the vector being materialised
//   dst  - destination vector register
//   src  - address of the constant to replicate
//   vlen - vector size; the 16 in the SSE2 fallback and the callers visible in
//          this change (Matcher::vector_length_in_bytes) indicate bytes
void C2_MacroAssembler::load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen) {
  const int enc = vector_length_encoding(vlen);

  if (!VM_Version::supports_avx()) {
    if (VM_Version::supports_sse3()) {
      // SSE3: a single movddup duplicates the 64-bit constant.
      movddup(dst, src);
      return;
    }
    // Pre-SSE3: load 64 bits, then duplicate them only for a 16-byte vector.
    movq(dst, src);
    if (vlen == 16) {
      punpcklqdq(dst, dst);
    }
    return;
  }

  switch (bt) {
    case T_LONG:
      if (VM_Version::supports_avx2()) {
        vpbroadcastq(dst, src, enc, noreg);
      } else {
        vmovddup(dst, src, enc, noreg);
      }
      break;
    case T_DOUBLE:
      // vbroadcastsd is used for everything but the 128-bit encoding, which
      // goes through vmovddup instead.
      if (enc != Assembler::AVX_128bit) {
        vbroadcastsd(dst, src, enc, noreg);
      } else {
        vmovddup(dst, src, enc, noreg);
      }
      break;
    default:
      // Remaining element types are broadcast 32 bits at a time: integer
      // broadcast when AVX2 is available and the type is integral, otherwise
      // the float broadcast.
      if (VM_Version::supports_avx2() && is_integral_type(bt)) {
        vpbroadcastd(dst, src, enc, noreg);
      } else {
        vbroadcastss(dst, src, enc, noreg);
      }
      break;
  }
}
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) {
ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
if (vlen_in_bytes <= 4) {
@ -2317,9 +2349,9 @@ void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src
if (typ == T_FLOAT) {
if (UseAVX == 0) {
movdqu(dst, src);
pshufps(dst, dst, eindex);
shufps(dst, dst, eindex);
} else {
vpshufps(dst, src, src, eindex, Assembler::AVX_128bit);
vshufps(dst, src, src, eindex, Assembler::AVX_128bit);
}
} else {
if (UseAVX == 0) {

View File

@ -159,6 +159,7 @@ public:
void load_vector(XMMRegister dst, Address src, int vlen_in_bytes);
void load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = rscratch1);
void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

View File

@ -2732,6 +2732,15 @@ void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
}
}
// movddup from an AddressLiteral. When the literal is not directly reachable,
// its address is first loaded into rscratch and the load goes through
// [rscratch + 0].
void MacroAssembler::movddup(XMMRegister dst, AddressLiteral src, Register rscratch) {
  if (!reachable(src)) {
    lea(rscratch, src);
    Assembler::movddup(dst, Address(rscratch, 0));
    return;
  }
  Assembler::movddup(dst, as_Address(src));
}
void MacroAssembler::vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
if (reachable(src)) {
Assembler::vmovddup(dst, as_Address(src), vector_len);
@ -3288,9 +3297,13 @@ void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src,
}
}
void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpbroadcastw(dst, src, vector_len);
// vpbroadcastd from an AddressLiteral. When the literal is not directly
// reachable, its address is first loaded into rscratch and the broadcast reads
// from [rscratch + 0].
void MacroAssembler::vpbroadcastd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
  if (!reachable(src)) {
    lea(rscratch, src);
    Assembler::vpbroadcastd(dst, Address(rscratch, 0), vector_len);
    return;
  }
  Assembler::vpbroadcastd(dst, as_Address(src), vector_len);
}
void MacroAssembler::vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
@ -3311,6 +3324,15 @@ void MacroAssembler::vbroadcastsd(XMMRegister dst, AddressLiteral src, int vecto
}
}
// vbroadcastss from an AddressLiteral. When the literal is not directly
// reachable, its address is first loaded into rscratch and the broadcast reads
// from [rscratch + 0].
void MacroAssembler::vbroadcastss(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
  if (!reachable(src)) {
    lea(rscratch, src);
    Assembler::vbroadcastss(dst, Address(rscratch, 0), vector_len);
    return;
  }
  Assembler::vbroadcastss(dst, as_Address(src), vector_len);
}
void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpcmpeqb(dst, nds, src, vector_len);
@ -4354,10 +4376,14 @@ void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file,
// Sets every bit of dst to one, picking the instruction by CPU capability:
//   - AVX-512 (512-bit vectors, or any length with AVX512VL): vpternlogd with
//     truth table 0xFF
//   - AVX: vpcmpeqd of dst with itself
//   - otherwise: the SSE2 pcmpeqd of dst with itself (vector_len is not used)
void MacroAssembler::vallones(XMMRegister dst, int vector_len) {
  if (UseAVX > 2 && (vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
    // Only pcmpeq has dependency breaking treatment (i.e. the execution can begin without
    // waiting for the previous result on dst), not vpcmpeqd, so just use vpternlog
    vpternlogd(dst, 0xFF, dst, dst, vector_len);
  } else if (VM_Version::supports_avx()) {
    vpcmpeqd(dst, dst, dst, vector_len);
  } else {
    // This branch is reached only without AVX, so it must neither assert
    // UseAVX > 0 nor emit a VEX-encoded compare; the SSE2 form is the only
    // valid fallback.
    assert(VM_Version::supports_sse2(), "");
    pcmpeqd(dst, dst);
  }
}

View File

@ -1114,6 +1114,12 @@ public:
void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
void addpd(XMMRegister dst, AddressLiteral src);
using Assembler::vbroadcastsd;
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
using Assembler::vbroadcastss;
void vbroadcastss(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
void divsd(XMMRegister dst, AddressLiteral src);
@ -1150,6 +1156,11 @@ public:
void kmov(Register dst, KRegister src);
void kmov(KRegister dst, Register src);
using Assembler::movddup;
void movddup(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
using Assembler::vmovddup;
void vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
// AVX Unaligned forms
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
@ -1157,7 +1168,6 @@ public:
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len);
// AVX512 Unaligned
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
@ -1229,9 +1239,6 @@ public:
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, AddressLiteral src);
using Assembler::vmovddup;
void vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
void mulpd(XMMRegister dst, AddressLiteral src);
@ -1337,16 +1344,11 @@ public:
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
using Assembler::vpbroadcastd;
void vpbroadcastd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
using Assembler::vbroadcastsd;
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
using Assembler::vpbroadcastq;
void vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpbroadcastq(dst, src, vector_len); }
void vpbroadcastq(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastq(dst, src, vector_len); }
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

View File

@ -4107,37 +4107,43 @@ instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rReg
// ====================REPLICATE=======================================
// Replicate byte scalar to be vector
instruct ReplB_reg(vec dst, rRegI src) %{
instruct vReplB_reg(vec dst, rRegI src) %{
predicate(UseAVX >= 2);
match(Set dst (ReplicateB src));
format %{ "replicateB $dst,$src" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
int vlen_enc = vector_length_encoding(this);
if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
int vlen_enc = vector_length_encoding(this);
__ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
} else if (VM_Version::supports_avx2()) {
int vlen_enc = vector_length_encoding(this);
} else {
__ movdl($dst$$XMMRegister, $src$$Register);
__ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
}
%}
ins_pipe( pipe_slow );
%}
instruct ReplB_reg(vec dst, rRegI src) %{
predicate(UseAVX < 2);
match(Set dst (ReplicateB src));
format %{ "replicateB $dst,$src" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
__ movdl($dst$$XMMRegister, $src$$Register);
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
if (vlen >= 16) {
assert(vlen == 16, "");
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
if (vlen >= 32) {
assert(vlen == 32, "sanity");
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
}
}
%}
ins_pipe( pipe_slow );
%}
instruct ReplB_mem(vec dst, memory mem) %{
predicate(VM_Version::supports_avx2());
predicate(UseAVX >= 2);
match(Set dst (ReplicateB (LoadB mem)));
format %{ "replicateB $dst,$mem" %}
ins_encode %{
@ -4147,48 +4153,45 @@ instruct ReplB_mem(vec dst, memory mem) %{
ins_pipe( pipe_slow );
%}
instruct ReplB_imm(vec dst, immI con) %{
match(Set dst (ReplicateB con));
format %{ "replicateB $dst,$con" %}
ins_encode %{
InternalAddress addr = $constantaddress(T_BYTE, vreplicate_imm(T_BYTE, $con$$constant, Matcher::vector_length(this)));
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
%}
ins_pipe( pipe_slow );
%}
// ====================ReplicateS=======================================
instruct ReplS_reg(vec dst, rRegI src) %{
instruct vReplS_reg(vec dst, rRegI src) %{
predicate(UseAVX >= 2);
match(Set dst (ReplicateS src));
format %{ "replicateS $dst,$src" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
int vlen_enc = vector_length_encoding(this);
if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
int vlen_enc = vector_length_encoding(this);
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
} else if (VM_Version::supports_avx2()) {
int vlen_enc = vector_length_encoding(this);
} else {
__ movdl($dst$$XMMRegister, $src$$Register);
__ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
}
%}
ins_pipe( pipe_slow );
%}
instruct ReplS_reg(vec dst, rRegI src) %{
predicate(UseAVX < 2);
match(Set dst (ReplicateS src));
format %{ "replicateS $dst,$src" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
int vlen_enc = vector_length_encoding(this);
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
if (vlen >= 8) {
assert(vlen == 8, "");
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
if (vlen >= 16) {
assert(vlen == 16, "sanity");
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
}
}
%}
ins_pipe( pipe_slow );
%}
instruct ReplS_mem(vec dst, memory mem) %{
predicate(VM_Version::supports_avx2());
predicate(UseAVX >= 2);
match(Set dst (ReplicateS (LoadS mem)));
format %{ "replicateS $dst,$mem" %}
ins_encode %{
@ -4198,16 +4201,6 @@ instruct ReplS_mem(vec dst, memory mem) %{
ins_pipe( pipe_slow );
%}
instruct ReplS_imm(vec dst, immI con) %{
match(Set dst (ReplicateS con));
format %{ "replicateS $dst,$con" %}
ins_encode %{
InternalAddress addr = $constantaddress(T_SHORT, vreplicate_imm(T_SHORT, $con$$constant, Matcher::vector_length(this)));
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
%}
ins_pipe( pipe_slow );
%}
// ====================ReplicateI=======================================
instruct ReplI_reg(vec dst, rRegI src) %{
@ -4215,20 +4208,15 @@ instruct ReplI_reg(vec dst, rRegI src) %{
format %{ "replicateI $dst,$src" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
int vlen_enc = vector_length_encoding(this);
if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
__ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
} else if (VM_Version::supports_avx2()) {
int vlen_enc = vector_length_encoding(this);
__ movdl($dst$$XMMRegister, $src$$Register);
__ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
if (vlen >= 8) {
assert(vlen == 8, "sanity");
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
}
%}
ins_pipe( pipe_slow );
@ -4238,25 +4226,32 @@ instruct ReplI_mem(vec dst, memory mem) %{
match(Set dst (ReplicateI (LoadI mem)));
format %{ "replicateI $dst,$mem" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
if (vlen <= 4) {
int vlen_enc = vector_length_encoding(this);
if (VM_Version::supports_avx2()) {
__ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
} else if (VM_Version::supports_avx()) {
__ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
} else {
__ movdl($dst$$XMMRegister, $mem$$Address);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
} else {
assert(VM_Version::supports_avx2(), "sanity");
int vlen_enc = vector_length_encoding(this);
__ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
instruct ReplI_imm(vec dst, immI con) %{
match(Set dst (ReplicateB con));
match(Set dst (ReplicateS con));
match(Set dst (ReplicateI con));
format %{ "replicateI $dst,$con" %}
ins_encode %{
InternalAddress addr = $constantaddress(T_INT, vreplicate_imm(T_INT, $con$$constant, Matcher::vector_length(this)));
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this),
vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
(VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 8) /
type2aelembytes(Matcher::vector_element_basic_type(this))));
BasicType bt = Matcher::vector_element_basic_type(this);
int vlen = Matcher::vector_length_in_bytes(this);
__ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
%}
ins_pipe( pipe_slow );
%}
@ -4268,23 +4263,21 @@ instruct ReplI_zero(vec dst, immI_0 zero) %{
match(Set dst (ReplicateI zero));
format %{ "replicateI $dst,$zero" %}
ins_encode %{
uint vsize = Matcher::vector_length_in_bytes(this);
if (vsize <= 16) {
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
} else {
int vlen_enc = vector_length_encoding(this);
if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
}
%}
ins_pipe( fpu_reg_reg );
%}
instruct ReplI_M1(vec dst, immI_M1 con) %{
predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) >= 16);
predicate(UseSSE >= 2);
match(Set dst (ReplicateB con));
match(Set dst (ReplicateS con));
match(Set dst (ReplicateI con));
effect(TEMP dst);
format %{ "vallones $dst" %}
ins_encode %{
int vector_len = vector_length_encoding(this);
@ -4301,23 +4294,16 @@ instruct ReplL_reg(vec dst, rRegL src) %{
match(Set dst (ReplicateL src));
format %{ "replicateL $dst,$src" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
if (vlen == 2) {
__ movdq($dst$$XMMRegister, $src$$Register);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
int vlen = Matcher::vector_length(this);
int vlen_enc = vector_length_encoding(this);
if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
__ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
} else if (VM_Version::supports_avx2()) {
assert(vlen == 4, "sanity");
int vlen_enc = vector_length_encoding(this);
__ movdq($dst$$XMMRegister, $src$$Register);
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
assert(vlen == 4, "sanity");
__ movdq($dst$$XMMRegister, $src$$Register);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
%}
ins_pipe( pipe_slow );
@ -4382,14 +4368,14 @@ instruct ReplL_mem(vec dst, memory mem) %{
match(Set dst (ReplicateL (LoadL mem)));
format %{ "replicateL $dst,$mem" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
if (vlen == 2) {
int vlen_enc = vector_length_encoding(this);
if (VM_Version::supports_avx2()) {
__ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
} else if (VM_Version::supports_sse3()) {
__ movddup($dst$$XMMRegister, $mem$$Address);
} else {
__ movq($dst$$XMMRegister, $mem$$Address);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
} else {
assert(VM_Version::supports_avx2(), "sanity");
int vlen_enc = vector_length_encoding(this);
__ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
}
%}
ins_pipe( pipe_slow );
@ -4400,8 +4386,9 @@ instruct ReplL_imm(vec dst, immL con) %{
match(Set dst (ReplicateL con));
format %{ "replicateL $dst,$con" %}
ins_encode %{
InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, Matcher::vector_length(this)));
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1));
int vlen = Matcher::vector_length_in_bytes(this);
__ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
%}
ins_pipe( pipe_slow );
%}
@ -4410,21 +4397,19 @@ instruct ReplL_zero(vec dst, immL0 zero) %{
match(Set dst (ReplicateL zero));
format %{ "replicateL $dst,$zero" %}
ins_encode %{
int vlen = Matcher::vector_length(this);
if (vlen == 2) {
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
} else {
int vlen_enc = vector_length_encoding(this);
if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
}
%}
ins_pipe( fpu_reg_reg );
%}
instruct ReplL_M1(vec dst, immL_M1 con) %{
predicate(UseAVX > 0);
predicate(UseSSE >= 2);
match(Set dst (ReplicateL con));
effect(TEMP dst);
format %{ "vallones $dst" %}
ins_encode %{
int vector_len = vector_length_encoding(this);
@ -4435,38 +4420,43 @@ instruct ReplL_M1(vec dst, immL_M1 con) %{
// ====================ReplicateF=======================================
instruct ReplF_reg(vec dst, vlRegF src) %{
instruct vReplF_reg(vec dst, vlRegF src) %{
predicate(UseAVX > 0);
match(Set dst (ReplicateF src));
format %{ "replicateF $dst,$src" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
if (vlen <= 4) {
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
} else if (VM_Version::supports_avx2()) {
int vlen_enc = vector_length_encoding(this);
if (vlen <= 4) {
__ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
} else if (VM_Version::supports_avx2()) {
__ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
} else {
assert(vlen == 8, "sanity");
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
__ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
%}
ins_pipe( pipe_slow );
%}
// Replicate a float held in a register, for the case where AVX is not in use
// (UseAVX == 0).
instruct ReplF_reg(vec dst, vlRegF src) %{
predicate(UseAVX == 0);
match(Set dst (ReplicateF src));
format %{ "replicateF $dst,$src" %}
ins_encode %{
// Selector 0x00 picks 32-bit element 0 of $src for every destination element.
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
%}
ins_pipe( pipe_slow );
%}
instruct ReplF_mem(vec dst, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (ReplicateF (LoadF mem)));
format %{ "replicateF $dst,$mem" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
if (vlen <= 4) {
__ movdl($dst$$XMMRegister, $mem$$Address);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
} else {
assert(VM_Version::supports_avx(), "sanity");
int vlen_enc = vector_length_encoding(this);
__ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
@ -4476,8 +4466,10 @@ instruct ReplF_imm(vec dst, immF con) %{
match(Set dst (ReplicateF con));
format %{ "replicateF $dst,$con" %}
ins_encode %{
InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, Matcher::vector_length(this)));
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant,
VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 2));
int vlen = Matcher::vector_length_in_bytes(this);
__ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
%}
ins_pipe( pipe_slow );
%}
@ -4486,12 +4478,11 @@ instruct ReplF_zero(vec dst, immF0 zero) %{
match(Set dst (ReplicateF zero));
format %{ "replicateF $dst,$zero" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
if (vlen <= 4) {
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
} else {
int vlen_enc = vector_length_encoding(this);
__ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ
if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
}
%}
ins_pipe( fpu_reg_reg );
@ -4500,37 +4491,46 @@ instruct ReplF_zero(vec dst, immF0 zero) %{
// ====================ReplicateD=======================================
// Replicate double (8 bytes) scalar to be vector
instruct ReplD_reg(vec dst, vlRegD src) %{
instruct vReplD_reg(vec dst, vlRegD src) %{
predicate(UseSSE >= 3);
match(Set dst (ReplicateD src));
format %{ "replicateD $dst,$src" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
if (vlen == 2) {
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
} else if (VM_Version::supports_avx2()) {
int vlen_enc = vector_length_encoding(this);
if (vlen <= 2) {
__ movddup($dst$$XMMRegister, $src$$XMMRegister);
} else if (VM_Version::supports_avx2()) {
__ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
} else {
assert(vlen == 4, "sanity");
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
__ movddup($dst$$XMMRegister, $src$$XMMRegister);
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
%}
ins_pipe( pipe_slow );
%}
// Replicate a double held in a register, for the case where SSE3 is not
// available (UseSSE < 3).
instruct ReplD_reg(vec dst, vlRegD src) %{
predicate(UseSSE < 3);
match(Set dst (ReplicateD src));
format %{ "replicateD $dst,$src" %}
ins_encode %{
// Selector 0x44 picks 32-bit elements 0,1,0,1 of $src, i.e. the low 64 bits
// are copied into both halves of $dst.
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
%}
ins_pipe( pipe_slow );
%}
instruct ReplD_mem(vec dst, memory mem) %{
predicate(UseSSE >= 3);
match(Set dst (ReplicateD (LoadD mem)));
format %{ "replicateD $dst,$mem" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
if (vlen == 2) {
__ movq($dst$$XMMRegister, $mem$$Address);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44);
} else {
assert(VM_Version::supports_avx(), "sanity");
if (Matcher::vector_length(this) >= 4) {
int vlen_enc = vector_length_encoding(this);
__ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
} else {
__ movddup($dst$$XMMRegister, $mem$$Address);
}
%}
ins_pipe( pipe_slow );
@ -4541,8 +4541,9 @@ instruct ReplD_imm(vec dst, immD con) %{
match(Set dst (ReplicateD con));
format %{ "replicateD $dst,$con" %}
ins_encode %{
InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, Matcher::vector_length(this)));
__ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this));
InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1));
int vlen = Matcher::vector_length_in_bytes(this);
__ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
%}
ins_pipe( pipe_slow );
%}
@ -4551,12 +4552,11 @@ instruct ReplD_zero(vec dst, immD0 zero) %{
match(Set dst (ReplicateD zero));
format %{ "replicateD $dst,$zero" %}
ins_encode %{
uint vlen = Matcher::vector_length(this);
if (vlen == 2) {
__ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
} else {
int vlen_enc = vector_length_encoding(this);
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ
if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
}
%}
ins_pipe( fpu_reg_reg );
@ -8335,7 +8335,7 @@ instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
effect(TEMP_DEF dst, TEMP vtmp);
ins_encode %{
int vlen_enc = Assembler::AVX_128bit;
__ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
__ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
__ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
__ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
__ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

View File

@ -3334,20 +3334,20 @@ void ADLParser::constant_parse_expression(EncClass* encoding, char* ec_name) {
if (_curchar == '(') {
parens_depth++;
encoding->add_code("(");
next_char();
next_char_or_line();
}
else if (_curchar == ')') {
parens_depth--;
if (parens_depth > 0)
encoding->add_code(")");
next_char();
next_char_or_line();
}
else {
// (1)
// Check if there is a string to pass through to output
char *start = _ptr; // Record start of the next string
while ((_curchar != '$') && (_curchar != '(') && (_curchar != ')')) {
next_char();
next_char_or_line();
}
// If a string was found, terminate it and record in EncClass
if (start != _ptr) {

View File

@ -65,7 +65,7 @@ InstructForm::InstructForm(const char *id, InstructForm *instr, MatchRule *rule)
: _ident(id), _ideal_only(false),
_localNames(instr->_localNames),
_effects(instr->_effects),
_is_mach_constant(false),
_is_mach_constant(instr->_is_mach_constant),
_needs_constant_base(false),
_has_call(false)
{
@ -4090,12 +4090,6 @@ int MatchRule::is_expensive() const {
strcmp(opType,"ReverseBytesL")==0 ||
strcmp(opType,"ReverseBytesUS")==0 ||
strcmp(opType,"ReverseBytesS")==0 ||
strcmp(opType,"ReplicateB")==0 ||
strcmp(opType,"ReplicateS")==0 ||
strcmp(opType,"ReplicateI")==0 ||
strcmp(opType,"ReplicateL")==0 ||
strcmp(opType,"ReplicateF")==0 ||
strcmp(opType,"ReplicateD")==0 ||
strcmp(opType,"PopulateIndex")==0 ||
strcmp(opType,"AddReductionVI")==0 ||
strcmp(opType,"AddReductionVL")==0 ||
@ -4111,9 +4105,10 @@ int MatchRule::is_expensive() const {
strcmp(opType,"OrReductionV")==0 ||
strcmp(opType,"XorReductionV")==0 ||
strcmp(opType,"MaskAll")==0 ||
0 /* 0 to line up columns nicely */ )
0 /* 0 to line up columns nicely */ ) {
return 1;
}
}
return 0;
}

View File

@ -429,11 +429,11 @@ class AbstractAssembler : public ResourceObj {
}
return ptr;
}
address array_constant(BasicType bt, GrowableArray<jvalue>* c) {
address array_constant(BasicType bt, GrowableArray<jvalue>* c, int alignment) {
CodeSection* c1 = _code_section;
int len = c->length();
int size = type2aelembytes(bt) * len;
address ptr = start_a_const(size, MIN2(round_up_power_of_2(size), 8));
address ptr = start_a_const(size, alignment);
if (ptr != NULL) {
for (int i = 0; i < len; i++) {
jvalue e = c->at(i);

View File

@ -36,7 +36,30 @@ bool ConstantTable::Constant::operator==(const Constant& other) {
if (type() != other.type() ) return false;
if (can_be_reused() != other.can_be_reused()) return false;
if (is_array() || other.is_array()) {
return is_array() && other.is_array() && _v._array == other._v._array;
if (is_array() != other.is_array() ||
get_array()->length() != other.get_array()->length()) {
return false;
}
for (int i = 0; i < get_array()->length(); i++) {
jvalue ele1 = get_array()->at(i);
jvalue ele2 = other.get_array()->at(i);
bool is_eq;
switch (type()) {
case T_BOOLEAN: is_eq = ele1.z == ele2.z; break;
case T_BYTE: is_eq = ele1.b == ele2.b; break;
case T_CHAR: is_eq = ele1.c == ele2.c; break;
case T_SHORT: is_eq = ele1.s == ele2.s; break;
case T_INT: is_eq = ele1.i == ele2.i; break;
case T_LONG: is_eq = ele1.j == ele2.j; break;
case T_FLOAT: is_eq = jint_cast(ele1.f) == jint_cast(ele2.f); break;
case T_DOUBLE: is_eq = jlong_cast(ele1.d) == jlong_cast(ele2.d); break;
default: ShouldNotReachHere(); is_eq = false;
}
if (!is_eq) {
return false;
}
}
return true;
}
// For floating point values we compare the bit pattern.
switch (type()) {
@ -104,7 +127,7 @@ void ConstantTable::calculate_offsets_and_size() {
// Align offset for type.
int typesize = constant_size(con);
assert(typesize <= 8 || con->is_array(), "sanity");
offset = align_up(offset, MIN2(round_up_power_of_2(typesize), 8));
offset = align_up(offset, con->alignment());
con->set_offset(offset); // set constant's offset
if (con->type() == T_VOID) {
@ -127,7 +150,7 @@ bool ConstantTable::emit(CodeBuffer& cb) const {
Constant con = _constants.at(i);
address constant_addr = NULL;
if (con.is_array()) {
constant_addr = _masm.array_constant(con.type(), con.get_array());
constant_addr = _masm.array_constant(con.type(), con.get_array(), con.alignment());
} else {
switch (con.type()) {
case T_INT: constant_addr = _masm.int_constant( con.get_jint() ); break;
@ -229,12 +252,18 @@ ConstantTable::Constant ConstantTable::add(Metadata* metadata) {
return con;
}
ConstantTable::Constant ConstantTable::add(MachConstantNode* n, BasicType bt, GrowableArray<jvalue>* array) {
Constant con(bt, array);
ConstantTable::Constant ConstantTable::add(MachConstantNode* n, BasicType bt,
GrowableArray<jvalue>* array, int alignment) {
Constant con(bt, array, alignment);
add(con);
return con;
}
// Convenience overload for array constants: forwards to the four-argument
// add() using the array's total size in bytes (element count times element
// size) as the requested alignment.
ConstantTable::Constant ConstantTable::add(MachConstantNode* n, BasicType bt,
                                           GrowableArray<jvalue>* array) {
  // Total payload size doubles as the alignment request.
  const int size_in_bytes = array->length() * type2aelembytes(bt);
  return add(n, bt, array, size_in_bytes);
}
ConstantTable::Constant ConstantTable::add(MachConstantNode* n, MachOper* oper) {
jvalue value;
BasicType type = oper->type()->basic_type();

View File

@ -39,6 +39,7 @@ public:
private:
BasicType _type;
bool _is_array;
int _alignment;
union {
jvalue _value;
Metadata* _metadata;
@ -49,7 +50,7 @@ public:
bool _can_be_reused; // true (default) if the value can be shared with other users.
public:
Constant() : _type(T_ILLEGAL), _is_array(false), _offset(-1), _freq(0.0f), _can_be_reused(true) { _v._value.l = 0; }
Constant() : _type(T_ILLEGAL), _is_array(false), _alignment(-1), _offset(-1), _freq(0.0f), _can_be_reused(true) { _v._value.l = 0; }
Constant(BasicType type, jvalue value, float freq = 0.0f, bool can_be_reused = true) :
_type(type),
_is_array(false),
@ -59,24 +60,28 @@ public:
{
assert(type != T_METADATA, "wrong constructor");
_v._value = value;
_alignment = type == T_VOID ? sizeof(jobject) : type2aelembytes(type);
}
// Builds a non-array constant of type T_METADATA that wraps the given
// Metadata pointer. The alignment is the size of a Metadata pointer, the
// offset starts at -1 and the frequency at 0.0f.
// NOTE(review): -1 presumably means "offset not yet assigned" -- confirm.
//
// metadata       pointer stored in the constant's value union
// can_be_reused  stored as-is in _can_be_reused (defaults to true)
Constant(Metadata* metadata, bool can_be_reused = true) :
_type(T_METADATA),
_is_array(false),
_alignment(sizeof(Metadata*)),
_offset(-1),
_freq(0.0f),
_can_be_reused(can_be_reused)
{
_v._metadata = metadata;
}
Constant(BasicType type, GrowableArray<jvalue>* array) :
Constant(BasicType type, GrowableArray<jvalue>* array, int alignment, bool can_be_reused = true) :
_type(type),
_is_array(true),
_alignment(alignment),
_offset(-1),
_freq(0.0f),
_can_be_reused(false)
_can_be_reused(can_be_reused)
{
assert(is_java_primitive(type), "not applicable for %s", type2name(type));
assert(is_power_of_2(alignment), "invalid alignment %d", alignment);
_v._array = new GrowableArray<jvalue>(array->length());
for (jvalue ele : *array) {
_v._array->append(ele);
@ -87,6 +92,7 @@ public:
BasicType type() const { return _type; }
bool is_array() const { return _is_array; }
int alignment() const { return _alignment; }
jint get_jint() const { return _v._value.i; }
jlong get_jlong() const { return _v._value.j; }
@ -145,6 +151,7 @@ public:
Constant add(MachConstantNode* n, BasicType type, jvalue value);
Constant add(Metadata* metadata);
Constant add(MachConstantNode* n, BasicType bt, GrowableArray<jvalue>* array);
Constant add(MachConstantNode* n, BasicType bt, GrowableArray<jvalue>* array, int alignment);
Constant add(MachConstantNode* n, MachOper* oper);
Constant add(MachConstantNode* n, jint i) {
jvalue value; value.i = i;

View File

@ -474,14 +474,15 @@ bool MachNode::rematerialize() const {
}
// Stretching lots of inputs - don't do it.
if (req() > 2) {
// A MachConstant has the constant base as its last input
if (req() > (is_MachConstant() ? 3U : 2U)) {
return false;
}
if (req() == 2 && in(1) && in(1)->ideal_reg() == Op_RegFlags) {
if (req() >= 2 && in(1) && in(1)->ideal_reg() == Op_RegFlags) {
// In(1) will be rematerialized, too.
// Stretching lots of inputs - don't do it.
if (in(1)->req() > 2) {
if (in(1)->req() > (in(1)->is_MachConstant() ? 3U : 2U)) {
return false;
}
}
@ -491,7 +492,7 @@ bool MachNode::rematerialize() const {
uint idx = oper_input_base();
if (req() > idx) {
const RegMask &rm = in_RegMask(idx);
if (rm.is_bound(ideal_reg())) {
if (rm.is_NotEmpty() && rm.is_bound(ideal_reg())) {
return false;
}
}

View File

@ -0,0 +1,119 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.jdk.incubator.vector;
import java.util.concurrent.TimeUnit;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import org.openjdk.jmh.annotations.*;
/**
 * JMH microbenchmark that runs long chains of vector-plus-scalar additions on
 * int, long, float and double vectors of the preferred species.
 *
 * Results are reported as average time per invocation in nanoseconds, using a
 * single fork.
 *
 * NOTE(review): the name presumably refers to replicated (broadcast) scalar
 * constants being spilt under register pressure -- confirm with the author.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(1)
public class SpiltReplicate {
/**
 * Starts from a zero IntVector of the preferred species and adds the scalars
 * 1..32 twice (64 add calls in total), then reinterprets the result as longs.
 * Marked DONT_INLINE so that it is not inlined into the benchmark method.
 *
 * @return lane 0 of the sum reinterpreted as a long vector
 */
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public long broadcastInt() {
var species = IntVector.SPECIES_PREFERRED;
var sum = IntVector.zero(species);
return sum.add(1).add(2).add(3).add(4).add(5).add(6).add(7).add(8)
.add(9).add(10).add(11).add(12).add(13).add(14).add(15).add(16)
.add(17).add(18).add(19).add(20).add(21).add(22).add(23).add(24)
.add(25).add(26).add(27).add(28).add(29).add(30).add(31).add(32)
.add(1).add(2).add(3).add(4).add(5).add(6).add(7).add(8)
.add(9).add(10).add(11).add(12).add(13).add(14).add(15).add(16)
.add(17).add(18).add(19).add(20).add(21).add(22).add(23).add(24)
.add(25).add(26).add(27).add(28).add(29).add(30).add(31).add(32)
.reinterpretAsLongs()
.lane(0);
}
/**
 * Starts from a zero LongVector of the preferred species and adds the scalars
 * 1..32 twice (64 add calls in total), then reinterprets the result as longs.
 * Marked DONT_INLINE so that it is not inlined into the benchmark method.
 *
 * @return lane 0 of the sum
 */
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public long broadcastLong() {
var species = LongVector.SPECIES_PREFERRED;
var sum = LongVector.zero(species);
return sum.add(1).add(2).add(3).add(4).add(5).add(6).add(7).add(8)
.add(9).add(10).add(11).add(12).add(13).add(14).add(15).add(16)
.add(17).add(18).add(19).add(20).add(21).add(22).add(23).add(24)
.add(25).add(26).add(27).add(28).add(29).add(30).add(31).add(32)
.add(1).add(2).add(3).add(4).add(5).add(6).add(7).add(8)
.add(9).add(10).add(11).add(12).add(13).add(14).add(15).add(16)
.add(17).add(18).add(19).add(20).add(21).add(22).add(23).add(24)
.add(25).add(26).add(27).add(28).add(29).add(30).add(31).add(32)
.reinterpretAsLongs()
.lane(0);
}
/**
 * Starts from a zero FloatVector of the preferred species and adds the
 * scalars 1..32 twice (64 add calls in total), then reinterprets the result
 * as longs. Marked DONT_INLINE so that it is not inlined into the benchmark
 * method.
 *
 * @return lane 0 of the sum reinterpreted as a long vector
 */
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public long broadcastFloat() {
var species = FloatVector.SPECIES_PREFERRED;
var sum = FloatVector.zero(species);
return sum.add(1).add(2).add(3).add(4).add(5).add(6).add(7).add(8)
.add(9).add(10).add(11).add(12).add(13).add(14).add(15).add(16)
.add(17).add(18).add(19).add(20).add(21).add(22).add(23).add(24)
.add(25).add(26).add(27).add(28).add(29).add(30).add(31).add(32)
.add(1).add(2).add(3).add(4).add(5).add(6).add(7).add(8)
.add(9).add(10).add(11).add(12).add(13).add(14).add(15).add(16)
.add(17).add(18).add(19).add(20).add(21).add(22).add(23).add(24)
.add(25).add(26).add(27).add(28).add(29).add(30).add(31).add(32)
.reinterpretAsLongs()
.lane(0);
}
/**
 * Starts from a zero DoubleVector of the preferred species and adds the
 * scalars 1..32 twice (64 add calls in total), then reinterprets the result
 * as longs. Marked DONT_INLINE so that it is not inlined into the benchmark
 * method.
 *
 * @return lane 0 of the sum reinterpreted as a long vector
 */
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public long broadcastDouble() {
var species = DoubleVector.SPECIES_PREFERRED;
var sum = DoubleVector.zero(species);
return sum.add(1).add(2).add(3).add(4).add(5).add(6).add(7).add(8)
.add(9).add(10).add(11).add(12).add(13).add(14).add(15).add(16)
.add(17).add(18).add(19).add(20).add(21).add(22).add(23).add(24)
.add(25).add(26).add(27).add(28).add(29).add(30).add(31).add(32)
.add(1).add(2).add(3).add(4).add(5).add(6).add(7).add(8)
.add(9).add(10).add(11).add(12).add(13).add(14).add(15).add(16)
.add(17).add(18).add(19).add(20).add(21).add(22).add(23).add(24)
.add(25).add(26).add(27).add(28).add(29).add(30).add(31).add(32)
.reinterpretAsLongs()
.lane(0);
}
/** Benchmark entry point: calls {@link #broadcastInt()} and discards the result. */
@Benchmark
public void testInt() {
broadcastInt();
}
/** Benchmark entry point: calls {@link #broadcastLong()} and discards the result. */
@Benchmark
public void testLong() {
broadcastLong();
}
/** Benchmark entry point: calls {@link #broadcastFloat()} and discards the result. */
@Benchmark
public void testFloat() {
broadcastFloat();
}
/** Benchmark entry point: calls {@link #broadcastDouble()} and discards the result. */
@Benchmark
public void testDouble() {
broadcastDouble();
}
}