8275047: Optimize existing fill stubs for AVX-512 target
Reviewed-by: kvn, redestad
This commit is contained in:
parent
63e0f344e9
commit
4be88d5482
@ -5026,7 +5026,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
|
|||||||
|
|
||||||
BIND(L_loop);
|
BIND(L_loop);
|
||||||
if (MaxVectorSize >= 32) {
|
if (MaxVectorSize >= 32) {
|
||||||
fill64_avx(base, 0, xtmp, use64byteVector);
|
fill64(base, 0, xtmp, use64byteVector);
|
||||||
} else {
|
} else {
|
||||||
movdqu(Address(base, 0), xtmp);
|
movdqu(Address(base, 0), xtmp);
|
||||||
movdqu(Address(base, 16), xtmp);
|
movdqu(Address(base, 16), xtmp);
|
||||||
@ -5043,7 +5043,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
|
|||||||
if (use64byteVector) {
|
if (use64byteVector) {
|
||||||
addptr(cnt, 8);
|
addptr(cnt, 8);
|
||||||
jccb(Assembler::equal, L_end);
|
jccb(Assembler::equal, L_end);
|
||||||
fill64_masked_avx(3, base, 0, xtmp, mask, cnt, rtmp, true);
|
fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true);
|
||||||
jmp(L_end);
|
jmp(L_end);
|
||||||
} else {
|
} else {
|
||||||
addptr(cnt, 4);
|
addptr(cnt, 4);
|
||||||
@ -5062,7 +5062,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
|
|||||||
addptr(cnt, 4);
|
addptr(cnt, 4);
|
||||||
jccb(Assembler::lessEqual, L_end);
|
jccb(Assembler::lessEqual, L_end);
|
||||||
if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
|
if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
|
||||||
fill32_masked_avx(3, base, 0, xtmp, mask, cnt, rtmp);
|
fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp);
|
||||||
} else {
|
} else {
|
||||||
decrement(cnt);
|
decrement(cnt);
|
||||||
|
|
||||||
@ -5086,7 +5086,7 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
|
|||||||
// 64 byte initialization loop.
|
// 64 byte initialization loop.
|
||||||
vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
|
vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
|
||||||
for (int i = 0; i < vector64_count; i++) {
|
for (int i = 0; i < vector64_count; i++) {
|
||||||
fill64_avx(base, i * 64, xtmp, use64byteVector);
|
fill64(base, i * 64, xtmp, use64byteVector);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear remaining 64 byte tail.
|
// Clear remaining 64 byte tail.
|
||||||
@ -5207,6 +5207,15 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
|
|||||||
Label L_exit;
|
Label L_exit;
|
||||||
Label L_fill_2_bytes, L_fill_4_bytes;
|
Label L_fill_2_bytes, L_fill_4_bytes;
|
||||||
|
|
||||||
|
#if defined(COMPILER2) && defined(_LP64)
|
||||||
|
if(MaxVectorSize >=32 &&
|
||||||
|
VM_Version::supports_avx512vlbw() &&
|
||||||
|
VM_Version::supports_bmi2()) {
|
||||||
|
generate_fill_avx3(t, to, value, count, rtmp, xtmp);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int shift = -1;
|
int shift = -1;
|
||||||
switch (t) {
|
switch (t) {
|
||||||
case T_BYTE:
|
case T_BYTE:
|
||||||
@ -5427,6 +5436,30 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
|
|||||||
BIND(L_exit);
|
BIND(L_exit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MacroAssembler::evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len) {
|
||||||
|
switch(type) {
|
||||||
|
case T_BYTE:
|
||||||
|
case T_BOOLEAN:
|
||||||
|
evpbroadcastb(dst, src, vector_len);
|
||||||
|
break;
|
||||||
|
case T_SHORT:
|
||||||
|
case T_CHAR:
|
||||||
|
evpbroadcastw(dst, src, vector_len);
|
||||||
|
break;
|
||||||
|
case T_INT:
|
||||||
|
case T_FLOAT:
|
||||||
|
evpbroadcastd(dst, src, vector_len);
|
||||||
|
break;
|
||||||
|
case T_LONG:
|
||||||
|
case T_DOUBLE:
|
||||||
|
evpbroadcastq(dst, src, vector_len);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fatal("Unhandled type : %s", type2name(type));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// encode char[] to byte[] in ISO_8859_1 or ASCII
|
// encode char[] to byte[] in ISO_8859_1 or ASCII
|
||||||
//@IntrinsicCandidate
|
//@IntrinsicCandidate
|
||||||
//private static int implEncodeISOArray(byte[] sa, int sp,
|
//private static int implEncodeISOArray(byte[] sa, int sp,
|
||||||
@ -8236,59 +8269,234 @@ void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMR
|
|||||||
|
|
||||||
#if COMPILER2_OR_JVMCI
|
#if COMPILER2_OR_JVMCI
|
||||||
|
|
||||||
|
void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
|
||||||
|
Register length, Register temp, int vec_enc) {
|
||||||
|
// Computing mask for predicated vector store.
|
||||||
|
movptr(temp, -1);
|
||||||
|
bzhiq(temp, temp, length);
|
||||||
|
kmov(mask, temp);
|
||||||
|
evmovdqu(bt, mask, dst, xmm, vec_enc);
|
||||||
|
}
|
||||||
|
|
||||||
// Set memory operation for length "less than" 64 bytes.
|
// Set memory operation for length "less than" 64 bytes.
|
||||||
void MacroAssembler::fill64_masked_avx(uint shift, Register dst, int disp,
|
void MacroAssembler::fill64_masked(uint shift, Register dst, int disp,
|
||||||
XMMRegister xmm, KRegister mask, Register length,
|
XMMRegister xmm, KRegister mask, Register length,
|
||||||
Register temp, bool use64byteVector) {
|
Register temp, bool use64byteVector) {
|
||||||
assert(MaxVectorSize >= 32, "vector length should be >= 32");
|
assert(MaxVectorSize >= 32, "vector length should be >= 32");
|
||||||
assert(shift != 0, "shift value should be 1 (short),2(int) or 3(long)");
|
|
||||||
BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
|
BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
|
||||||
if (!use64byteVector) {
|
if (!use64byteVector) {
|
||||||
fill32_avx(dst, disp, xmm);
|
fill32(dst, disp, xmm);
|
||||||
subptr(length, 32 >> shift);
|
subptr(length, 32 >> shift);
|
||||||
fill32_masked_avx(shift, dst, disp + 32, xmm, mask, length, temp);
|
fill32_masked(shift, dst, disp + 32, xmm, mask, length, temp);
|
||||||
} else {
|
} else {
|
||||||
assert(MaxVectorSize == 64, "vector length != 64");
|
assert(MaxVectorSize == 64, "vector length != 64");
|
||||||
movl(temp, 1);
|
fill_masked(type[shift], Address(dst, disp), xmm, mask, length, temp, Assembler::AVX_512bit);
|
||||||
shlxl(temp, temp, length);
|
|
||||||
subptr(temp, 1);
|
|
||||||
kmovwl(mask, temp);
|
|
||||||
evmovdqu(type[shift], mask, Address(dst, disp), xmm, Assembler::AVX_512bit);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void MacroAssembler::fill32_masked_avx(uint shift, Register dst, int disp,
|
void MacroAssembler::fill32_masked(uint shift, Register dst, int disp,
|
||||||
XMMRegister xmm, KRegister mask, Register length,
|
XMMRegister xmm, KRegister mask, Register length,
|
||||||
Register temp) {
|
Register temp) {
|
||||||
assert(MaxVectorSize >= 32, "vector length should be >= 32");
|
assert(MaxVectorSize >= 32, "vector length should be >= 32");
|
||||||
assert(shift != 0, "shift value should be 1 (short), 2(int) or 3(long)");
|
|
||||||
BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
|
BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
|
||||||
movl(temp, 1);
|
fill_masked(type[shift], Address(dst, disp), xmm, mask, length, temp, Assembler::AVX_256bit);
|
||||||
shlxl(temp, temp, length);
|
|
||||||
subptr(temp, 1);
|
|
||||||
kmovwl(mask, temp);
|
|
||||||
evmovdqu(type[shift], mask, Address(dst, disp), xmm, Assembler::AVX_256bit);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void MacroAssembler::fill32_avx(Register dst, int disp, XMMRegister xmm) {
|
void MacroAssembler::fill32(Register dst, int disp, XMMRegister xmm) {
|
||||||
assert(MaxVectorSize >= 32, "vector length should be >= 32");
|
assert(MaxVectorSize >= 32, "vector length should be >= 32");
|
||||||
vmovdqu(Address(dst, disp), xmm);
|
vmovdqu(Address(dst, disp), xmm);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroAssembler::fill64_avx(Register dst, int disp, XMMRegister xmm, bool use64byteVector) {
|
void MacroAssembler::fill64(Register dst, int disp, XMMRegister xmm, bool use64byteVector) {
|
||||||
assert(MaxVectorSize >= 32, "vector length should be >= 32");
|
assert(MaxVectorSize >= 32, "vector length should be >= 32");
|
||||||
BasicType type[] = {T_BYTE, T_SHORT, T_INT, T_LONG};
|
BasicType type[] = {T_BYTE, T_SHORT, T_INT, T_LONG};
|
||||||
if (!use64byteVector) {
|
if (!use64byteVector) {
|
||||||
fill32_avx(dst, disp, xmm);
|
fill32(dst, disp, xmm);
|
||||||
fill32_avx(dst, disp + 32, xmm);
|
fill32(dst, disp + 32, xmm);
|
||||||
} else {
|
} else {
|
||||||
evmovdquq(Address(dst, disp), xmm, Assembler::AVX_512bit);
|
evmovdquq(Address(dst, disp), xmm, Assembler::AVX_512bit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
|
void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register value,
|
||||||
|
Register count, Register rtmp, XMMRegister xtmp) {
|
||||||
|
Label L_exit;
|
||||||
|
Label L_fill_start;
|
||||||
|
Label L_fill_64_bytes;
|
||||||
|
Label L_fill_96_bytes;
|
||||||
|
Label L_fill_128_bytes;
|
||||||
|
Label L_fill_128_bytes_loop;
|
||||||
|
Label L_fill_128_loop_header;
|
||||||
|
Label L_fill_128_bytes_loop_header;
|
||||||
|
Label L_fill_128_bytes_loop_pre_header;
|
||||||
|
Label L_fill_zmm_sequence;
|
||||||
|
|
||||||
|
int shift = -1;
|
||||||
|
switch(type) {
|
||||||
|
case T_BYTE: shift = 0;
|
||||||
|
break;
|
||||||
|
case T_SHORT: shift = 1;
|
||||||
|
break;
|
||||||
|
case T_INT: shift = 2;
|
||||||
|
break;
|
||||||
|
/* Uncomment when LONG fill stubs are supported.
|
||||||
|
case T_LONG: shift = 3;
|
||||||
|
break;
|
||||||
|
*/
|
||||||
|
default:
|
||||||
|
fatal("Unhandled type: %s\n", type2name(type));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (AVX3Threshold != 0 || MaxVectorSize == 32) {
|
||||||
|
|
||||||
|
if (MaxVectorSize == 64) {
|
||||||
|
cmpq(count, AVX3Threshold >> shift);
|
||||||
|
jcc(Assembler::greater, L_fill_zmm_sequence);
|
||||||
|
}
|
||||||
|
|
||||||
|
evpbroadcast(type, xtmp, value, Assembler::AVX_256bit);
|
||||||
|
|
||||||
|
bind(L_fill_start);
|
||||||
|
|
||||||
|
cmpq(count, 32 >> shift);
|
||||||
|
jccb(Assembler::greater, L_fill_64_bytes);
|
||||||
|
fill32_masked(shift, to, 0, xtmp, k2, count, rtmp);
|
||||||
|
jmp(L_exit);
|
||||||
|
|
||||||
|
bind(L_fill_64_bytes);
|
||||||
|
cmpq(count, 64 >> shift);
|
||||||
|
jccb(Assembler::greater, L_fill_96_bytes);
|
||||||
|
fill64_masked(shift, to, 0, xtmp, k2, count, rtmp);
|
||||||
|
jmp(L_exit);
|
||||||
|
|
||||||
|
bind(L_fill_96_bytes);
|
||||||
|
cmpq(count, 96 >> shift);
|
||||||
|
jccb(Assembler::greater, L_fill_128_bytes);
|
||||||
|
fill64(to, 0, xtmp);
|
||||||
|
subq(count, 64 >> shift);
|
||||||
|
fill32_masked(shift, to, 64, xtmp, k2, count, rtmp);
|
||||||
|
jmp(L_exit);
|
||||||
|
|
||||||
|
bind(L_fill_128_bytes);
|
||||||
|
cmpq(count, 128 >> shift);
|
||||||
|
jccb(Assembler::greater, L_fill_128_bytes_loop_pre_header);
|
||||||
|
fill64(to, 0, xtmp);
|
||||||
|
fill32(to, 64, xtmp);
|
||||||
|
subq(count, 96 >> shift);
|
||||||
|
fill32_masked(shift, to, 96, xtmp, k2, count, rtmp);
|
||||||
|
jmp(L_exit);
|
||||||
|
|
||||||
|
bind(L_fill_128_bytes_loop_pre_header);
|
||||||
|
{
|
||||||
|
mov(rtmp, to);
|
||||||
|
andq(rtmp, 31);
|
||||||
|
jccb(Assembler::zero, L_fill_128_bytes_loop_header);
|
||||||
|
negq(rtmp);
|
||||||
|
addq(rtmp, 32);
|
||||||
|
mov64(r8, -1L);
|
||||||
|
bzhiq(r8, r8, rtmp);
|
||||||
|
kmovql(k2, r8);
|
||||||
|
evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_256bit);
|
||||||
|
addq(to, rtmp);
|
||||||
|
shrq(rtmp, shift);
|
||||||
|
subq(count, rtmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
cmpq(count, 128 >> shift);
|
||||||
|
jcc(Assembler::less, L_fill_start);
|
||||||
|
|
||||||
|
bind(L_fill_128_bytes_loop_header);
|
||||||
|
subq(count, 128 >> shift);
|
||||||
|
|
||||||
|
align32();
|
||||||
|
bind(L_fill_128_bytes_loop);
|
||||||
|
fill64(to, 0, xtmp);
|
||||||
|
fill64(to, 64, xtmp);
|
||||||
|
addq(to, 128);
|
||||||
|
subq(count, 128 >> shift);
|
||||||
|
jccb(Assembler::greaterEqual, L_fill_128_bytes_loop);
|
||||||
|
|
||||||
|
addq(count, 128 >> shift);
|
||||||
|
jcc(Assembler::zero, L_exit);
|
||||||
|
jmp(L_fill_start);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (MaxVectorSize == 64) {
|
||||||
|
// Sequence using 64 byte ZMM register.
|
||||||
|
Label L_fill_128_bytes_zmm;
|
||||||
|
Label L_fill_192_bytes_zmm;
|
||||||
|
Label L_fill_192_bytes_loop_zmm;
|
||||||
|
Label L_fill_192_bytes_loop_header_zmm;
|
||||||
|
Label L_fill_192_bytes_loop_pre_header_zmm;
|
||||||
|
Label L_fill_start_zmm_sequence;
|
||||||
|
|
||||||
|
bind(L_fill_zmm_sequence);
|
||||||
|
evpbroadcast(type, xtmp, value, Assembler::AVX_512bit);
|
||||||
|
|
||||||
|
bind(L_fill_start_zmm_sequence);
|
||||||
|
cmpq(count, 64 >> shift);
|
||||||
|
jccb(Assembler::greater, L_fill_128_bytes_zmm);
|
||||||
|
fill64_masked(shift, to, 0, xtmp, k2, count, rtmp, true);
|
||||||
|
jmp(L_exit);
|
||||||
|
|
||||||
|
bind(L_fill_128_bytes_zmm);
|
||||||
|
cmpq(count, 128 >> shift);
|
||||||
|
jccb(Assembler::greater, L_fill_192_bytes_zmm);
|
||||||
|
fill64(to, 0, xtmp, true);
|
||||||
|
subq(count, 64 >> shift);
|
||||||
|
fill64_masked(shift, to, 64, xtmp, k2, count, rtmp, true);
|
||||||
|
jmp(L_exit);
|
||||||
|
|
||||||
|
bind(L_fill_192_bytes_zmm);
|
||||||
|
cmpq(count, 192 >> shift);
|
||||||
|
jccb(Assembler::greater, L_fill_192_bytes_loop_pre_header_zmm);
|
||||||
|
fill64(to, 0, xtmp, true);
|
||||||
|
fill64(to, 64, xtmp, true);
|
||||||
|
subq(count, 128 >> shift);
|
||||||
|
fill64_masked(shift, to, 128, xtmp, k2, count, rtmp, true);
|
||||||
|
jmp(L_exit);
|
||||||
|
|
||||||
|
bind(L_fill_192_bytes_loop_pre_header_zmm);
|
||||||
|
{
|
||||||
|
movq(rtmp, to);
|
||||||
|
andq(rtmp, 63);
|
||||||
|
jccb(Assembler::zero, L_fill_192_bytes_loop_header_zmm);
|
||||||
|
negq(rtmp);
|
||||||
|
addq(rtmp, 64);
|
||||||
|
mov64(r8, -1L);
|
||||||
|
bzhiq(r8, r8, rtmp);
|
||||||
|
kmovql(k2, r8);
|
||||||
|
evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_512bit);
|
||||||
|
addq(to, rtmp);
|
||||||
|
shrq(rtmp, shift);
|
||||||
|
subq(count, rtmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
cmpq(count, 192 >> shift);
|
||||||
|
jcc(Assembler::less, L_fill_start_zmm_sequence);
|
||||||
|
|
||||||
|
bind(L_fill_192_bytes_loop_header_zmm);
|
||||||
|
subq(count, 192 >> shift);
|
||||||
|
|
||||||
|
align32();
|
||||||
|
bind(L_fill_192_bytes_loop_zmm);
|
||||||
|
fill64(to, 0, xtmp, true);
|
||||||
|
fill64(to, 64, xtmp, true);
|
||||||
|
fill64(to, 128, xtmp, true);
|
||||||
|
addq(to, 192);
|
||||||
|
subq(count, 192 >> shift);
|
||||||
|
jccb(Assembler::greaterEqual, L_fill_192_bytes_loop_zmm);
|
||||||
|
|
||||||
|
addq(count, 192 >> shift);
|
||||||
|
jcc(Assembler::zero, L_exit);
|
||||||
|
jmp(L_fill_start_zmm_sequence);
|
||||||
|
}
|
||||||
|
bind(L_exit);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif //COMPILER2_OR_JVMCI
|
#endif //COMPILER2_OR_JVMCI
|
||||||
|
|
||||||
|
|
||||||
|
@ -1305,6 +1305,7 @@ public:
|
|||||||
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
|
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
|
||||||
int comparison, bool is_signed, int vector_len, Register scratch_reg);
|
int comparison, bool is_signed, int vector_len, Register scratch_reg);
|
||||||
|
|
||||||
|
void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);
|
||||||
|
|
||||||
// Emit comparison instruction for the specified comparison predicate.
|
// Emit comparison instruction for the specified comparison predicate.
|
||||||
void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
|
void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
|
||||||
@ -1838,17 +1839,20 @@ public:
|
|||||||
void byte_array_inflate(Register src, Register dst, Register len,
|
void byte_array_inflate(Register src, Register dst, Register len,
|
||||||
XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);
|
XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);
|
||||||
|
|
||||||
void fill64_masked_avx(uint shift, Register dst, int disp,
|
void fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
|
||||||
|
Register length, Register temp, int vec_enc);
|
||||||
|
|
||||||
|
void fill64_masked(uint shift, Register dst, int disp,
|
||||||
XMMRegister xmm, KRegister mask, Register length,
|
XMMRegister xmm, KRegister mask, Register length,
|
||||||
Register temp, bool use64byteVector = false);
|
Register temp, bool use64byteVector = false);
|
||||||
|
|
||||||
void fill32_masked_avx(uint shift, Register dst, int disp,
|
void fill32_masked(uint shift, Register dst, int disp,
|
||||||
XMMRegister xmm, KRegister mask, Register length,
|
XMMRegister xmm, KRegister mask, Register length,
|
||||||
Register temp);
|
Register temp);
|
||||||
|
|
||||||
void fill32_avx(Register dst, int disp, XMMRegister xmm);
|
void fill32(Register dst, int disp, XMMRegister xmm);
|
||||||
|
|
||||||
void fill64_avx(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false);
|
void fill64(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false);
|
||||||
|
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
void convert_f2i(Register dst, XMMRegister src);
|
void convert_f2i(Register dst, XMMRegister src);
|
||||||
@ -1885,6 +1889,10 @@ public:
|
|||||||
void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
|
void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
|
||||||
bool conjoint, int shift = Address::times_1, int offset = 0,
|
bool conjoint, int shift = Address::times_1, int offset = 0,
|
||||||
bool use64byteVector = false);
|
bool use64byteVector = false);
|
||||||
|
|
||||||
|
void generate_fill_avx3(BasicType type, Register to, Register value,
|
||||||
|
Register count, Register rtmp, XMMRegister xtmp);
|
||||||
|
|
||||||
#endif // COMPILER2_OR_JVMCI
|
#endif // COMPILER2_OR_JVMCI
|
||||||
|
|
||||||
#endif // _LP64
|
#endif // _LP64
|
||||||
|
@ -2113,13 +2113,14 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
|
|
||||||
BLOCK_COMMENT("Entry:");
|
BLOCK_COMMENT("Entry:");
|
||||||
|
|
||||||
const Register to = c_rarg0; // source array address
|
const Register to = c_rarg0; // destination array address
|
||||||
const Register value = c_rarg1; // value
|
const Register value = c_rarg1; // value
|
||||||
const Register count = c_rarg2; // elements count
|
const Register count = c_rarg2; // elements count
|
||||||
|
__ mov(r11, count);
|
||||||
|
|
||||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||||
|
|
||||||
__ generate_fill(t, aligned, to, value, count, rax, xmm0);
|
__ generate_fill(t, aligned, to, value, r11, rax, xmm0);
|
||||||
|
|
||||||
__ vzeroupper();
|
__ vzeroupper();
|
||||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||||
|
@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
|
|||||||
|
|
||||||
enum platform_dependent_constants {
|
enum platform_dependent_constants {
|
||||||
code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small)
|
code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small)
|
||||||
code_size2 = 35300 LP64_ONLY(+32000) // simply increase if too small (assembler will crash if too small)
|
code_size2 = 35300 LP64_ONLY(+35000) // simply increase if too small (assembler will crash if too small)
|
||||||
};
|
};
|
||||||
|
|
||||||
class x86 {
|
class x86 {
|
||||||
|
@ -1469,6 +1469,14 @@ void VM_Version::get_processor_features() {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef COMPILER2
|
||||||
|
if (FLAG_IS_DEFAULT(OptimizeFill)) {
|
||||||
|
if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
|
||||||
|
OptimizeFill = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
if (UseSSE42Intrinsics) {
|
if (UseSSE42Intrinsics) {
|
||||||
if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
|
if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
|
||||||
@ -1585,12 +1593,6 @@ void VM_Version::get_processor_features() {
|
|||||||
// Modern processors allow misaligned memory operations for vectors.
|
// Modern processors allow misaligned memory operations for vectors.
|
||||||
AlignVector = !UseUnalignedLoadStores;
|
AlignVector = !UseUnalignedLoadStores;
|
||||||
}
|
}
|
||||||
if (FLAG_IS_DEFAULT(OptimizeFill)) {
|
|
||||||
// 8247307: On x86, the auto-vectorized loop array fill code shows
|
|
||||||
// better performance than the array fill stubs. We should reenable
|
|
||||||
// this after the x86 stubs get improved.
|
|
||||||
OptimizeFill = false;
|
|
||||||
}
|
|
||||||
#endif // COMPILER2
|
#endif // COMPILER2
|
||||||
|
|
||||||
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
|
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -36,12 +36,12 @@ import org.openjdk.jmh.annotations.Warmup;
|
|||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
@BenchmarkMode(Mode.AverageTime)
|
@BenchmarkMode(Mode.Throughput)
|
||||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||||
@State(Scope.Thread)
|
@State(Scope.Thread)
|
||||||
public class ArraysFill {
|
public class ArraysFill {
|
||||||
|
|
||||||
@Param({"10", "266", "2048"})
|
@Param({"10", "16", "31", "59", "89", "126", "250", "266", "511", "1021", "2047", "2048", "4095", "8195"})
|
||||||
public int size;
|
public int size;
|
||||||
|
|
||||||
public byte[] testByteArray;
|
public byte[] testByteArray;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user