8288043: Optimize FP to word/sub-word integral type conversion on X86 AVX2 platforms
Reviewed-by: kvn, sviswanathan
This commit is contained in:
parent
703a6ef591
commit
2ceb80c60f
@ -2157,6 +2157,13 @@ void Assembler::vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
|
|||||||
emit_int16(0x5B, (0xC0 | encode));
|
emit_int16(0x5B, (0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the register-to-register form of VCVTTPD2DQ (66 0F E6 /r), the
// truncating packed-double to packed-doubleword-integer conversion.
//
//   dst        - destination vector register
//   src        - source vector register holding the doubles
//   vector_len - vector length encoding; lengths up to AVX_256bit require AVX,
//                anything wider requires EVEX support (checked by the assert).
void Assembler::vcvttpd2dq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");

  // W bit set, no opmask register, vector-length aware encoding.
  InstructionAttr attrs(vector_len,
                        /* rex_w */ true,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);

  // Emit the prefix bytes; the returned value carries the register bits that
  // go into the ModRM byte below.
  int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                       VEX_SIMD_66, VEX_OPCODE_0F, &attrs);

  // Opcode byte followed by a register-direct (mod == 11b) ModRM byte.
  emit_int16(static_cast<unsigned char>(0xE6), (0xC0 | reg_bits));
}
|
||||||
|
|
||||||
void Assembler::vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
|
void Assembler::vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
|
assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
|
||||||
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
@ -2165,7 +2172,7 @@ void Assembler::vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
void Assembler::evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
|
assert(VM_Version::supports_avx512dq(), "");
|
||||||
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
attributes.set_is_evex_instruction();
|
attributes.set_is_evex_instruction();
|
||||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||||
@ -2173,7 +2180,7 @@ void Assembler::evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
|
assert(VM_Version::supports_avx512dq(), "");
|
||||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
attributes.set_is_evex_instruction();
|
attributes.set_is_evex_instruction();
|
||||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||||
@ -2181,7 +2188,7 @@ void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
|
void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
|
assert(VM_Version::supports_avx512dq(), "");
|
||||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
attributes.set_is_evex_instruction();
|
attributes.set_is_evex_instruction();
|
||||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
|
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
|
||||||
@ -2189,7 +2196,7 @@ void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
void Assembler::evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
|
assert(VM_Version::supports_avx512dq(), "");
|
||||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
attributes.set_is_evex_instruction();
|
attributes.set_is_evex_instruction();
|
||||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||||
@ -2197,7 +2204,7 @@ void Assembler::evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
|
void Assembler::evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
|
assert(VM_Version::supports_avx512dq(), "");
|
||||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
attributes.set_is_evex_instruction();
|
attributes.set_is_evex_instruction();
|
||||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
|
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
|
||||||
@ -2205,7 +2212,7 @@ void Assembler::evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::evpmovwb(XMMRegister dst, XMMRegister src, int vector_len) {
|
void Assembler::evpmovwb(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
assert(UseAVX > 2 && VM_Version::supports_avx512bw(), "");
|
assert(VM_Version::supports_avx512bw(), "");
|
||||||
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
attributes.set_is_evex_instruction();
|
attributes.set_is_evex_instruction();
|
||||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
|
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
|
||||||
|
@ -1201,6 +1201,9 @@ private:
|
|||||||
void evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
|
void evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
|
void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
|
// Convert vector double to int
|
||||||
|
void vcvttpd2dq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
// Evex casts with truncation
|
// Evex casts with truncation
|
||||||
void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
|
void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
|
void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
@ -4359,9 +4359,11 @@ void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst,
|
|||||||
* If the src is positive infinity or any value greater than or equal to the value of Integer.MAX_VALUE,
|
* If the src is positive infinity or any value greater than or equal to the value of Integer.MAX_VALUE,
|
||||||
* the result is equal to the value of Integer.MAX_VALUE.
|
* the result is equal to the value of Integer.MAX_VALUE.
|
||||||
*/
|
*/
|
||||||
void C2_MacroAssembler::vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, int vec_enc,
|
void C2_MacroAssembler::vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
||||||
Register rscratch) {
|
Register rscratch, AddressLiteral float_sign_flip,
|
||||||
|
int vec_enc) {
|
||||||
|
assert(rscratch != noreg || always_reachable(float_sign_flip), "missing");
|
||||||
Label done;
|
Label done;
|
||||||
vmovdqu(xtmp1, float_sign_flip, vec_enc, rscratch);
|
vmovdqu(xtmp1, float_sign_flip, vec_enc, rscratch);
|
||||||
vpcmpeqd(xtmp2, dst, xtmp1, vec_enc);
|
vpcmpeqd(xtmp2, dst, xtmp1, vec_enc);
|
||||||
@ -4387,10 +4389,11 @@ void C2_MacroAssembler::vector_cast_float_special_cases_avx(XMMRegister dst, XMM
|
|||||||
bind(done);
|
bind(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, int vec_enc,
|
void C2_MacroAssembler::vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2,
|
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
|
||||||
KRegister ktmp1, KRegister ktmp2,
|
Register rscratch, AddressLiteral float_sign_flip,
|
||||||
Register rscratch) {
|
int vec_enc) {
|
||||||
|
assert(rscratch != noreg || always_reachable(float_sign_flip), "missing");
|
||||||
Label done;
|
Label done;
|
||||||
evmovdqul(xtmp1, k0, float_sign_flip, false, vec_enc, rscratch);
|
evmovdqul(xtmp1, k0, float_sign_flip, false, vec_enc, rscratch);
|
||||||
Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc);
|
Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc);
|
||||||
@ -4408,11 +4411,10 @@ void C2_MacroAssembler::vector_cast_float_special_cases_evex(XMMRegister dst, XM
|
|||||||
bind(done);
|
bind(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src,
|
void C2_MacroAssembler::vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
AddressLiteral double_sign_flip, int vec_enc,
|
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2,
|
Register rscratch, AddressLiteral double_sign_flip,
|
||||||
KRegister ktmp1, KRegister ktmp2,
|
int vec_enc) {
|
||||||
Register rscratch) {
|
|
||||||
assert(rscratch != noreg || always_reachable(double_sign_flip), "missing");
|
assert(rscratch != noreg || always_reachable(double_sign_flip), "missing");
|
||||||
|
|
||||||
Label done;
|
Label done;
|
||||||
@ -4432,6 +4434,28 @@ void C2_MacroAssembler::vector_cast_float_to_long_special_cases_evex(XMMRegister
|
|||||||
bind(done);
|
bind(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fixes up the result of a double -> int vector conversion (opmask based
// variant). Doubleword lanes of dst that equal the float_sign_flip pattern are
// considered "special" and are patched according to the matching src lane:
//   - src lane unordered with itself (NaN) -> dst lane is set to 0
//   - src lane not less than zero          -> dst lane is set to the bitwise
//                                             complement of float_sign_flip
//                                             (max int, assuming the pattern
//                                             is 0x80000000 per lane)
// All other lanes keep the value already present in dst.
//
// xtmp1, xtmp2, ktmp1 and ktmp2 are overwritten. rscratch may only be noreg
// when float_sign_flip is always reachable (enforced by the assert).
void C2_MacroAssembler::vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                                                     XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
                                                                     Register rscratch, AddressLiteral float_sign_flip,
                                                                     int vec_enc) {
  assert(rscratch != noreg || always_reachable(float_sign_flip), "missing");
  Label no_special_lanes;

  // ktmp1 <- lanes of dst equal to the sign-flip pattern; nothing to do if
  // there are none.
  evmovdquq(xtmp1, k0, float_sign_flip, false, vec_enc, rscratch);
  Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc);
  kortestwl(ktmp1, ktmp1);
  jccb(Assembler::equal, no_special_lanes);

  // ktmp2 <- NaN source lanes; write zero into the corresponding dst lanes.
  vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
  evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
  evmovdqul(dst, ktmp2, xtmp2, true, vec_enc);

  // XOR the NaN lanes out of the special-lane mask, then narrow it further to
  // lanes whose source value is not less than zero (xtmp2 is still zero here).
  kxorwl(ktmp1, ktmp1, ktmp2);
  evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);

  // Truth table 0x11 with both inputs being xtmp1 yields ~xtmp1, i.e. the
  // complemented sign-flip pattern; store it into the selected dst lanes.
  vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
  evmovdqul(dst, ktmp1, xtmp2, true, vec_enc);

  bind(no_special_lanes);
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation.
|
* Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation.
|
||||||
* If src is NaN, the result is 0.
|
* If src is NaN, the result is 0.
|
||||||
@ -4440,10 +4464,10 @@ void C2_MacroAssembler::vector_cast_float_to_long_special_cases_evex(XMMRegister
|
|||||||
* If the src is positive infinity or any value greater than or equal to the value of Long.MAX_VALUE,
|
* If the src is positive infinity or any value greater than or equal to the value of Long.MAX_VALUE,
|
||||||
* the result is equal to the value of Long.MAX_VALUE.
|
* the result is equal to the value of Long.MAX_VALUE.
|
||||||
*/
|
*/
|
||||||
void C2_MacroAssembler::vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src,
|
void C2_MacroAssembler::vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
AddressLiteral double_sign_flip, int vec_enc,
|
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
|
Register rscratch, AddressLiteral double_sign_flip,
|
||||||
Register rscratch) {
|
int vec_enc) {
|
||||||
assert(rscratch != noreg || always_reachable(double_sign_flip), "missing");
|
assert(rscratch != noreg || always_reachable(double_sign_flip), "missing");
|
||||||
|
|
||||||
Label done;
|
Label done;
|
||||||
@ -4463,6 +4487,82 @@ void C2_MacroAssembler::vector_cast_double_special_cases_evex(XMMRegister dst, X
|
|||||||
bind(done);
|
bind(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Packs doublewords of src into dst with one VSHUFPS controlled by the
// immediate 'index'.
//
// For a 256-bit vector the upper 128 bits of src are first extracted into xtmp
// and used as the second shuffle operand; for narrower vectors the
// caller-supplied 'zero' register is used as the second operand instead.
// Only encodings below AVX_512bit are supported. xtmp is clobbered in the
// 256-bit case.
void C2_MacroAssembler::vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero,
                                                             XMMRegister xtmp, int index, int vec_enc) {
  assert(vec_enc < Assembler::AVX_512bit, "");

  // Choose the second shuffle source up front so a single vshufps suffices.
  XMMRegister second_src = zero;
  if (vec_enc == Assembler::AVX_256bit) {
    vextractf128_high(xtmp, src);
    second_src = xtmp;
  }
  vshufps(dst, src, second_src, index, vec_enc);
}
|
||||||
|
|
||||||
|
// Fixes up the result of a double -> int vector conversion on AVX (no opmask
// registers). dst holds the converted doublewords in a 128-bit vector, src the
// original doubles with encoding src_vec_enc.
//
// Lanes of dst equal to float_sign_flip are "special":
//   - if the source lane is NaN the destination lane becomes 0;
//   - if the source lane is not less than zero the destination lane becomes
//     the flipped sign-flip pattern (max int);
//   - otherwise the lane is left as produced by the conversion.
//
// xtmp1..xtmp5 are scratch registers. Note that xtmp4 is only cleared on the
// slow path: when no special lane is found the routine exits before any
// temporary other than xtmp1/xtmp2 is written.
void C2_MacroAssembler::vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                                    XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch,
                                                                    AddressLiteral float_sign_flip, int src_vec_enc) {
  assert(rscratch != noreg || always_reachable(float_sign_flip), "missing");

  // The integer result always lives in a 128-bit vector.
  const int dst_vec_enc = Assembler::AVX_128bit;
  // Shuffle immediate selecting the low doubleword of every quadword lane.
  const int low_dword_of_each_qword = 0x88;

  Label no_special_lanes;

  // xtmp2 <- mask of destination lanes equal to float_sign_flip, i.e. all
  // lanes that may need patching. Bail out if the mask is empty.
  movdqu(xtmp1, float_sign_flip, rscratch);
  vpcmpeqd(xtmp2, dst, xtmp1, dst_vec_enc);
  ptest(xtmp2, xtmp2);
  jccb(Assembler::equal, no_special_lanes);

  // xtmp1 <- float_sign_flip with every bit flipped (max integer value).
  vpcmpeqd(xtmp4, xtmp4, xtmp4, dst_vec_enc);
  pxor(xtmp1, xtmp4);

  // xtmp4 <- 0, xtmp3 <- mask of unordered (NaN) source lanes.
  vpxor(xtmp4, xtmp4, xtmp4, src_vec_enc);
  vcmppd(xtmp3, src, src, Assembler::UNORD_Q, src_vec_enc);

  // Narrow the quadword mask to doubleword lanes and zero the destination
  // lanes whose source lane is unordered.
  vector_crosslane_doubleword_pack_avx(xtmp3, xtmp3, xtmp4, xtmp5, low_dword_of_each_qword, src_vec_enc);
  vblendvps(dst, dst, xtmp4, xtmp3, dst_vec_enc);

  // Remove the lanes just handled from the special-lane mask.
  pxor(xtmp2, xtmp3);

  // xtmp3 <- mask of source lanes that are not less than zero, narrowed to
  // doubleword lanes and restricted to the remaining special lanes.
  vcmppd(xtmp3, src, xtmp4, Assembler::NLT_UQ, src_vec_enc);
  vector_crosslane_doubleword_pack_avx(xtmp3, xtmp3, xtmp4, xtmp5, low_dword_of_each_qword, src_vec_enc);
  pand(xtmp3, xtmp2);

  // Destination lanes still holding the special value (0x80000000) whose
  // source lane is positive are replaced with max int.
  vblendvps(dst, dst, xtmp1, xtmp3, dst_vec_enc);

  bind(no_special_lanes);
}
|
||||||
|
|
||||||
|
|
||||||
|
// Narrows the int lanes held in dst to short or byte lanes, in place.
//
//   to_elem_bt - target element type; only T_SHORT and T_BYTE are handled,
//                anything else trips the assert and emits no code
//   dst        - vector of ints on entry, packed sub-word lanes on exit
//   zero       - register supplied by the caller as the second pack operand
//   xtmp       - scratch register, used only for 256-bit vectors
//   rscratch   - scratch GPR; may be noreg only if the mask constant is always
//                reachable
//   vec_enc    - vector length encoding of dst
void C2_MacroAssembler::vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero,
                                                   XMMRegister xtmp, Register rscratch, int vec_enc) {
  const bool to_short = (to_elem_bt == T_SHORT);
  const bool to_byte  = (to_elem_bt == T_BYTE);
  if (!to_short && !to_byte) {
    assert(false, "%s", type2name(to_elem_bt));
    return;
  }

  // Clear the bits above the target width first so that the unsigned
  // saturating packs below do not clamp the values.
  ExternalAddress narrowing_mask(to_byte ? StubRoutines::x86::vector_int_to_byte_mask()
                                         : StubRoutines::x86::vector_int_to_short_mask());
  assert(rscratch != noreg || always_reachable(narrowing_mask), "missing");
  vpand(dst, dst, narrowing_mask, vec_enc, rscratch);

  // int -> short lanes.
  vpackusdw(dst, dst, zero, vec_enc);
  if (vec_enc == Assembler::AVX_256bit) {
    // Pull the packed quadwords of both 128-bit halves together.
    vector_crosslane_doubleword_pack_avx(dst, dst, zero, xtmp, 0x44, vec_enc);
  }

  // short -> byte lanes, only when bytes were requested.
  if (to_byte) {
    vpackuswb(dst, dst, zero, vec_enc);
  }
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Algorithm for vector D2L and F2I conversions:-
|
* Algorithm for vector D2L and F2I conversions:-
|
||||||
* a) Perform vector D2L/F2I cast.
|
* a) Perform vector D2L/F2I cast.
|
||||||
@ -4473,50 +4573,71 @@ void C2_MacroAssembler::vector_cast_double_special_cases_evex(XMMRegister dst, X
|
|||||||
* d) Replace 0x80000000 with MaxInt if source lane contains a +ve value.
|
* d) Replace 0x80000000 with MaxInt if source lane contains a +ve value.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, int vec_enc,
|
void C2_MacroAssembler::vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, Register rscratch) {
|
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
||||||
assert(rscratch != noreg || always_reachable(double_sign_flip), "missing");
|
AddressLiteral float_sign_flip, Register rscratch, int vec_enc) {
|
||||||
|
int to_elem_sz = type2aelembytes(to_elem_bt);
|
||||||
evcvttpd2qq(dst, src, vec_enc);
|
assert(to_elem_sz <= 4, "");
|
||||||
vector_cast_double_special_cases_evex(dst, src, double_sign_flip, vec_enc,
|
|
||||||
xtmp1, xtmp2, ktmp1, ktmp2, rscratch);
|
|
||||||
}
|
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, int vec_enc,
|
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, Register rscratch) {
|
|
||||||
assert(rscratch != noreg || always_reachable(float_sign_flip), "missing");
|
|
||||||
|
|
||||||
vcvttps2dq(dst, src, vec_enc);
|
vcvttps2dq(dst, src, vec_enc);
|
||||||
vector_cast_float_special_cases_avx(dst, src, float_sign_flip, vec_enc,
|
vector_cast_float_to_int_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, rscratch, float_sign_flip, vec_enc);
|
||||||
xtmp1, xtmp2, xtmp3, xtmp4, rscratch);
|
if (to_elem_sz < 4) {
|
||||||
|
vpxor(xtmp4, xtmp4, xtmp4, vec_enc);
|
||||||
|
vector_cast_int_to_subword(to_elem_bt, dst, xtmp4, xtmp3, rscratch, vec_enc);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, int vec_enc,
|
void C2_MacroAssembler::vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, Register rscratch) {
|
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
|
||||||
assert(rscratch != noreg || always_reachable(float_sign_flip), "missing");
|
Register rscratch, int vec_enc) {
|
||||||
|
int to_elem_sz = type2aelembytes(to_elem_bt);
|
||||||
|
assert(to_elem_sz <= 4, "");
|
||||||
vcvttps2dq(dst, src, vec_enc);
|
vcvttps2dq(dst, src, vec_enc);
|
||||||
vector_cast_float_special_cases_evex(dst, src, float_sign_flip, vec_enc,
|
vector_cast_float_to_int_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, rscratch, float_sign_flip, vec_enc);
|
||||||
xtmp1, xtmp2, ktmp1, ktmp2, rscratch);
|
|
||||||
}
|
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_castF2L_evex(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, int vec_enc,
|
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, Register rscratch) {
|
|
||||||
assert(rscratch != noreg || always_reachable(float_sign_flip), "missing");
|
|
||||||
|
|
||||||
evcvttps2qq(dst, src, vec_enc);
|
|
||||||
vector_cast_float_to_long_special_cases_evex(dst, src, float_sign_flip, vec_enc,
|
|
||||||
xtmp1, xtmp2, ktmp1, ktmp2, rscratch);
|
|
||||||
}
|
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, int vec_enc,
|
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, Register rscratch) {
|
|
||||||
assert(rscratch != noreg || always_reachable(double_sign_flip), "missing");
|
|
||||||
|
|
||||||
vector_castD2L_evex(dst, src, double_sign_flip, vec_enc,
|
|
||||||
xtmp1, xtmp2, ktmp1, ktmp2, rscratch);
|
|
||||||
if (to_elem_bt != T_LONG) {
|
|
||||||
switch(to_elem_bt) {
|
switch(to_elem_bt) {
|
||||||
|
case T_INT:
|
||||||
|
break;
|
||||||
|
case T_SHORT:
|
||||||
|
evpmovdw(dst, dst, vec_enc);
|
||||||
|
break;
|
||||||
|
case T_BYTE:
|
||||||
|
evpmovdb(dst, dst, vec_enc);
|
||||||
|
break;
|
||||||
|
default: assert(false, "%s", type2name(to_elem_bt));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vector float -> long cast (EVEX): emits the truncating conversion and then
// lets vector_cast_float_to_long_special_cases_evex patch the lanes that need
// special handling. The temporaries, the sign-flip constant and rscratch are
// simply forwarded to that routine.
void C2_MacroAssembler::vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                            KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
                                            Register rscratch, int vec_enc) {
  // Step 1: raw conversion.
  evcvttps2qq(dst, src, vec_enc);

  // Step 2: fix-up of special source values.
  vector_cast_float_to_long_special_cases_evex(dst, src,
                                               xtmp1, xtmp2,
                                               ktmp1, ktmp2,
                                               rscratch, double_sign_flip, vec_enc);
}
|
||||||
|
|
||||||
|
// Handling for downcasting from double to integer or sub-word types on AVX2.
|
||||||
|
void C2_MacroAssembler::vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
|
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5,
|
||||||
|
AddressLiteral float_sign_flip, Register rscratch, int vec_enc) {
|
||||||
|
int to_elem_sz = type2aelembytes(to_elem_bt);
|
||||||
|
assert(to_elem_sz < 8, "");
|
||||||
|
vcvttpd2dq(dst, src, vec_enc);
|
||||||
|
vector_cast_double_to_int_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, xtmp5, rscratch,
|
||||||
|
float_sign_flip, vec_enc);
|
||||||
|
if (to_elem_sz < 4) {
|
||||||
|
// xtmp4 holds all zero lanes.
|
||||||
|
vector_cast_int_to_subword(to_elem_bt, dst, xtmp4, xtmp5, rscratch, Assembler::AVX_128bit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void C2_MacroAssembler::vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src,
|
||||||
|
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1,
|
||||||
|
KRegister ktmp2, AddressLiteral sign_flip,
|
||||||
|
Register rscratch, int vec_enc) {
|
||||||
|
if (VM_Version::supports_avx512dq()) {
|
||||||
|
evcvttpd2qq(dst, src, vec_enc);
|
||||||
|
vector_cast_double_to_long_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, rscratch, sign_flip, vec_enc);
|
||||||
|
switch(to_elem_bt) {
|
||||||
|
case T_LONG:
|
||||||
|
break;
|
||||||
case T_INT:
|
case T_INT:
|
||||||
evpmovsqd(dst, dst, vec_enc);
|
evpmovsqd(dst, dst, vec_enc);
|
||||||
break;
|
break;
|
||||||
@ -4530,6 +4651,21 @@ void C2_MacroAssembler::vector_castD2X_evex(BasicType to_elem_bt, XMMRegister ds
|
|||||||
break;
|
break;
|
||||||
default: assert(false, "%s", type2name(to_elem_bt));
|
default: assert(false, "%s", type2name(to_elem_bt));
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
assert(type2aelembytes(to_elem_bt) <= 4, "");
|
||||||
|
vcvttpd2dq(dst, src, vec_enc);
|
||||||
|
vector_cast_double_to_int_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, rscratch, sign_flip, vec_enc);
|
||||||
|
switch(to_elem_bt) {
|
||||||
|
case T_INT:
|
||||||
|
break;
|
||||||
|
case T_SHORT:
|
||||||
|
evpmovdw(dst, dst, vec_enc);
|
||||||
|
break;
|
||||||
|
case T_BYTE:
|
||||||
|
evpmovdb(dst, dst, vec_enc);
|
||||||
|
break;
|
||||||
|
default: assert(false, "%s", type2name(to_elem_bt));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4545,8 +4681,8 @@ void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister sr
|
|||||||
evpbroadcastq(xtmp1, tmp, vec_enc);
|
evpbroadcastq(xtmp1, tmp, vec_enc);
|
||||||
vaddpd(xtmp1, src , xtmp1, vec_enc);
|
vaddpd(xtmp1, src , xtmp1, vec_enc);
|
||||||
evcvtpd2qq(dst, xtmp1, vec_enc);
|
evcvtpd2qq(dst, xtmp1, vec_enc);
|
||||||
vector_cast_double_special_cases_evex(dst, src, double_sign_flip, vec_enc,
|
vector_cast_double_to_long_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, tmp /*rscratch*/,
|
||||||
xtmp1, xtmp2, ktmp1, ktmp2, tmp);
|
double_sign_flip, vec_enc);;
|
||||||
|
|
||||||
ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), tmp /*rscratch*/);
|
ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), tmp /*rscratch*/);
|
||||||
}
|
}
|
||||||
@ -4563,8 +4699,8 @@ void C2_MacroAssembler::vector_round_float_evex(XMMRegister dst, XMMRegister src
|
|||||||
vbroadcastss(xtmp1, xtmp1, vec_enc);
|
vbroadcastss(xtmp1, xtmp1, vec_enc);
|
||||||
vaddps(xtmp1, src , xtmp1, vec_enc);
|
vaddps(xtmp1, src , xtmp1, vec_enc);
|
||||||
vcvtps2dq(dst, xtmp1, vec_enc);
|
vcvtps2dq(dst, xtmp1, vec_enc);
|
||||||
vector_cast_float_special_cases_evex(dst, src, float_sign_flip, vec_enc,
|
vector_cast_float_to_int_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, tmp /*rscratch*/,
|
||||||
xtmp1, xtmp2, ktmp1, ktmp2, tmp);
|
float_sign_flip, vec_enc);
|
||||||
|
|
||||||
ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), tmp /*rscratch*/);
|
ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), tmp /*rscratch*/);
|
||||||
}
|
}
|
||||||
@ -4581,8 +4717,7 @@ void C2_MacroAssembler::vector_round_float_avx(XMMRegister dst, XMMRegister src,
|
|||||||
vbroadcastss(xtmp1, xtmp1, vec_enc);
|
vbroadcastss(xtmp1, xtmp1, vec_enc);
|
||||||
vaddps(xtmp1, src , xtmp1, vec_enc);
|
vaddps(xtmp1, src , xtmp1, vec_enc);
|
||||||
vcvtps2dq(dst, xtmp1, vec_enc);
|
vcvtps2dq(dst, xtmp1, vec_enc);
|
||||||
vector_cast_float_special_cases_avx(dst, src, float_sign_flip, vec_enc,
|
vector_cast_float_to_int_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, tmp /*rscratch*/, float_sign_flip, vec_enc);
|
||||||
xtmp1, xtmp2, xtmp3, xtmp4, tmp);
|
|
||||||
|
|
||||||
ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), tmp /*rscratch*/);
|
ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), tmp /*rscratch*/);
|
||||||
}
|
}
|
||||||
|
@ -308,36 +308,59 @@ public:
|
|||||||
void masked_op(int ideal_opc, int mask_len, KRegister dst,
|
void masked_op(int ideal_opc, int mask_len, KRegister dst,
|
||||||
KRegister src1, KRegister src2);
|
KRegister src1, KRegister src2);
|
||||||
|
|
||||||
void vector_castF2I_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, int vec_enc,
|
void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, Register rscratch = noreg);
|
BasicType from_elem_bt, BasicType to_elem_bt);
|
||||||
|
|
||||||
void vector_castF2I_evex(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, int vec_enc,
|
void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, Register rscratch = noreg);
|
XMMRegister xtmp, Register rscratch, int vec_enc);
|
||||||
|
|
||||||
void vector_castF2L_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, int vec_enc,
|
void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, Register rscratch = noreg);
|
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
||||||
|
AddressLiteral float_sign_flip, Register rscratch, int vec_enc);
|
||||||
|
|
||||||
void vector_castD2L_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, int vec_enc,
|
void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, Register rscratch = noreg );
|
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
|
||||||
|
Register rscratch, int vec_enc);
|
||||||
|
|
||||||
void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, int vec_enc,
|
void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, Register rscratch = noreg);
|
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
||||||
|
Register rscratch, int vec_enc);
|
||||||
|
|
||||||
void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc, BasicType from_elem_bt, BasicType to_elem_bt);
|
void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
|
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip,
|
||||||
|
Register rscratch, int vec_enc);
|
||||||
|
|
||||||
void vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, int vec_enc,
|
void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, Register rscratch = noreg );
|
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5,
|
||||||
|
AddressLiteral float_sign_flip, Register rscratch, int vec_enc);
|
||||||
|
|
||||||
void vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, int vec_enc,
|
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, Register rscratch = noreg);
|
|
||||||
|
|
||||||
void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, int vec_enc,
|
void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
|
XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch,
|
||||||
Register rscratch = noreg);
|
AddressLiteral float_sign_flip, int vec_enc);
|
||||||
|
|
||||||
void vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, int vec_enc,
|
void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
|
||||||
Register rscratch = noreg);
|
int vec_enc);
|
||||||
|
|
||||||
|
void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
|
KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
|
||||||
|
int vec_enc);
|
||||||
|
|
||||||
|
void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
|
KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
|
||||||
|
int vec_enc);
|
||||||
|
|
||||||
|
void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
|
KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
|
||||||
|
int vec_enc);
|
||||||
|
|
||||||
|
void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
|
||||||
|
XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip,
|
||||||
|
int vec_enc);
|
||||||
|
|
||||||
|
void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero,
|
||||||
|
XMMRegister xtmp, int index, int vec_enc);
|
||||||
|
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
|
void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
|
||||||
|
@ -184,10 +184,17 @@
|
|||||||
static const bool supports_encode_ascii_array = true;
|
static const bool supports_encode_ascii_array = true;
|
||||||
|
|
||||||
// Returns pre-selection estimated size of a vector operation.
|
// Returns pre-selection estimated size of a vector operation.
|
||||||
|
// Currently, it's a rudimentary heuristic based on emitted code size for complex
|
||||||
|
// IR nodes used by unroll policy. Idea is to constrain unrolling factor and prevent
|
||||||
|
// generating bloated loop bodies.
|
||||||
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
||||||
switch(vopc) {
|
switch(vopc) {
|
||||||
default:
|
default:
|
||||||
return 0;
|
return 0;
|
||||||
|
case Op_VectorCastF2X: // fall through
|
||||||
|
case Op_VectorCastD2X: {
|
||||||
|
return is_floating_point_type(ety) ? 0 : (is_subword_type(ety) ? 35 : 30);
|
||||||
|
}
|
||||||
case Op_CountTrailingZerosV:
|
case Op_CountTrailingZerosV:
|
||||||
case Op_CountLeadingZerosV:
|
case Op_CountLeadingZerosV:
|
||||||
return VM_Version::supports_avx512cd() && (ety == T_INT || ety == T_LONG) ? 0 : 40;
|
return VM_Version::supports_avx512cd() && (ety == T_INT || ety == T_LONG) ? 0 : 40;
|
||||||
|
@ -1392,6 +1392,8 @@ Assembler::Width widthForType(BasicType bt) {
|
|||||||
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
|
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
|
||||||
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
|
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
|
||||||
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
|
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
|
||||||
|
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
|
||||||
|
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
|
||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
const bool Matcher::match_rule_supported(int opcode) {
|
const bool Matcher::match_rule_supported(int opcode) {
|
||||||
@ -1883,40 +1885,26 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Op_VectorCastF2X: {
|
||||||
|
// As per JLS section 5.1.3 narrowing conversion to sub-word types
|
||||||
|
// happen after intermediate conversion to integer and special handling
|
||||||
|
// code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
|
||||||
|
int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
|
||||||
|
if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// fallthrough
|
||||||
case Op_VectorCastD2X:
|
case Op_VectorCastD2X:
|
||||||
// Conversion to integral type is only supported on AVX-512 platforms with avx512dq.
|
if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
|
||||||
// Need avx512vl for size_in_bits < 512
|
|
||||||
if (is_integral_type(bt)) {
|
|
||||||
if (!VM_Version::supports_avx512dq()) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case Op_RoundVD:
|
case Op_RoundVD:
|
||||||
if (!VM_Version::supports_avx512dq()) {
|
if (!VM_Version::supports_avx512dq()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Op_VectorCastF2X:
|
|
||||||
// F2I is supported on all AVX and above platforms
|
|
||||||
// For conversion to other integral types need AVX512:
|
|
||||||
// Conversion to long in addition needs avx512dq
|
|
||||||
// Need avx512vl for size_in_bits < 512
|
|
||||||
if (is_integral_type(bt) && (bt != T_INT)) {
|
|
||||||
if (UseAVX <= 2) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if ((bt == T_LONG) && !VM_Version::supports_avx512dq()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Op_MulReductionVI:
|
case Op_MulReductionVI:
|
||||||
if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
|
if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
|
||||||
return false;
|
return false;
|
||||||
@ -7376,66 +7364,47 @@ instruct vcastFtoD_reg(vec dst, vec src) %{
|
|||||||
%}
|
%}
|
||||||
|
|
||||||
|
|
||||||
instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
|
instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
|
||||||
// F2I conversion for < 64 byte vector using AVX instructions
|
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
|
||||||
// AVX512 platforms that dont support avx512vl also use AVX instructions to support F2I
|
type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
|
||||||
predicate(!VM_Version::supports_avx512vl() &&
|
|
||||||
Matcher::vector_length_in_bytes(n) < 64 &&
|
|
||||||
Matcher::vector_element_basic_type(n) == T_INT);
|
|
||||||
match(Set dst (VectorCastF2X src));
|
match(Set dst (VectorCastF2X src));
|
||||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
|
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
|
||||||
format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
|
format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this);
|
int vlen_enc = vector_length_encoding(this, $src);
|
||||||
__ vector_castF2I_avx($dst$$XMMRegister, $src$$XMMRegister,
|
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
|
||||||
ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vlen_enc,
|
// JDK-8292878 removed the need for an explicit scratch register needed to load greater than
|
||||||
$xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
|
// 32 bit addresses for register indirect addressing mode since stub constants
|
||||||
%}
|
// are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently.
|
||||||
ins_pipe( pipe_slow );
|
// However, targets are free to increase this limit, but having a large code cache size
|
||||||
%}
|
// greater than 2G looks unreasonable in practical scenario, on the hind side with given
|
||||||
|
// cap we save a temporary register allocation which in limiting case can prevent
|
||||||
instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
|
// spilling in high register pressure blocks.
|
||||||
predicate((VM_Version::supports_avx512vl() ||
|
__ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
Matcher::vector_length_in_bytes(n) == 64) &&
|
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
|
||||||
Matcher::vector_element_basic_type(n) == T_INT);
|
ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
|
||||||
match(Set dst (VectorCastF2X src));
|
|
||||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
|
|
||||||
format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
|
|
||||||
ins_encode %{
|
|
||||||
int vlen_enc = vector_length_encoding(this);
|
|
||||||
__ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister,
|
|
||||||
ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vlen_enc,
|
|
||||||
$xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
|
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
|
instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
|
||||||
// F2X conversion for integral non T_INT target using AVX512 instructions
|
predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
|
||||||
// Platforms that dont support avx512vl can only support 64 byte vectors
|
is_integral_type(Matcher::vector_element_basic_type(n)));
|
||||||
predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
|
|
||||||
Matcher::vector_element_basic_type(n) != T_INT);
|
|
||||||
match(Set dst (VectorCastF2X src));
|
match(Set dst (VectorCastF2X src));
|
||||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
|
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
|
||||||
format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
|
format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
|
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
|
||||||
if (to_elem_bt == T_LONG) {
|
if (to_elem_bt == T_LONG) {
|
||||||
int vlen_enc = vector_length_encoding(this);
|
int vlen_enc = vector_length_encoding(this);
|
||||||
__ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister,
|
__ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vlen_enc,
|
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
|
||||||
$xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
|
ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
|
||||||
} else {
|
} else {
|
||||||
int vlen_enc = vector_length_encoding(this, $src);
|
int vlen_enc = vector_length_encoding(this, $src);
|
||||||
__ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister,
|
__ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vlen_enc,
|
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
|
||||||
$xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
|
ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
|
||||||
if (to_elem_bt == T_SHORT) {
|
|
||||||
__ evpmovdw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
||||||
} else {
|
|
||||||
assert(to_elem_bt == T_BYTE, "required");
|
|
||||||
__ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
@ -7452,17 +7421,35 @@ instruct vcastDtoF_reg(vec dst, vec src) %{
|
|||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
|
instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
|
||||||
predicate(is_integral_type(Matcher::vector_element_basic_type(n)));
|
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
|
||||||
|
is_integral_type(Matcher::vector_element_basic_type(n)));
|
||||||
match(Set dst (VectorCastD2X src));
|
match(Set dst (VectorCastD2X src));
|
||||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
|
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
|
||||||
format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
|
format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this, $src);
|
int vlen_enc = vector_length_encoding(this, $src);
|
||||||
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
|
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
|
||||||
__ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
|
__ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vlen_enc,
|
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
|
||||||
$xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
|
ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
|
||||||
|
%}
|
||||||
|
ins_pipe( pipe_slow );
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
|
||||||
|
predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
|
||||||
|
is_integral_type(Matcher::vector_element_basic_type(n)));
|
||||||
|
match(Set dst (VectorCastD2X src));
|
||||||
|
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
|
||||||
|
format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
|
||||||
|
ins_encode %{
|
||||||
|
int vlen_enc = vector_length_encoding(this, $src);
|
||||||
|
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
|
||||||
|
AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
|
||||||
|
ExternalAddress(vector_float_signflip());
|
||||||
|
__ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
|
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
@ -1004,6 +1004,8 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
|
|||||||
case Op_ReverseV:
|
case Op_ReverseV:
|
||||||
case Op_RoundVF:
|
case Op_RoundVF:
|
||||||
case Op_RoundVD:
|
case Op_RoundVD:
|
||||||
|
case Op_VectorCastD2X:
|
||||||
|
case Op_VectorCastF2X:
|
||||||
case Op_PopCountVI:
|
case Op_PopCountVI:
|
||||||
case Op_PopCountVL: {
|
case Op_PopCountVL: {
|
||||||
const TypeVect* vt = n->bottom_type()->is_vect();
|
const TypeVect* vt = n->bottom_type()->is_vect();
|
||||||
|
@ -27,7 +27,6 @@
|
|||||||
* @summary Test float/double to integral cast
|
* @summary Test float/double to integral cast
|
||||||
* @modules jdk.incubator.vector
|
* @modules jdk.incubator.vector
|
||||||
* @requires vm.compiler2.enabled
|
* @requires vm.compiler2.enabled
|
||||||
* @requires (os.simpleArch == "x64" & vm.cpu.features ~= ".*avx512dq.*")
|
|
||||||
* @library /test/lib /
|
* @library /test/lib /
|
||||||
* @run driver compiler.vectorapi.VectorFPtoIntCastTest
|
* @run driver compiler.vectorapi.VectorFPtoIntCastTest
|
||||||
*/
|
*/
|
||||||
@ -87,7 +86,7 @@ public class VectorFPtoIntCastTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = {IRNode.VECTOR_CAST_F2X, "> 0"})
|
@IR(counts = {IRNode.VECTOR_CAST_F2X, "> 0"}, applyIfCPUFeature = {"avx512f", "true"})
|
||||||
public void float2int() {
|
public void float2int() {
|
||||||
var cvec = (IntVector)fvec512.convertShape(VectorOperators.F2I, ispec512, 0);
|
var cvec = (IntVector)fvec512.convertShape(VectorOperators.F2I, ispec512, 0);
|
||||||
cvec.intoArray(int_arr, 0);
|
cvec.intoArray(int_arr, 0);
|
||||||
@ -104,7 +103,7 @@ public class VectorFPtoIntCastTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = {IRNode.VECTOR_CAST_F2X, "> 0"})
|
@IR(counts = {IRNode.VECTOR_CAST_F2X, "> 0"}, applyIfCPUFeature = {"avx512dq", "true"})
|
||||||
public void float2long() {
|
public void float2long() {
|
||||||
var cvec = (LongVector)fvec512.convertShape(VectorOperators.F2L, lspec512, 0);
|
var cvec = (LongVector)fvec512.convertShape(VectorOperators.F2L, lspec512, 0);
|
||||||
cvec.intoArray(long_arr, 0);
|
cvec.intoArray(long_arr, 0);
|
||||||
@ -121,7 +120,7 @@ public class VectorFPtoIntCastTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = {IRNode.VECTOR_CAST_F2X, "> 0"})
|
@IR(counts = {IRNode.VECTOR_CAST_F2X, "> 0"}, applyIfCPUFeature = {"avx512f", "true"})
|
||||||
public void float2short() {
|
public void float2short() {
|
||||||
var cvec = (ShortVector)fvec512.convertShape(VectorOperators.F2S, sspec256, 0);
|
var cvec = (ShortVector)fvec512.convertShape(VectorOperators.F2S, sspec256, 0);
|
||||||
cvec.intoArray(short_arr, 0);
|
cvec.intoArray(short_arr, 0);
|
||||||
@ -138,7 +137,7 @@ public class VectorFPtoIntCastTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = {IRNode.VECTOR_CAST_F2X, "> 0"})
|
@IR(counts = {IRNode.VECTOR_CAST_F2X, "> 0"}, applyIfCPUFeature = {"avx512f", "true"})
|
||||||
public void float2byte() {
|
public void float2byte() {
|
||||||
var cvec = (ByteVector)fvec512.convertShape(VectorOperators.F2B, bspec128, 0);
|
var cvec = (ByteVector)fvec512.convertShape(VectorOperators.F2B, bspec128, 0);
|
||||||
cvec.intoArray(byte_arr, 0);
|
cvec.intoArray(byte_arr, 0);
|
||||||
@ -155,7 +154,7 @@ public class VectorFPtoIntCastTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = {IRNode.VECTOR_CAST_D2X, "> 0"})
|
@IR(counts = {IRNode.VECTOR_CAST_D2X, "> 0"}, applyIfCPUFeature = {"avx512f", "true"})
|
||||||
public void double2int() {
|
public void double2int() {
|
||||||
var cvec = (IntVector)dvec512.convertShape(VectorOperators.D2I, ispec256, 0);
|
var cvec = (IntVector)dvec512.convertShape(VectorOperators.D2I, ispec256, 0);
|
||||||
cvec.intoArray(int_arr, 0);
|
cvec.intoArray(int_arr, 0);
|
||||||
@ -172,7 +171,7 @@ public class VectorFPtoIntCastTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = {IRNode.VECTOR_CAST_D2X, "> 0"})
|
@IR(counts = {IRNode.VECTOR_CAST_D2X, "> 0"}, applyIfCPUFeature = {"avx512dq", "true"})
|
||||||
public void double2long() {
|
public void double2long() {
|
||||||
var cvec = (LongVector)dvec512.convertShape(VectorOperators.D2L, lspec512, 0);
|
var cvec = (LongVector)dvec512.convertShape(VectorOperators.D2L, lspec512, 0);
|
||||||
cvec.intoArray(long_arr, 0);
|
cvec.intoArray(long_arr, 0);
|
||||||
@ -189,7 +188,7 @@ public class VectorFPtoIntCastTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = {IRNode.VECTOR_CAST_D2X, "> 0"})
|
@IR(counts = {IRNode.VECTOR_CAST_D2X, "> 0"}, applyIfCPUFeature = {"avx512f", "true"})
|
||||||
public void double2short() {
|
public void double2short() {
|
||||||
var cvec = (ShortVector)dvec512.convertShape(VectorOperators.D2S, sspec128, 0);
|
var cvec = (ShortVector)dvec512.convertShape(VectorOperators.D2S, sspec128, 0);
|
||||||
cvec.intoArray(short_arr, 0);
|
cvec.intoArray(short_arr, 0);
|
||||||
@ -206,7 +205,7 @@ public class VectorFPtoIntCastTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = {IRNode.VECTOR_CAST_D2X, "> 0"})
|
@IR(counts = {IRNode.VECTOR_CAST_D2X, "> 0"}, applyIfCPUFeature = {"avx512f", "true"})
|
||||||
public void double2byte() {
|
public void double2byte() {
|
||||||
var cvec = (ByteVector)dvec512.convertShape(VectorOperators.D2B, bspec64, 0);
|
var cvec = (ByteVector)dvec512.convertShape(VectorOperators.D2B, bspec64, 0);
|
||||||
cvec.intoArray(byte_arr, 0);
|
cvec.intoArray(byte_arr, 0);
|
||||||
|
@ -62,8 +62,11 @@ public class TestCastMethods {
|
|||||||
makePair(FSPEC128, ISPEC128),
|
makePair(FSPEC128, ISPEC128),
|
||||||
makePair(FSPEC64, DSPEC128),
|
makePair(FSPEC64, DSPEC128),
|
||||||
makePair(FSPEC128, DSPEC256),
|
makePair(FSPEC128, DSPEC256),
|
||||||
|
makePair(FSPEC128, ISPEC128),
|
||||||
|
makePair(FSPEC128, SSPEC64),
|
||||||
makePair(DSPEC128, FSPEC64),
|
makePair(DSPEC128, FSPEC64),
|
||||||
makePair(DSPEC256, FSPEC128),
|
makePair(DSPEC256, FSPEC128),
|
||||||
|
makePair(DSPEC128, ISPEC64),
|
||||||
makePair(BSPEC64, SSPEC64, true),
|
makePair(BSPEC64, SSPEC64, true),
|
||||||
makePair(BSPEC64, SSPEC128, true),
|
makePair(BSPEC64, SSPEC128, true),
|
||||||
makePair(BSPEC64, ISPEC128, true),
|
makePair(BSPEC64, ISPEC128, true),
|
||||||
@ -74,6 +77,11 @@ public class TestCastMethods {
|
|||||||
);
|
);
|
||||||
|
|
||||||
public static final List<VectorSpeciesPair> AVX2_CAST_TESTS = Stream.concat(AVX1_CAST_TESTS.stream(), Stream.of(
|
public static final List<VectorSpeciesPair> AVX2_CAST_TESTS = Stream.concat(AVX1_CAST_TESTS.stream(), Stream.of(
|
||||||
|
makePair(DSPEC256, ISPEC128),
|
||||||
|
makePair(DSPEC256, SSPEC64),
|
||||||
|
makePair(FSPEC256, ISPEC256),
|
||||||
|
makePair(FSPEC256, SSPEC128),
|
||||||
|
makePair(FSPEC256, BSPEC64),
|
||||||
makePair(BSPEC128, SSPEC256),
|
makePair(BSPEC128, SSPEC256),
|
||||||
makePair(BSPEC64, ISPEC256),
|
makePair(BSPEC64, ISPEC256),
|
||||||
makePair(BSPEC64, LSPEC256),
|
makePair(BSPEC64, LSPEC256),
|
||||||
@ -89,7 +97,6 @@ public class TestCastMethods {
|
|||||||
makePair(LSPEC256, BSPEC64),
|
makePair(LSPEC256, BSPEC64),
|
||||||
makePair(LSPEC256, SSPEC64),
|
makePair(LSPEC256, SSPEC64),
|
||||||
makePair(LSPEC256, ISPEC128),
|
makePair(LSPEC256, ISPEC128),
|
||||||
makePair(FSPEC256, ISPEC256),
|
|
||||||
makePair(BSPEC128, SSPEC256, true),
|
makePair(BSPEC128, SSPEC256, true),
|
||||||
makePair(BSPEC64, ISPEC256, true),
|
makePair(BSPEC64, ISPEC256, true),
|
||||||
makePair(BSPEC64, LSPEC256, true),
|
makePair(BSPEC64, LSPEC256, true),
|
||||||
@ -115,9 +122,14 @@ public class TestCastMethods {
|
|||||||
makePair(LSPEC512, BSPEC64),
|
makePair(LSPEC512, BSPEC64),
|
||||||
makePair(LSPEC512, SSPEC128),
|
makePair(LSPEC512, SSPEC128),
|
||||||
makePair(LSPEC512, ISPEC256),
|
makePair(LSPEC512, ISPEC256),
|
||||||
makePair(FSPEC512, ISPEC512),
|
|
||||||
makePair(FSPEC256, DSPEC512),
|
makePair(FSPEC256, DSPEC512),
|
||||||
makePair(DSPEC512, FSPEC256),
|
makePair(DSPEC512, FSPEC256),
|
||||||
|
makePair(DSPEC512, ISPEC256),
|
||||||
|
makePair(DSPEC512, SSPEC128),
|
||||||
|
makePair(DSPEC512, BSPEC64),
|
||||||
|
makePair(FSPEC512, ISPEC512),
|
||||||
|
makePair(FSPEC512, SSPEC256),
|
||||||
|
makePair(FSPEC512, BSPEC128),
|
||||||
makePair(BSPEC128, ISPEC512, true),
|
makePair(BSPEC128, ISPEC512, true),
|
||||||
makePair(BSPEC64, LSPEC512, true),
|
makePair(BSPEC64, LSPEC512, true),
|
||||||
makePair(SSPEC256, ISPEC512, true),
|
makePair(SSPEC256, ISPEC512, true),
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
|
|
||||||
package org.openjdk.bench.jdk.incubator.vector;
|
package org.openjdk.bench.jdk.incubator.vector;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
import jdk.incubator.vector.*;
|
import jdk.incubator.vector.*;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import org.openjdk.jmh.annotations.*;
|
import org.openjdk.jmh.annotations.*;
|
||||||
@ -32,65 +33,630 @@ import org.openjdk.jmh.annotations.*;
|
|||||||
@State(Scope.Thread)
|
@State(Scope.Thread)
|
||||||
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||||
public class VectorFPtoIntCastOperations {
|
public class VectorFPtoIntCastOperations {
|
||||||
|
@Param({"512", "1024"})
|
||||||
|
static int SIZE;
|
||||||
|
|
||||||
FloatVector fvec256;
|
static final float [] float_sp_vals = {
|
||||||
FloatVector fvec512;
|
Float.NaN,
|
||||||
DoubleVector dvec512;
|
Float.POSITIVE_INFINITY,
|
||||||
|
Float.NEGATIVE_INFINITY,
|
||||||
static final float [] float_arr = {
|
0.0f,
|
||||||
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
|
-0.0f
|
||||||
9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static final double [] double_arr = {
|
static final double [] double_sp_vals = {
|
||||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
|
Double.NaN,
|
||||||
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0
|
Double.POSITIVE_INFINITY,
|
||||||
|
Double.NEGATIVE_INFINITY,
|
||||||
|
0.0,
|
||||||
|
-0.0
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static float [] float_arr;
|
||||||
|
|
||||||
|
static double [] double_arr;
|
||||||
|
|
||||||
|
static long [] long_res;
|
||||||
|
|
||||||
|
static int [] int_res;
|
||||||
|
|
||||||
|
static short [] short_res;
|
||||||
|
|
||||||
|
static byte [] byte_res;
|
||||||
|
|
||||||
@Setup(Level.Trial)
|
@Setup(Level.Trial)
|
||||||
public void BmSetup() {
|
public void BmSetup() {
|
||||||
fvec256 = FloatVector.fromArray(FloatVector.SPECIES_256, float_arr, 0);
|
Random r = new Random(1024);
|
||||||
fvec512 = FloatVector.fromArray(FloatVector.SPECIES_512, float_arr, 0);
|
float_arr = new float[SIZE];
|
||||||
dvec512 = DoubleVector.fromArray(DoubleVector.SPECIES_512, double_arr, 0);
|
double_arr = new double[SIZE];
|
||||||
|
long_res = new long[SIZE];
|
||||||
|
int_res = new int[SIZE * 2];
|
||||||
|
short_res = new short[SIZE * 4];
|
||||||
|
byte_res = new byte[SIZE * 8];
|
||||||
|
for(int i = 0; i < SIZE; i++) {
|
||||||
|
float_arr[i] = SIZE * r.nextFloat();
|
||||||
|
double_arr[i] = SIZE * r.nextDouble();
|
||||||
|
}
|
||||||
|
for(int i = 0 ; i < SIZE; i += 100) {
|
||||||
|
System.arraycopy(float_sp_vals, 0, float_arr, i, float_sp_vals.length);
|
||||||
|
System.arraycopy(double_sp_vals, 0, double_arr, i, double_sp_vals.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat128ToByte128() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public Vector microFloat2Int() {
|
public void microFloat128ToByte256() {
|
||||||
return fvec512.convertShape(VectorOperators.F2I, IntVector.SPECIES_512, 0);
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 8); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public Vector microFloat2Long() {
|
public void microFloat128ToByte512() {
|
||||||
return fvec256.convertShape(VectorOperators.F2L, LongVector.SPECIES_512, 0);
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 16); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public Vector microFloat2Short() {
|
public void microFloat128ToShort128() {
|
||||||
return fvec512.convertShape(VectorOperators.F2S, ShortVector.SPECIES_256, 0);
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public Vector microFloat2Byte() {
|
public void microFloat128ToShort256() {
|
||||||
return fvec512.convertShape(VectorOperators.F2B, ByteVector.SPECIES_128, 0);
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public Vector microDouble2Int() {
|
public void microFloat128ToShort512() {
|
||||||
return dvec512.convertShape(VectorOperators.D2I, IntVector.SPECIES_256, 0);
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 8); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public Vector microDouble2Long() {
|
public void microFloat128ToInteger128() {
|
||||||
return dvec512.convertShape(VectorOperators.D2L, LongVector.SPECIES_512, 0);
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public Vector microDouble2Short() {
|
public void microFloat128ToInteger256() {
|
||||||
return dvec512.convertShape(VectorOperators.D2S, ShortVector.SPECIES_128, 0);
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public Vector microDouble2Byte() {
|
public void microFloat128ToInteger512() {
|
||||||
return dvec512.convertShape(VectorOperators.D2B, ByteVector.SPECIES_64, 0);
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat128ToLong128() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat128ToLong256() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat128ToLong512() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToByte128() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToByte256() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToByte512() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 8); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToShort128() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToShort256() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToShort512() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToInteger128() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToInteger256() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToInteger512() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToLong128() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToLong256() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microFloat256ToLong512() {
|
||||||
|
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_256;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||||
|
.convertShape(VectorOperators.F2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToByte128() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 8); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToByte256() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 16); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToByte512() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 32); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToShort128() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToShort256() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 8); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToShort512() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 16); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToInteger128() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToInteger256() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToInteger512() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 8); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToLong128() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToLong256() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble128ToLong512() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_128;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToByte128() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToByte256() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 8); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToByte512() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Byte> OSPECIES = ByteVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 16); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2B, OSPECIES, 0)
|
||||||
|
.reinterpretAsBytes()
|
||||||
|
.intoArray(byte_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToShort128() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToShort256() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToShort512() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 8); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2S, OSPECIES, 0)
|
||||||
|
.reinterpretAsShorts()
|
||||||
|
.intoArray(short_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToInteger128() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToInteger256() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToInteger512() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Integer> OSPECIES = IntVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 4); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2I, OSPECIES, 0)
|
||||||
|
.reinterpretAsInts()
|
||||||
|
.intoArray(int_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToLong128() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_128;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToLong256() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_256;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void microDouble256ToLong512() {
|
||||||
|
VectorSpecies<Double> ISPECIES = DoubleVector.SPECIES_256;
|
||||||
|
VectorSpecies<Long> OSPECIES = LongVector.SPECIES_512;
|
||||||
|
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||||
|
DoubleVector.fromArray(ISPECIES, double_arr, i)
|
||||||
|
.convertShape(VectorOperators.D2L, OSPECIES, 0)
|
||||||
|
.reinterpretAsLongs()
|
||||||
|
.intoArray(long_res, j);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user