8287835: Add support for additional float/double to integral conversion for x86
Reviewed-by: kvn, jbhateja
This commit is contained in:
parent
3ee1e60595
commit
2cc40afa07
@ -2102,6 +2102,14 @@ void Assembler::vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
|
|||||||
emit_int16(0x5B, (0xC0 | encode));
|
emit_int16(0x5B, (0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the EVEX-encoded VCVTTPS2QQ instruction (register-to-register form):
// packed float -> packed signed 64-bit integer conversion; the extra 't' in the
// mnemonic denotes the truncating variant.
//   dst        - destination vector register
//   src        - source vector register
//   vector_len - vector length encoding forwarded to InstructionAttr
// Asserts that AVX-512 with the DQ extension is available.
void Assembler::evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
  InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // Single-source instruction: the nds slot is passed as 0.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  // Opcode byte 0x7A, then a ModRM byte in register-direct mode (0xC0).
  emit_int16(0x7A, (0xC0 | reg_bits));
}
|
||||||
|
|
||||||
void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
|
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
|
||||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
@ -2182,6 +2190,14 @@ void Assembler::evpmovqw(XMMRegister dst, XMMRegister src, int vector_len) {
|
|||||||
emit_int16(0x34, (0xC0 | encode));
|
emit_int16(0x34, (0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the EVEX-encoded VPMOVSQD instruction (register-to-register form),
// declared in the header under "Evex casts with signed saturation": narrows
// packed 64-bit lanes of src into 32-bit lanes of dst.
//   dst        - destination vector register
//   src        - source vector register
//   vector_len - vector length encoding forwarded to InstructionAttr
void Assembler::evpmovsqd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "");
  InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // Operand order is deliberately swapped relative to most emitters in this
  // file: src goes into the first encoding slot and dst into the last one.
  const int reg_bits = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attrs);
  // Opcode byte 0x25, then a ModRM byte in register-direct mode (0xC0).
  emit_int16(0x25, (0xC0 | reg_bits));
}
|
||||||
|
|
||||||
void Assembler::decl(Address dst) {
|
void Assembler::decl(Address dst) {
|
||||||
// Don't use it directly. Use MacroAssembler::decrement() instead.
|
// Don't use it directly. Use MacroAssembler::decrement() instead.
|
||||||
InstructionMark im(this);
|
InstructionMark im(this);
|
||||||
@ -4293,6 +4309,16 @@ void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
|
|||||||
emit_int16(0x29, (0xC0 | encode));
|
emit_int16(0x29, (0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the EVEX-encoded VPCMPEQQ instruction with an embedded opmask:
// compares the 64-bit lanes of nds and src for equality and writes the
// per-lane result into the mask register kdst.
//   kdst       - destination mask register
//   mask       - opmask register embedded into the EVEX prefix
//   nds, src   - the two vector operands being compared
//   vector_len - vector length encoding forwarded to InstructionAttr
void Assembler::evpcmpeqq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attrs(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // NOTE(review): presumably selects merge-masking instead of zero-masking - confirm
  // against InstructionAttr::reset_is_clear_context().
  attrs.reset_is_clear_context();
  attrs.set_embedded_opmask_register_specifier(mask);
  const int reg_bits = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  // Opcode byte 0x29, then a ModRM byte in register-direct mode (0xC0).
  emit_int16(0x29, (0xC0 | reg_bits));
}
|
||||||
|
|
||||||
void Assembler::vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
|
void Assembler::vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
|
||||||
assert(VM_Version::supports_avx(), "");
|
assert(VM_Version::supports_avx(), "");
|
||||||
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||||
|
@ -1169,9 +1169,10 @@ private:
|
|||||||
void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
|
void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
|
void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
// Convert vector float and int
|
// Convert vector float to int/long
|
||||||
void vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len);
|
void vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
|
void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
// Float -> long vector conversion (VCVTTPS2QQ); the definition asserts UseAVX > 2 and AVX512DQ support.
void evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
// Convert vector long to vector FP
|
// Convert vector long to vector FP
|
||||||
void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
|
void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
@ -1189,6 +1190,9 @@ private:
|
|||||||
void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
|
void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
|
void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
|
// Evex casts with signed saturation
|
||||||
|
// Quadword -> doubleword narrowing (VPMOVSQD); the definition asserts UseAVX > 2.
void evpmovsqd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
//Abs of packed Integer values
|
//Abs of packed Integer values
|
||||||
void pabsb(XMMRegister dst, XMMRegister src);
|
void pabsb(XMMRegister dst, XMMRegister src);
|
||||||
void pabsw(XMMRegister dst, XMMRegister src);
|
void pabsw(XMMRegister dst, XMMRegister src);
|
||||||
@ -1786,6 +1790,7 @@ private:
|
|||||||
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
|
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
|
||||||
|
|
||||||
void pcmpeqq(XMMRegister dst, XMMRegister src);
|
void pcmpeqq(XMMRegister dst, XMMRegister src);
|
||||||
|
// Masked overload: 'mask' is embedded as the EVEX opmask for the 64-bit lane equality compare into kdst.
void evpcmpeqq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
|
void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
|
||||||
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
|
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
|
@ -4183,6 +4183,28 @@ void C2_MacroAssembler::vector_cast_float_special_cases_evex(XMMRegister dst, XM
|
|||||||
bind(done);
|
bind(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Fix-up pass run after a float -> long vector conversion has written dst.
 * Lanes of dst that equal the double_sign_flip constant are treated as
 * candidates for correction:
 *   - candidate lanes whose src element is NaN are overwritten with 0;
 *   - remaining candidate lanes whose src element is not less than 0 are
 *     overwritten with the bitwise complement of the constant.
 * All other lanes are left untouched. xtmp1/xtmp2 and ktmp1/ktmp2 are
 * clobbered; scratch is used to address the constant.
 * NOTE(review): the constant is presumably 0x8000000000000000 per lane (the
 * value the hardware conversion yields for NaN/out-of-range inputs), which
 * would make the complement Long.MAX_VALUE - the table is not visible here.
 */
void C2_MacroAssembler::vector_cast_float_to_long_special_cases_evex(
    XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
    XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
    Register scratch, AddressLiteral double_sign_flip,
    int vec_enc) {
  Label no_fixup_needed;

  // xtmp1 <- constant; ktmp1 <- lanes of dst equal to it.
  evmovdquq(xtmp1, k0, double_sign_flip, false, vec_enc, scratch);
  Assembler::evpcmpeqq(ktmp1, k0, xtmp1, dst, vec_enc);
  // Fast path: no lane matched, so the converted result is already final.
  kortestwl(ktmp1, ktmp1);
  jccb(Assembler::equal, no_fixup_needed);

  // xtmp2 <- 0; ktmp2 <- lanes where src is unordered with itself (NaN).
  vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
  evcmpps(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
  // Merge 0 into the NaN lanes of dst.
  evmovdquq(dst, ktmp2, xtmp2, true, vec_enc);

  // Drop the NaN lanes from the candidate mask, then keep only those
  // candidates whose src element is not less than 0 (xtmp2 is still zero).
  kxorwl(ktmp1, ktmp1, ktmp2);
  evcmpps(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
  // Truth table 0x11 is NOR of the two source operands; with both set to
  // xtmp1 this leaves ~xtmp1 in xtmp2.
  vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
  // Merge the complemented constant into the selected lanes of dst.
  evmovdquq(dst, ktmp1, xtmp2, true, vec_enc);

  bind(no_fixup_needed);
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation.
|
* Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation.
|
||||||
* If src is NaN, the result is 0.
|
* If src is NaN, the result is 0.
|
||||||
@ -4243,6 +4265,35 @@ void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XM
|
|||||||
vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc);
|
vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Casts a vector of floats in src to a vector of longs in dst.
 * Step 1 emits the raw hardware conversion (evcvttps2qq); step 2 runs the
 * float-to-long special-case fix-up over the result, using the supplied
 * temporaries and the double_sign_flip constant.
 */
void C2_MacroAssembler::vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                            KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
                                            Register scratch, int vec_enc) {
  // Raw conversion.
  evcvttps2qq(dst, src, vec_enc);
  // Note the argument order: the fix-up helper takes scratch before the constant.
  vector_cast_float_to_long_special_cases_evex(dst, src, xtmp1, xtmp2,
                                               ktmp1, ktmp2,
                                               scratch, double_sign_flip,
                                               vec_enc);
}
|
||||||
|
|
||||||
|
/*
 * Casts a vector of doubles in src to the integral element type to_elem_bt.
 * Every lane is first converted to long via vector_castD2L_evex. For narrower
 * targets the longs are then narrowed in place: evpmovsqd down to int and,
 * for T_SHORT / T_BYTE, a further evpmovdw / evpmovdb.
 * Any other element type trips the assert in debug builds.
 */
void C2_MacroAssembler::vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                            XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
                                            AddressLiteral double_sign_flip, Register scratch, int vec_enc) {
  vector_castD2L_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, double_sign_flip, scratch, vec_enc);

  switch (to_elem_bt) {
    case T_LONG:
      // The long result is already final.
      break;
    case T_INT:
      evpmovsqd(dst, dst, vec_enc);
      break;
    case T_SHORT:
      evpmovsqd(dst, dst, vec_enc);
      evpmovdw(dst, dst, vec_enc);
      break;
    case T_BYTE:
      evpmovsqd(dst, dst, vec_enc);
      evpmovdb(dst, dst, vec_enc);
      break;
    default:
      assert(false, "%s", type2name(to_elem_bt));
  }
}
|
||||||
|
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
||||||
|
@ -310,11 +310,18 @@ public:
|
|||||||
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
|
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
|
||||||
Register scratch, int vec_enc);
|
Register scratch, int vec_enc);
|
||||||
|
|
||||||
|
// Float -> long vector cast: emits evcvttps2qq followed by the float-to-long special-case fix-up.
void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
|
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
||||||
|
Register scratch, int vec_enc);
|
||||||
|
|
||||||
void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
||||||
Register scratch, int vec_enc);
|
Register scratch, int vec_enc);
|
||||||
|
|
||||||
|
// Double -> integral vector cast: converts to long first, then narrows in place for T_INT/T_SHORT/T_BYTE targets.
void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
|
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
||||||
|
Register scratch, int vec_enc);
|
||||||
|
|
||||||
void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
|
void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
|
||||||
BasicType from_elem_bt, BasicType to_elem_bt);
|
BasicType from_elem_bt, BasicType to_elem_bt);
|
||||||
|
|
||||||
@ -326,6 +333,11 @@ public:
|
|||||||
KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral float_sign_flip,
|
KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral float_sign_flip,
|
||||||
int vec_enc);
|
int vec_enc);
|
||||||
|
|
||||||
|
// Fix-up pass for float -> long casts: patches lanes of dst that equal the double_sign_flip constant
// (0 for NaN sources, complemented constant for sources not less than 0). Note that scratch precedes
// the constant in this parameter list.
void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
|
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
|
||||||
|
Register scratch, AddressLiteral double_sign_flip,
|
||||||
|
int vec_enc);
|
||||||
|
|
||||||
void vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
void vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
||||||
Register scratch, AddressLiteral float_sign_flip,
|
Register scratch, AddressLiteral float_sign_flip,
|
||||||
|
@ -1878,11 +1878,15 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Op_VectorCastD2X:
|
case Op_VectorCastD2X:
|
||||||
if (is_subword_type(bt) || bt == T_INT) {
|
// Conversion to integral type is only supported on AVX-512 platforms with avx512dq.
|
||||||
return false;
|
// Need avx512vl for size_in_bits < 512
|
||||||
}
|
if (is_integral_type(bt)) {
|
||||||
if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
|
if (!VM_Version::supports_avx512dq()) {
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Op_RoundVD:
|
case Op_RoundVD:
|
||||||
@ -1891,8 +1895,20 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Op_VectorCastF2X:
|
case Op_VectorCastF2X:
|
||||||
if (is_subword_type(bt) || bt == T_LONG) {
|
// F2I is supported on all AVX and above platforms
|
||||||
return false;
|
// For conversion to other integral types need AVX512:
|
||||||
|
// Conversion to long in addition needs avx512dq
|
||||||
|
// Need avx512vl for size_in_bits < 512
|
||||||
|
if (is_integral_type(bt) && (bt != T_INT)) {
|
||||||
|
if (UseAVX <= 2) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if ((bt == T_LONG) && !VM_Version::supports_avx512dq()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Op_MulReductionVI:
|
case Op_MulReductionVI:
|
||||||
@ -7325,6 +7341,8 @@ instruct vcastFtoD_reg(vec dst, vec src) %{
|
|||||||
|
|
||||||
|
|
||||||
instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
|
instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
|
||||||
|
// F2I conversion for < 64 byte vector using AVX instructions
|
||||||
|
// AVX512 platforms that don't support avx512vl also use AVX instructions to support F2I
|
||||||
predicate(!VM_Version::supports_avx512vl() &&
|
predicate(!VM_Version::supports_avx512vl() &&
|
||||||
Matcher::vector_length_in_bytes(n) < 64 &&
|
Matcher::vector_length_in_bytes(n) < 64 &&
|
||||||
Matcher::vector_element_basic_type(n) == T_INT);
|
Matcher::vector_element_basic_type(n) == T_INT);
|
||||||
@ -7356,6 +7374,37 @@ instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, k
|
|||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
  // Float -> integral cast for every integral target except T_INT, using AVX512 instructions.
  // Without avx512vl only 64 byte vectors can be handled by this rule.
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_INT);
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt != T_LONG) {
      // Sub-word target: convert to int at the source vector length, then narrow in place.
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc);
      if (to_elem_bt == T_SHORT) {
        __ evpmovdw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      } else {
        assert(to_elem_bt == T_BYTE, "required");
        __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      // Long target: the encoding length is taken from this node rather than from $src.
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
|
||||||
|
|
||||||
instruct vcastDtoF_reg(vec dst, vec src) %{
|
instruct vcastDtoF_reg(vec dst, vec src) %{
|
||||||
predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
|
predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
|
||||||
match(Set dst (VectorCastD2X src));
|
match(Set dst (VectorCastD2X src));
|
||||||
@ -7367,14 +7416,15 @@ instruct vcastDtoF_reg(vec dst, vec src) %{
|
|||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct castDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
|
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
|
||||||
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
|
predicate(is_integral_type(Matcher::vector_element_basic_type(n)));
|
||||||
match(Set dst (VectorCastD2X src));
|
match(Set dst (VectorCastD2X src));
|
||||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
|
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
|
||||||
format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
|
format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this);
|
int vlen_enc = vector_length_encoding(this, $src);
|
||||||
__ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
|
||||||
|
__ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
|
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
|
||||||
ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc);
|
ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc);
|
||||||
%}
|
%}
|
||||||
|
221
test/hotspot/jtreg/compiler/vectorapi/VectorFPtoIntCastTest.java
Normal file
221
test/hotspot/jtreg/compiler/vectorapi/VectorFPtoIntCastTest.java
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8287835
|
||||||
|
* @summary Test float/double to integral cast
|
||||||
|
* @modules jdk.incubator.vector
|
||||||
|
* @requires vm.compiler2.enabled
|
||||||
|
* @requires (os.simpleArch == "x64" & vm.cpu.features ~= ".*avx512dq.*")
|
||||||
|
* @library /test/lib /
|
||||||
|
* @run driver compiler.vectorapi.VectorFPtoIntCastTest
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.vectorapi;
|
||||||
|
|
||||||
|
import jdk.incubator.vector.*;
|
||||||
|
import jdk.incubator.vector.FloatVector;
|
||||||
|
import compiler.lib.ir_framework.*;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
public class VectorFPtoIntCastTest {
|
||||||
|
private static final int COUNT = 16;
|
||||||
|
private static final VectorSpecies<Long> lspec512 = LongVector.SPECIES_512;
|
||||||
|
private static final VectorSpecies<Integer> ispec512 = IntVector.SPECIES_512;
|
||||||
|
private static final VectorSpecies<Integer> ispec256 = IntVector.SPECIES_256;
|
||||||
|
private static final VectorSpecies<Short> sspec256 = ShortVector.SPECIES_256;
|
||||||
|
private static final VectorSpecies<Short> sspec128 = ShortVector.SPECIES_128;
|
||||||
|
private static final VectorSpecies<Byte> bspec128 = ByteVector.SPECIES_128;
|
||||||
|
private static final VectorSpecies<Byte> bspec64 = ByteVector.SPECIES_64;
|
||||||
|
|
||||||
|
private float [] float_arr;
|
||||||
|
private double [] double_arr;
|
||||||
|
private long [] long_arr;
|
||||||
|
private int [] int_arr;
|
||||||
|
private short [] short_arr;
|
||||||
|
private byte [] byte_arr;
|
||||||
|
|
||||||
|
private FloatVector fvec256;
|
||||||
|
private FloatVector fvec512;
|
||||||
|
private DoubleVector dvec512;
|
||||||
|
|
||||||
|
public static void main(String args[]) {
|
||||||
|
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
|
||||||
|
}
|
||||||
|
|
||||||
|
public VectorFPtoIntCastTest() {
|
||||||
|
float_arr = new float[COUNT];
|
||||||
|
double_arr = new double[COUNT];
|
||||||
|
long_arr = new long[COUNT];
|
||||||
|
int_arr = new int[COUNT];
|
||||||
|
short_arr = new short[COUNT];
|
||||||
|
byte_arr = new byte[COUNT];
|
||||||
|
|
||||||
|
Random ran = new Random(0);
|
||||||
|
for (int i = 0; i < COUNT; i++) {
|
||||||
|
float_arr[i] = ran.nextFloat();
|
||||||
|
double_arr[i] = ran.nextDouble();
|
||||||
|
}
|
||||||
|
|
||||||
|
fvec256 = FloatVector.fromArray(FloatVector.SPECIES_256, float_arr, 0);
|
||||||
|
fvec512 = FloatVector.fromArray(FloatVector.SPECIES_512, float_arr, 0);
|
||||||
|
dvec512 = DoubleVector.fromArray(DoubleVector.SPECIES_512, double_arr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = {"F2X", ">= 1"})
|
||||||
|
public void float2int() {
|
||||||
|
var cvec = (IntVector)fvec512.convertShape(VectorOperators.F2I, ispec512, 0);
|
||||||
|
cvec.intoArray(int_arr, 0);
|
||||||
|
checkf2int(cvec.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkf2int(int len) {
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
int expected = (int)float_arr[i];
|
||||||
|
if (int_arr[i] != expected) {
|
||||||
|
throw new RuntimeException("Invalid result: int_arr[" + i + "] = " + int_arr[i] + " != " + expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = {"F2X", ">= 1"})
|
||||||
|
public void float2long() {
|
||||||
|
var cvec = (LongVector)fvec512.convertShape(VectorOperators.F2L, lspec512, 0);
|
||||||
|
cvec.intoArray(long_arr, 0);
|
||||||
|
checkf2long(cvec.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkf2long(int len) {
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
long expected = (long)float_arr[i];
|
||||||
|
if (long_arr[i] != expected) {
|
||||||
|
throw new RuntimeException("Invalid result: long_arr[" + i + "] = " + long_arr[i] + " != " + expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = {"F2X", ">= 1"})
|
||||||
|
public void float2short() {
|
||||||
|
var cvec = (ShortVector)fvec512.convertShape(VectorOperators.F2S, sspec256, 0);
|
||||||
|
cvec.intoArray(short_arr, 0);
|
||||||
|
checkf2short(cvec.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkf2short(int len) {
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
short expected = (short)float_arr[i];
|
||||||
|
if (short_arr[i] != expected) {
|
||||||
|
throw new RuntimeException("Invalid result: short_arr[" + i + "] = " + short_arr[i] + " != " + expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = {"F2X", ">= 1"})
|
||||||
|
public void float2byte() {
|
||||||
|
var cvec = (ByteVector)fvec512.convertShape(VectorOperators.F2B, bspec128, 0);
|
||||||
|
cvec.intoArray(byte_arr, 0);
|
||||||
|
checkf2byte(cvec.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkf2byte(int len) {
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
byte expected = (byte)float_arr[i];
|
||||||
|
if (byte_arr[i] != expected) {
|
||||||
|
throw new RuntimeException("Invalid result: byte_arr[" + i + "] = " + byte_arr[i] + " != " + expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = {"D2X", ">= 1"})
|
||||||
|
public void double2int() {
|
||||||
|
var cvec = (IntVector)dvec512.convertShape(VectorOperators.D2I, ispec256, 0);
|
||||||
|
cvec.intoArray(int_arr, 0);
|
||||||
|
checkd2int(cvec.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkd2int(int len) {
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
int expected = (int)double_arr[i];
|
||||||
|
if (int_arr[i] != expected) {
|
||||||
|
throw new RuntimeException("Invalid result: int_arr[" + i + "] = " + int_arr[i] + " != " + expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = {"D2X", ">= 1"})
|
||||||
|
public void double2long() {
|
||||||
|
var cvec = (LongVector)dvec512.convertShape(VectorOperators.D2L, lspec512, 0);
|
||||||
|
cvec.intoArray(long_arr, 0);
|
||||||
|
checkd2long(cvec.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkd2long(int len) {
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
long expected = (long)double_arr[i];
|
||||||
|
if (long_arr[i] != expected) {
|
||||||
|
throw new RuntimeException("Invalid result: long_arr[" + i + "] = " + long_arr[i] + " != " + expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = {"D2X", ">= 1"})
|
||||||
|
public void double2short() {
|
||||||
|
var cvec = (ShortVector)dvec512.convertShape(VectorOperators.D2S, sspec128, 0);
|
||||||
|
cvec.intoArray(short_arr, 0);
|
||||||
|
checkd2short(cvec.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkd2short(int len) {
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
short expected = (short)double_arr[i];
|
||||||
|
if (short_arr[i] != expected) {
|
||||||
|
throw new RuntimeException("Invalid result: short_arr[" + i + "] = " + short_arr[i] + " != " + expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = {"D2X", ">= 1"})
|
||||||
|
public void double2byte() {
|
||||||
|
var cvec = (ByteVector)dvec512.convertShape(VectorOperators.D2B, bspec64, 0);
|
||||||
|
cvec.intoArray(byte_arr, 0);
|
||||||
|
checkd2byte(cvec.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkd2byte(int len) {
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
byte expected = (byte)double_arr[i];
|
||||||
|
if (byte_arr[i] != expected) {
|
||||||
|
throw new RuntimeException("Invalid result: byte_arr[" + i + "] = " + byte_arr[i] + " != " + expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,96 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.openjdk.bench.jdk.incubator.vector;
|
||||||
|
|
||||||
|
import jdk.incubator.vector.*;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.*;
|
||||||
|
|
||||||
|
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||||
|
@State(Scope.Thread)
|
||||||
|
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||||
|
public class VectorFPtoIntCastOperations {
|
||||||
|
|
||||||
|
FloatVector fvec256;
|
||||||
|
FloatVector fvec512;
|
||||||
|
DoubleVector dvec512;
|
||||||
|
|
||||||
|
static final float [] float_arr = {
|
||||||
|
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
|
||||||
|
9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f
|
||||||
|
};
|
||||||
|
|
||||||
|
static final double [] double_arr = {
|
||||||
|
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
|
||||||
|
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0
|
||||||
|
};
|
||||||
|
|
||||||
|
@Setup(Level.Trial)
|
||||||
|
public void BmSetup() {
|
||||||
|
fvec256 = FloatVector.fromArray(FloatVector.SPECIES_256, float_arr, 0);
|
||||||
|
fvec512 = FloatVector.fromArray(FloatVector.SPECIES_512, float_arr, 0);
|
||||||
|
dvec512 = DoubleVector.fromArray(DoubleVector.SPECIES_512, double_arr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Vector microFloat2Int() {
|
||||||
|
return fvec512.convertShape(VectorOperators.F2I, IntVector.SPECIES_512, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Vector microFloat2Long() {
|
||||||
|
return fvec256.convertShape(VectorOperators.F2L, LongVector.SPECIES_512, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Vector microFloat2Short() {
|
||||||
|
return fvec512.convertShape(VectorOperators.F2S, ShortVector.SPECIES_256, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Vector microFloat2Byte() {
|
||||||
|
return fvec512.convertShape(VectorOperators.F2B, ByteVector.SPECIES_128, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Vector microDouble2Int() {
|
||||||
|
return dvec512.convertShape(VectorOperators.D2I, IntVector.SPECIES_256, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Vector microDouble2Long() {
|
||||||
|
return dvec512.convertShape(VectorOperators.D2L, LongVector.SPECIES_512, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Vector microDouble2Short() {
|
||||||
|
return dvec512.convertShape(VectorOperators.D2S, ShortVector.SPECIES_128, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Vector microDouble2Byte() {
|
||||||
|
return dvec512.convertShape(VectorOperators.D2B, ByteVector.SPECIES_64, 0);
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user