8279508: Auto-vectorize Math.round API
Reviewed-by: sviswanathan, aph
This commit is contained in:
parent
c1e67b6603
commit
003ec21f3c
@ -1995,6 +1995,13 @@ void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
|
|||||||
emit_int16(0x2C, (0xC0 | encode));
|
emit_int16(0x2C, (0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the register-to-register form of the scalar float -> 32-bit integer
// conversion selected by prefix F3, opcode map 0F, opcode 0x2D. This is the
// 0x2D sibling of the truncating cvttss2sil (0x2C); per the Intel SDM the 0x2D
// form (CVTSS2SI) rounds according to MXCSR.RC instead of truncating.
//   dst - general purpose destination register
//   src - XMM register holding the float in its low element
void Assembler::cvtss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // The prefix helper is typed on XMM registers, so the GPR destination is
  // handed over via its encoding number.
  XMMRegister dst_as_xmm = as_XMMRegister(dst->encoding());
  int modrm_regs = simd_prefix_and_encode(dst_as_xmm, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attrs);
  // Opcode byte followed by a ModRM byte with the 0xC0 (register-direct) bits set.
  emit_int16(0x2D, (0xC0 | modrm_regs));
}
|
||||||
|
|
||||||
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
|
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
|
||||||
NOT_LP64(assert(VM_Version::supports_sse(), ""));
|
NOT_LP64(assert(VM_Version::supports_sse(), ""));
|
||||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||||
@ -2088,6 +2095,21 @@ void Assembler::vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
|
|||||||
emit_int16(0x5B, (0xC0 | encode));
|
emit_int16(0x5B, (0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the register form of the packed float -> packed 32-bit integer
// conversion encoded as prefix 66, opcode map 0F, opcode 0x5B. It shares the
// opcode byte with vcvttps2dq and differs in the SIMD prefix; per the Intel SDM
// this non-truncating form (VCVTPS2DQ) rounds according to MXCSR.RC.
//   dst        - destination vector register
//   src        - source vector register
//   vector_len - vector length encoding; lengths up to AVX_256bit require AVX,
//                anything larger requires EVEX support (asserted below)
void Assembler::vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
  InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  const int dst_enc = dst->encoding();
  const int src_enc = src->encoding();
  // There is no second source operand, so the middle encoding argument is 0.
  const int modrm_regs = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int16(0x5B, (0xC0 | modrm_regs));
}
|
||||||
|
|
||||||
|
// Emits the register form of the packed double -> packed 64-bit integer
// conversion (prefix 66, opcode map 0F, W bit set, opcode 0x7B), forced to an
// EVEX encoding. Requires UseAVX > 2 and AVX512DQ (asserted). Per the Intel SDM
// this is VCVTPD2QQ, which rounds according to MXCSR.RC; the truncating variant
// is evcvttpd2qq.
//   dst        - destination vector register
//   src        - source vector register
//   vector_len - vector length encoding
void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
  InstructionAttr attrs(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int dst_enc = dst->encoding();
  const int src_enc = src->encoding();
  // There is no second source operand, so the middle encoding argument is 0.
  const int modrm_regs = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int16(0x7B, (0xC0 | modrm_regs));
}
|
||||||
|
|
||||||
void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
|
void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
|
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
|
||||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
@ -6493,7 +6515,6 @@ void Assembler::vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vec
|
|||||||
emit_int8((rmode));
|
emit_int8((rmode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
|
void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
assert(VM_Version::supports_avx(), "");
|
assert(VM_Version::supports_avx(), "");
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
@ -12285,6 +12306,13 @@ void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
|
|||||||
emit_int16(0x2C, (0xC0 | encode));
|
emit_int16(0x2C, (0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the register-to-register form of the scalar double -> 64-bit integer
// conversion selected by prefix F2, opcode map 0F, opcode 0x2D with the W bit
// set. This is the 0x2D sibling of the truncating cvttsd2siq (0x2C); per the
// Intel SDM the 0x2D form (CVTSD2SI) rounds according to MXCSR.RC.
//   dst - general purpose destination register
//   src - XMM register holding the double in its low element
void Assembler::cvtsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attrs(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // The prefix helper is typed on XMM registers, so the GPR destination is
  // handed over via its encoding number.
  XMMRegister dst_as_xmm = as_XMMRegister(dst->encoding());
  int modrm_regs = simd_prefix_and_encode(dst_as_xmm, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attrs);
  // Opcode byte followed by a ModRM byte with the 0xC0 (register-direct) bits set.
  emit_int16(0x2D, (0xC0 | modrm_regs));
}
|
||||||
|
|
||||||
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
|
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
|
||||||
NOT_LP64(assert(VM_Version::supports_sse(), ""));
|
NOT_LP64(assert(VM_Version::supports_sse(), ""));
|
||||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||||
|
@ -1149,6 +1149,7 @@ private:
|
|||||||
void cvtss2sd(XMMRegister dst, Address src);
|
void cvtss2sd(XMMRegister dst, Address src);
|
||||||
|
|
||||||
// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
|
// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
|
||||||
|
// Non-truncating scalar double -> 64-bit integer conversion (opcode 0x2D, W bit set);
// per the Intel SDM (CVTSD2SI) the result is rounded according to MXCSR.RC.
void cvtsd2siq(Register dst, XMMRegister src);
|
||||||
void cvttsd2sil(Register dst, Address src);
|
void cvttsd2sil(Register dst, Address src);
|
||||||
void cvttsd2sil(Register dst, XMMRegister src);
|
void cvttsd2sil(Register dst, XMMRegister src);
|
||||||
void cvttsd2siq(Register dst, Address src);
|
void cvttsd2siq(Register dst, Address src);
|
||||||
@ -1157,6 +1158,7 @@ private:
|
|||||||
// Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
|
// Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
|
||||||
void cvttss2sil(Register dst, XMMRegister src);
|
void cvttss2sil(Register dst, XMMRegister src);
|
||||||
void cvttss2siq(Register dst, XMMRegister src);
|
void cvttss2siq(Register dst, XMMRegister src);
|
||||||
|
// Non-truncating scalar float -> 32-bit integer conversion (opcode 0x2D);
// per the Intel SDM (CVTSS2SI) the result is rounded according to MXCSR.RC.
void cvtss2sil(Register dst, XMMRegister src);
|
||||||
|
|
||||||
// Convert vector double to int
|
// Convert vector double to int
|
||||||
void cvttpd2dq(XMMRegister dst, XMMRegister src);
|
void cvttpd2dq(XMMRegister dst, XMMRegister src);
|
||||||
@ -1166,6 +1168,7 @@ private:
|
|||||||
void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
|
void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
// Convert vector float and int
|
// Convert vector float and int
|
||||||
|
// Non-truncating packed float -> packed int conversion (66 0F 5B); counterpart of vcvttps2dq.
void vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
|
void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
// Convert vector long to vector FP
|
// Convert vector long to vector FP
|
||||||
@ -1173,6 +1176,7 @@ private:
|
|||||||
void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
|
void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
// Convert vector double to long
|
// Convert vector double to long
|
||||||
|
// Non-truncating packed double -> packed long conversion (EVEX, needs AVX512DQ); counterpart of evcvttpd2qq.
void evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
|
void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
// Evex casts with truncation
|
// Evex casts with truncation
|
||||||
|
@ -4061,41 +4061,18 @@ void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Algorithm for vector D2L and F2I conversions:-
|
* Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation.
|
||||||
* a) Perform vector D2L/F2I cast.
|
* If src is NaN, the result is 0.
|
||||||
* b) Choose fast path if none of the result vector lane contains 0x80000000 value.
|
* If the src is negative infinity or any value less than or equal to the value of Integer.MIN_VALUE,
|
||||||
* It signifies that source value could be any of the special floating point
|
* the result is equal to the value of Integer.MIN_VALUE.
|
||||||
* values(NaN,-Inf,Inf,Max,-Min).
|
* If the src is positive infinity or any value greater than or equal to the value of Integer.MAX_VALUE,
|
||||||
* c) Set destination to zero if source is NaN value.
|
* the result is equal to the value of Integer.MAX_VALUE.
|
||||||
* d) Replace 0x80000000 with MaxInt if source lane contains a +ve value.
|
|
||||||
*/
|
*/
|
||||||
|
void C2_MacroAssembler::vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
||||||
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
Register scratch, AddressLiteral float_sign_flip,
|
||||||
Register scratch, int vec_enc) {
|
int vec_enc) {
|
||||||
Label done;
|
Label done;
|
||||||
evcvttpd2qq(dst, src, vec_enc);
|
|
||||||
evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch);
|
|
||||||
evpcmpeqq(ktmp1, xtmp1, dst, vec_enc);
|
|
||||||
kortestwl(ktmp1, ktmp1);
|
|
||||||
jccb(Assembler::equal, done);
|
|
||||||
|
|
||||||
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
|
|
||||||
evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
|
|
||||||
evmovdquq(dst, ktmp2, xtmp2, true, vec_enc);
|
|
||||||
|
|
||||||
kxorwl(ktmp1, ktmp1, ktmp2);
|
|
||||||
evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
|
|
||||||
vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
|
|
||||||
evmovdquq(dst, ktmp1, xtmp2, true, vec_enc);
|
|
||||||
bind(done);
|
|
||||||
}
|
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
|
||||||
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
|
||||||
AddressLiteral float_sign_flip, Register scratch, int vec_enc) {
|
|
||||||
Label done;
|
|
||||||
vcvttps2dq(dst, src, vec_enc);
|
|
||||||
vmovdqu(xtmp1, float_sign_flip, scratch, vec_enc);
|
vmovdqu(xtmp1, float_sign_flip, scratch, vec_enc);
|
||||||
vpcmpeqd(xtmp2, dst, xtmp1, vec_enc);
|
vpcmpeqd(xtmp2, dst, xtmp1, vec_enc);
|
||||||
vptest(xtmp2, xtmp2, vec_enc);
|
vptest(xtmp2, xtmp2, vec_enc);
|
||||||
@ -4120,11 +4097,11 @@ void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMM
|
|||||||
bind(done);
|
bind(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
void C2_MacroAssembler::vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
|
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
|
||||||
Register scratch, int vec_enc) {
|
Register scratch, AddressLiteral float_sign_flip,
|
||||||
|
int vec_enc) {
|
||||||
Label done;
|
Label done;
|
||||||
vcvttps2dq(dst, src, vec_enc);
|
|
||||||
evmovdqul(xtmp1, k0, float_sign_flip, false, vec_enc, scratch);
|
evmovdqul(xtmp1, k0, float_sign_flip, false, vec_enc, scratch);
|
||||||
Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc);
|
Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc);
|
||||||
kortestwl(ktmp1, ktmp1);
|
kortestwl(ktmp1, ktmp1);
|
||||||
@ -4141,6 +4118,115 @@ void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XM
|
|||||||
bind(done);
|
bind(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation.
|
||||||
|
* If src is NaN, the result is 0.
|
||||||
|
* If the src is negative infinity or any value less than or equal to the value of Long.MIN_VALUE,
|
||||||
|
* the result is equal to the value of Long.MIN_VALUE.
|
||||||
|
* If the src is positive infinity or any value greater than or equal to the value of Long.MAX_VALUE,
|
||||||
|
* the result is equal to the value of Long.MAX_VALUE.
|
||||||
|
*/
|
||||||
|
// Post-conversion fix-up for a double -> long vector result already present in dst.
// Lanes of dst that equal the double_sign_flip constant are revisited:
//   - lanes whose src is NaN are overwritten with 0;
//   - remaining such lanes whose src is not less than 0 are overwritten with the
//     bitwise complement of the constant;
//   - all other lanes keep the value the conversion produced.
// NOTE(review): presumably double_sign_flip holds 0x8000000000000000 per lane
// (Long.MIN_VALUE), so its complement is Long.MAX_VALUE -- confirm against the
// stub that supplies the constant.
// Temporaries written: xtmp1, xtmp2, ktmp1, ktmp2; scratch is forwarded to the constant load.
void C2_MacroAssembler::vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
|
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
|
||||||
|
Register scratch, AddressLiteral double_sign_flip,
|
||||||
|
int vec_enc) {
|
||||||
|
Label done;
|
||||||
|
evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch); // xtmp1 <- double_sign_flip constant
|
||||||
|
evpcmpeqq(ktmp1, xtmp1, dst, vec_enc); // ktmp1 <- lanes of dst equal to the constant
|
||||||
|
kortestwl(ktmp1, ktmp1);
|
||||||
|
jccb(Assembler::equal, done); // no such lane: nothing to fix up
|
||||||
|
|
||||||
|
vpxor(xtmp2, xtmp2, xtmp2, vec_enc); // xtmp2 <- 0
|
||||||
|
evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc); // ktmp2 <- lanes where src is unordered with itself (NaN)
|
||||||
|
evmovdquq(dst, ktmp2, xtmp2, true, vec_enc); // NaN lanes of dst <- 0
|
||||||
|
|
||||||
|
kxorwl(ktmp1, ktmp1, ktmp2); // remove the NaN lanes from ktmp1
|
||||||
|
evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc); // of those, keep lanes where src is not less than 0
|
||||||
|
vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc); // imm 0x11 is NOR of the two sources: xtmp2 <- ~xtmp1
|
||||||
|
evmovdquq(dst, ktmp1, xtmp2, true, vec_enc); // those lanes of dst <- complement of the constant
|
||||||
|
bind(done);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Algorithm for vector D2L and F2I conversions:-
|
||||||
|
* a) Perform vector D2L/F2I cast.
|
||||||
|
* b) Choose the fast path if none of the result vector lanes contains the 0x80000000 value.
|
||||||
|
* A lane holding that value signifies that its source value could be any of the special floating point
|
||||||
|
* values(NaN,-Inf,Inf,Max,-Min).
|
||||||
|
* c) Set destination to zero if source is NaN value.
|
||||||
|
* d) Replace 0x80000000 with MaxInt if source lane contains a +ve value.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Vector double -> long cast: performs the truncating conversion (evcvttpd2qq)
// into dst, then lets vector_cast_double_special_cases_evex patch the lanes
// that need special handling. xtmp1/xtmp2/ktmp1/ktmp2/scratch are temporaries
// forwarded to that helper.
void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
|
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
||||||
|
Register scratch, int vec_enc) {
|
||||||
|
evcvttpd2qq(dst, src, vec_enc);
|
||||||
|
vector_cast_double_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, double_sign_flip, vec_enc);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vector float -> int cast (AVX flavour): performs the truncating conversion
// (vcvttps2dq) into dst, then lets vector_cast_float_special_cases_avx patch
// the lanes that need special handling. xtmp1..xtmp4 and scratch are
// temporaries forwarded to that helper.
void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
|
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
||||||
|
AddressLiteral float_sign_flip, Register scratch, int vec_enc) {
|
||||||
|
vcvttps2dq(dst, src, vec_enc);
|
||||||
|
vector_cast_float_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, scratch, float_sign_flip, vec_enc);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vector float -> int cast (EVEX flavour): performs the truncating conversion
// (vcvttps2dq) into dst, then lets vector_cast_float_special_cases_evex patch
// the lanes that need special handling. xtmp1/xtmp2/ktmp1/ktmp2/scratch are
// temporaries forwarded to that helper.
void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
|
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
|
||||||
|
Register scratch, int vec_enc) {
|
||||||
|
vcvttps2dq(dst, src, vec_enc);
|
||||||
|
vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
|
// Vectorized Math.round(double) for EVEX targets.
//   dst              - receives the rounded long lanes
//   src              - double input lanes (left unmodified)
//   xtmp1, xtmp2     - vector temporaries (xtmp1 holds src + 0.5)
//   ktmp1, ktmp2     - mask temporaries used by the special-case fix-up
//   double_sign_flip - constant consumed by vector_cast_double_special_cases_evex
//   new_mxcsr        - MXCSR image to install while converting; the caller is
//                      expected to supply one selecting round-toward-negative-infinity
//   scratch          - GPR temporary (address materialization and the 0.5 bit pattern)
//   vec_enc          - vector length encoding
void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                 KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
                                                 AddressLiteral new_mxcsr, Register scratch, int vec_enc) {
  // Perform floor(val+0.5) operation under the influence of MXCSR.RC mode roundTowards -inf,
  // then reload the VM's standard MXCSR image (addr_mxcsr_std). Note that this is the
  // standard value, not necessarily whatever was live on entry.
  ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
  ldmxcsr(new_mxcsr, scratch);
  // Raw IEEE-754 bits of the double constant 0.5 (a plain double literal; the
  // previous 0.5L long double literal was only narrowed back to double).
  mov64(scratch, julong_cast(0.5));
  evpbroadcastq(xtmp1, scratch, vec_enc);
  vaddpd(xtmp1, src , xtmp1, vec_enc);
  // Non-truncating conversion: rounding follows the MXCSR image installed above.
  evcvtpd2qq(dst, xtmp1, vec_enc);
  // The fix-up inspects the original src, not src + 0.5.
  vector_cast_double_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, double_sign_flip, vec_enc);
  ldmxcsr(mxcsr_std, scratch);
}
|
||||||
|
|
||||||
|
// Vectorized Math.round(float) for EVEX targets.
//   dst             - receives the rounded int lanes
//   src             - float input lanes (left unmodified)
//   xtmp1, xtmp2    - vector temporaries (xtmp1 holds src + 0.5f)
//   ktmp1, ktmp2    - mask temporaries used by the special-case fix-up
//   float_sign_flip - constant consumed by vector_cast_float_special_cases_evex
//   new_mxcsr       - MXCSR image to install while converting; the caller is
//                     expected to supply one selecting round-toward-negative-infinity
//   scratch         - GPR temporary (address materialization and the 0.5f bit pattern)
//   vec_enc         - vector length encoding
void C2_MacroAssembler::vector_round_float_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
                                                AddressLiteral new_mxcsr, Register scratch, int vec_enc) {
  // Perform floor(val+0.5) operation under the influence of MXCSR.RC mode roundTowards -inf,
  // then reload the VM's standard MXCSR image (addr_mxcsr_std). Note that this is the
  // standard value, not necessarily whatever was live on entry.
  ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
  ldmxcsr(new_mxcsr, scratch);
  // Raw IEEE-754 bits of the float constant 0.5f (a float literal, so no
  // double -> float narrowing happens at the call).
  movl(scratch, jint_cast(0.5f));
  movq(xtmp1, scratch);
  vbroadcastss(xtmp1, xtmp1, vec_enc);
  vaddps(xtmp1, src , xtmp1, vec_enc);
  // Non-truncating conversion: rounding follows the MXCSR image installed above.
  vcvtps2dq(dst, xtmp1, vec_enc);
  // The fix-up inspects the original src, not src + 0.5f.
  vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc);
  ldmxcsr(mxcsr_std, scratch);
}
|
||||||
|
|
||||||
|
// Vectorized Math.round(float) for AVX targets without opmask registers.
//   dst             - receives the rounded int lanes
//   src             - float input lanes (left unmodified)
//   xtmp1..xtmp4    - vector temporaries (xtmp1 holds src + 0.5f)
//   float_sign_flip - constant consumed by vector_cast_float_special_cases_avx
//   new_mxcsr       - MXCSR image to install while converting; the caller is
//                     expected to supply one selecting round-toward-negative-infinity
//   scratch         - GPR temporary (address materialization and the 0.5f bit pattern)
//   vec_enc         - vector length encoding
void C2_MacroAssembler::vector_round_float_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                               XMMRegister xtmp3, XMMRegister xtmp4, AddressLiteral float_sign_flip,
                                               AddressLiteral new_mxcsr, Register scratch, int vec_enc) {
  // Perform floor(val+0.5) operation under the influence of MXCSR.RC mode roundTowards -inf,
  // then reload the VM's standard MXCSR image (addr_mxcsr_std). Note that this is the
  // standard value, not necessarily whatever was live on entry.
  ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
  ldmxcsr(new_mxcsr, scratch);
  // Raw IEEE-754 bits of the float constant 0.5f (a float literal, so no
  // double -> float narrowing happens at the call).
  movl(scratch, jint_cast(0.5f));
  movq(xtmp1, scratch);
  vbroadcastss(xtmp1, xtmp1, vec_enc);
  vaddps(xtmp1, src , xtmp1, vec_enc);
  // Non-truncating conversion: rounding follows the MXCSR image installed above.
  vcvtps2dq(dst, xtmp1, vec_enc);
  // The fix-up inspects the original src, not src + 0.5f.
  vector_cast_float_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, scratch, float_sign_flip, vec_enc);
  ldmxcsr(mxcsr_std, scratch);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
|
void C2_MacroAssembler::vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
|
||||||
BasicType from_elem_bt, BasicType to_elem_bt) {
|
BasicType from_elem_bt, BasicType to_elem_bt) {
|
||||||
switch (from_elem_bt) {
|
switch (from_elem_bt) {
|
||||||
|
@ -303,6 +303,7 @@ public:
|
|||||||
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
|
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
|
||||||
Register scratch, int vec_enc);
|
Register scratch, int vec_enc);
|
||||||
|
|
||||||
|
|
||||||
void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
||||||
Register scratch, int vec_enc);
|
Register scratch, int vec_enc);
|
||||||
@ -310,6 +311,33 @@ public:
|
|||||||
void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
|
void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
|
||||||
BasicType from_elem_bt, BasicType to_elem_bt);
|
BasicType from_elem_bt, BasicType to_elem_bt);
|
||||||
|
|
||||||
|
// Patches the lanes of an already-converted double -> long result in dst that
// equal the double_sign_flip constant: NaN sources become 0, sources not less
// than 0 become the complement of the constant. Writes xtmp1, xtmp2, ktmp1, ktmp2.
void vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
|
KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral double_sign_flip,
|
||||||
|
int vec_enc);
|
||||||
|
|
||||||
|
// Float -> int counterpart of vector_cast_double_special_cases_evex, operating on
// an already-converted result in dst with opmask temporaries.
// NOTE(review): presumably NaN lanes become 0 and positive out-of-range lanes
// become Integer.MAX_VALUE -- confirm against the definition.
void vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
|
KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral float_sign_flip,
|
||||||
|
int vec_enc);
|
||||||
|
|
||||||
|
// AVX (no opmask register) variant of the float -> int special-value fix-up,
// operating on an already-converted result in dst with four vector temporaries.
// NOTE(review): presumably NaN lanes become 0 and positive out-of-range lanes
// become Integer.MAX_VALUE -- confirm against the definition.
void vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
|
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
|
||||||
|
Register scratch, AddressLiteral float_sign_flip,
|
||||||
|
int vec_enc);
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
|
// Vectorized Math.round(double): adds 0.5 to src and converts with evcvtpd2qq while
// new_mxcsr is installed, patches special values, then reloads the standard MXCSR.
void vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
|
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
||||||
|
AddressLiteral new_mxcsr, Register scratch, int vec_enc);
|
||||||
|
|
||||||
|
// Vectorized Math.round(float) using opmask temporaries: adds 0.5f to src and converts
// with vcvtps2dq while new_mxcsr is installed, patches special values, then reloads the
// standard MXCSR. The sign-flip constant is the float one, matching the definition.
void vector_round_float_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                             KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
                             AddressLiteral new_mxcsr, Register scratch, int vec_enc);
|
||||||
|
|
||||||
|
// Vectorized Math.round(float) using only vector temporaries: adds 0.5f to src and converts
// with vcvtps2dq while new_mxcsr is installed, patches special values, then reloads the
// standard MXCSR.
void vector_round_float_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||||
|
XMMRegister xtmp3, XMMRegister xtmp4, AddressLiteral float_sign_flip,
|
||||||
|
AddressLiteral new_mxcsr, Register scratch, int vec_enc);
|
||||||
|
#endif
|
||||||
|
|
||||||
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
|
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
|
||||||
bool merge, BasicType bt, int vlen_enc);
|
bool merge, BasicType bt, int vlen_enc);
|
||||||
|
|
||||||
|
@ -2252,12 +2252,12 @@ void MacroAssembler::fld_x(AddressLiteral src) {
|
|||||||
Assembler::fld_x(as_Address(src));
|
Assembler::fld_x(as_Address(src));
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroAssembler::ldmxcsr(AddressLiteral src) {
|
void MacroAssembler::ldmxcsr(AddressLiteral src, Register scratchReg) {
|
||||||
if (reachable(src)) {
|
if (reachable(src)) {
|
||||||
Assembler::ldmxcsr(as_Address(src));
|
Assembler::ldmxcsr(as_Address(src));
|
||||||
} else {
|
} else {
|
||||||
lea(rscratch1, src);
|
lea(scratchReg, src);
|
||||||
Assembler::ldmxcsr(Address(rscratch1, 0));
|
Assembler::ldmxcsr(Address(scratchReg, 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -9120,6 +9120,80 @@ void MacroAssembler::convert_f2l(Register dst, XMMRegister src) {
|
|||||||
bind(done);
|
bind(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the scalar Math.round(float) sequence.
//   dst  - receives the int result; also used as a temporary throughout
//   src  - float input (left unmodified)
//   rtmp - GPR temporary
//   rcx  - receives the computed shift amount for the variable shift below.
//          NOTE(review): the one-operand sarl(dst) is presumably the shift-by-CL
//          form, so callers must pass the real RCX register here -- confirm.
// Java reference being translated:
//   biasedExp = (bits & EXP_BIT_MASK) >> (SIGNIFICAND_WIDTH - 1);
//   shift     = (SIGNIFICAND_WIDTH - 2 + EXP_BIAS) - biasedExp;
//   if ((shift & -32) == 0) {
//     r = (bits & SIGNIF_BIT_MASK) | (SIGNIF_BIT_MASK + 1);
//     if (bits < 0) r = -r;
//     return ((r >> shift) + 1) >> 1;
//   } else {
//     return (int) a;
//   }
void MacroAssembler::round_float(Register dst, XMMRegister src, Register rtmp, Register rcx) {
  // Following code is line by line assembly translation rounding algorithm.
  // Please refer to java.lang.Math.round(float) algorithm for details.
  const int32_t FloatConsts_EXP_BIT_MASK = 0x7F800000;
  const int32_t FloatConsts_SIGNIFICAND_WIDTH = 24;
  const int32_t FloatConsts_EXP_BIAS = 127;
  const int32_t FloatConsts_SIGNIF_BIT_MASK = 0x007FFFFF;
  // Written as -32 (same bit pattern as 0xFFFFFFE0) to mirror the Java source and to
  // avoid relying on an out-of-range unsigned -> signed conversion.
  const int32_t MINUS_32 = -32;
  Label L_special_case, L_block1, L_exit;
  // dst = biased exponent
  movl(rtmp, FloatConsts_EXP_BIT_MASK);
  movdl(dst, src);
  andl(dst, rtmp);
  sarl(dst, FloatConsts_SIGNIFICAND_WIDTH - 1);
  // rtmp = rcx = shift
  movl(rtmp, FloatConsts_SIGNIFICAND_WIDTH - 2 + FloatConsts_EXP_BIAS);
  subl(rtmp, dst);
  movl(rcx, rtmp);
  // (shift & -32) != 0  ->  shift is outside [0, 31]: take the plain cast path
  movl(dst, MINUS_32);
  testl(rtmp, dst);
  jccb(Assembler::notEqual, L_special_case);
  // dst = significand with the implicit leading one restored
  movdl(dst, src);
  andl(dst, FloatConsts_SIGNIF_BIT_MASK);
  orl(dst, FloatConsts_SIGNIF_BIT_MASK + 1);
  // negate when the raw bits are negative (sign bit set)
  movdl(rtmp, src);
  testl(rtmp, rtmp);
  jccb(Assembler::greaterEqual, L_block1);
  negl(dst);
  bind(L_block1);
  // ((r >> shift) + 1) >> 1
  sarl(dst);
  addl(dst, 0x1);
  sarl(dst, 0x1);
  jmp(L_exit);
  bind(L_special_case);
  convert_f2i(dst, src);
  bind(L_exit);
}
|
||||||
|
|
||||||
|
// Emits the scalar Math.round(double) sequence.
//   dst  - receives the long result; also used as a temporary throughout
//   src  - double input (left unmodified)
//   rtmp - GPR temporary
//   rcx  - receives the computed shift amount for the variable shift below.
//          NOTE(review): the one-operand sarq(dst) is presumably the shift-by-CL
//          form, so callers must pass the real RCX register here -- confirm.
// Java reference being translated:
//   biasedExp = (bits & EXP_BIT_MASK) >> (SIGNIFICAND_WIDTH - 1);
//   shift     = (SIGNIFICAND_WIDTH - 2 + EXP_BIAS) - biasedExp;
//   if ((shift & -64) == 0) {
//     r = (bits & SIGNIF_BIT_MASK) | (SIGNIF_BIT_MASK + 1);
//     if (bits < 0) r = -r;
//     return ((r >> shift) + 1) >> 1;
//   } else {
//     return (long) a;
//   }
void MacroAssembler::round_double(Register dst, XMMRegister src, Register rtmp, Register rcx) {
  // Following code is line by line assembly translation rounding algorithm.
  // Please refer to java.lang.Math.round(double) algorithm for details.
  const int64_t DoubleConsts_EXP_BIT_MASK = 0x7FF0000000000000L;
  const int64_t DoubleConsts_SIGNIFICAND_WIDTH = 53;
  const int64_t DoubleConsts_EXP_BIAS = 1023;
  const int64_t DoubleConsts_SIGNIF_BIT_MASK = 0x000FFFFFFFFFFFFFL;
  // Written as -64 (same bit pattern as 0xFFFFFFFFFFFFFFC0) to mirror the Java source
  // and to avoid relying on an out-of-range unsigned -> signed conversion.
  const int64_t MINUS_64 = -64;
  Label L_special_case, L_block1, L_exit;
  // dst = biased exponent
  mov64(rtmp, DoubleConsts_EXP_BIT_MASK);
  movq(dst, src);
  andq(dst, rtmp);
  sarq(dst, DoubleConsts_SIGNIFICAND_WIDTH - 1);
  // rtmp = rcx = shift
  mov64(rtmp, DoubleConsts_SIGNIFICAND_WIDTH - 2 + DoubleConsts_EXP_BIAS);
  subq(rtmp, dst);
  movq(rcx, rtmp);
  // (shift & -64) != 0  ->  shift is outside [0, 63]: take the plain cast path
  mov64(dst, MINUS_64);
  testq(rtmp, dst);
  jccb(Assembler::notEqual, L_special_case);
  // dst = significand with the implicit leading one restored
  movq(dst, src);
  mov64(rtmp, DoubleConsts_SIGNIF_BIT_MASK);
  andq(dst, rtmp);
  mov64(rtmp, DoubleConsts_SIGNIF_BIT_MASK + 1);
  orq(dst, rtmp);
  // negate when the raw bits are negative (sign bit set)
  movq(rtmp, src);
  testq(rtmp, rtmp);
  jccb(Assembler::greaterEqual, L_block1);
  negq(dst);
  bind(L_block1);
  // ((r >> shift) + 1) >> 1
  sarq(dst);
  addq(dst, 0x1);
  sarq(dst, 0x1);
  jmp(L_exit);
  bind(L_special_case);
  convert_d2l(dst, src);
  bind(L_exit);
}
|
||||||
|
|
||||||
void MacroAssembler::convert_d2l(Register dst, XMMRegister src) {
|
void MacroAssembler::convert_d2l(Register dst, XMMRegister src) {
|
||||||
Label done;
|
Label done;
|
||||||
cvttsd2siq(dst, src);
|
cvttsd2siq(dst, src);
|
||||||
|
@ -906,7 +906,7 @@ public:
|
|||||||
void fld_x(AddressLiteral src);
|
void fld_x(AddressLiteral src);
|
||||||
|
|
||||||
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
|
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
|
||||||
void ldmxcsr(AddressLiteral src);
|
void ldmxcsr(AddressLiteral src, Register scratchReg = rscratch1);
|
||||||
|
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
private:
|
private:
|
||||||
@ -1994,6 +1994,8 @@ public:
|
|||||||
void convert_d2i(Register dst, XMMRegister src);
|
void convert_d2i(Register dst, XMMRegister src);
|
||||||
void convert_f2l(Register dst, XMMRegister src);
|
void convert_f2l(Register dst, XMMRegister src);
|
||||||
void convert_d2l(Register dst, XMMRegister src);
|
void convert_d2l(Register dst, XMMRegister src);
|
||||||
|
// Emits the scalar Math.round(double) sequence; dst receives the long result, rtmp is a
// temporary, and the computed shift amount is staged in the register passed as rcx.
void round_double(Register dst, XMMRegister src, Register rtmp, Register rcx);
|
||||||
|
// Emits the scalar Math.round(float) sequence; dst receives the int result, rtmp is a
// temporary, and the computed shift amount is staged in the register passed as rcx.
void round_float(Register dst, XMMRegister src, Register rtmp, Register rcx);
|
||||||
|
|
||||||
void cache_wb(Address line);
|
void cache_wb(Address line);
|
||||||
void cache_wbsync(bool is_pre);
|
void cache_wbsync(bool is_pre);
|
||||||
|
@ -1468,6 +1468,16 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Op_RoundVF:
|
||||||
|
if (UseAVX < 2) { // enabled for AVX2 only
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case Op_RoundVD:
|
||||||
|
if (UseAVX < 3) {
|
||||||
|
return false; // enabled for AVX3 only
|
||||||
|
}
|
||||||
|
break;
|
||||||
case Op_CompareAndSwapL:
|
case Op_CompareAndSwapL:
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
case Op_CompareAndSwapP:
|
case Op_CompareAndSwapP:
|
||||||
@ -1572,6 +1582,12 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Op_RoundF:
|
||||||
|
case Op_RoundD:
|
||||||
|
if (!is_LP64) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
case Op_CopySignD:
|
case Op_CopySignD:
|
||||||
case Op_CopySignF:
|
case Op_CopySignF:
|
||||||
if (UseAVX < 3 || !is_LP64) {
|
if (UseAVX < 3 || !is_LP64) {
|
||||||
@ -1817,6 +1833,11 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Op_RoundVD:
|
||||||
|
if (!VM_Version::supports_avx512dq()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
case Op_VectorCastF2X:
|
case Op_VectorCastF2X:
|
||||||
if (is_subword_type(bt) || bt == T_LONG) {
|
if (is_subword_type(bt) || bt == T_LONG) {
|
||||||
return false;
|
return false;
|
||||||
@ -7173,13 +7194,14 @@ instruct vcastFtoD_reg(vec dst, vec src) %{
|
|||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vcastFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
|
|
||||||
|
instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
|
||||||
predicate(!VM_Version::supports_avx512vl() &&
|
predicate(!VM_Version::supports_avx512vl() &&
|
||||||
Matcher::vector_length_in_bytes(n) < 64 &&
|
Matcher::vector_length_in_bytes(n) < 64 &&
|
||||||
Matcher::vector_element_basic_type(n) == T_INT);
|
Matcher::vector_element_basic_type(n) == T_INT);
|
||||||
match(Set dst (VectorCastF2X src));
|
match(Set dst (VectorCastF2X src));
|
||||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr);
|
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr);
|
||||||
format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
|
format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this);
|
int vlen_enc = vector_length_encoding(this);
|
||||||
__ vector_castF2I_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
__ vector_castF2I_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
@ -7189,13 +7211,13 @@ instruct vcastFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, ve
|
|||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vcastFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
|
instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
|
||||||
predicate((VM_Version::supports_avx512vl() ||
|
predicate((VM_Version::supports_avx512vl() ||
|
||||||
Matcher::vector_length_in_bytes(n) == 64) &&
|
Matcher::vector_length_in_bytes(n) == 64) &&
|
||||||
Matcher::vector_element_basic_type(n) == T_INT);
|
Matcher::vector_element_basic_type(n) == T_INT);
|
||||||
match(Set dst (VectorCastF2X src));
|
match(Set dst (VectorCastF2X src));
|
||||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
|
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
|
||||||
format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
|
format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this);
|
int vlen_enc = vector_length_encoding(this);
|
||||||
__ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
__ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
@ -7216,11 +7238,11 @@ instruct vcastDtoF_reg(vec dst, vec src) %{
|
|||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vcastDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
|
instruct castDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
|
||||||
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
|
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
|
||||||
match(Set dst (VectorCastD2X src));
|
match(Set dst (VectorCastD2X src));
|
||||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
|
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
|
||||||
format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
|
format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this);
|
int vlen_enc = vector_length_encoding(this);
|
||||||
__ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
__ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
@ -7246,6 +7268,56 @@ instruct vucast(vec dst, vec src) %{
|
|||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
|
// Vector rounding of float lanes to int lanes (RoundVF). This variant is
// selected when AVX512VL is not available and the vector is narrower than
// 64 bytes; it needs four vector temporaries plus a scratch pointer register.
instruct vround_float_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %}
  ins_encode %{
    // MXCSR image kept in the constant section and handed to the stub.
    // NOTE(review): per the Intel SDM layout 0x3F80 = all exception masks set
    // with RC = round-down; confirm against vector_round_float_avx.
    InternalAddress mxcsr_image = $constantaddress((jint)0x3F80);
    int vector_len = vector_length_encoding(this);
    __ vector_round_float_avx($dst$$XMMRegister,
                              $src$$XMMRegister,
                              $xtmp1$$XMMRegister,
                              $xtmp2$$XMMRegister,
                              $xtmp3$$XMMRegister,
                              $xtmp4$$XMMRegister,
                              ExternalAddress(vector_float_signflip()),
                              mxcsr_image,
                              $scratch$$Register,
                              vector_len);
  %}
  ins_pipe( pipe_slow );
%}
|
||||||
|
|
||||||
|
// Vector rounding of float lanes to int lanes (RoundVF). This variant is
// selected when AVX512VL is available or the vector is exactly 64 bytes wide;
// it uses two vector temporaries and two opmask temporaries.
instruct vround_float_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
  ins_encode %{
    // MXCSR image kept in the constant section and handed to the stub.
    // NOTE(review): per the Intel SDM layout 0x3F80 = all exception masks set
    // with RC = round-down; confirm against vector_round_float_evex.
    InternalAddress mxcsr_image = $constantaddress((jint)0x3F80);
    int vector_len = vector_length_encoding(this);
    __ vector_round_float_evex($dst$$XMMRegister,
                               $src$$XMMRegister,
                               $xtmp1$$XMMRegister,
                               $xtmp2$$XMMRegister,
                               $ktmp1$$KRegister,
                               $ktmp2$$KRegister,
                               ExternalAddress(vector_float_signflip()),
                               mxcsr_image,
                               $scratch$$Register,
                               vector_len);
  %}
  ins_pipe( pipe_slow );
%}
|
||||||
|
|
||||||
|
// Vector rounding of double lanes to long lanes (RoundVD). The predicate only
// checks that the result element type is T_LONG; two vector temporaries and
// two opmask temporaries are reserved for the stub.
instruct vround_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
  ins_encode %{
    // MXCSR image kept in the constant section and handed to the stub.
    // NOTE(review): per the Intel SDM layout 0x3F80 = all exception masks set
    // with RC = round-down; confirm against vector_round_double_evex.
    InternalAddress mxcsr_image = $constantaddress((jint)0x3F80);
    int vector_len = vector_length_encoding(this);
    __ vector_round_double_evex($dst$$XMMRegister,
                                $src$$XMMRegister,
                                $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister,
                                $ktmp1$$KRegister,
                                $ktmp2$$KRegister,
                                ExternalAddress(vector_double_signflip()),
                                mxcsr_image,
                                $scratch$$Register,
                                vector_len);
  %}
  ins_pipe( pipe_slow );
%}
|
||||||
|
#endif
|
||||||
// --------------------------------- VectorMaskCmp --------------------------------------
|
// --------------------------------- VectorMaskCmp --------------------------------------
|
||||||
|
|
||||||
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
|
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
|
||||||
|
@ -10821,6 +10821,28 @@ instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
|
|||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// Scalar RoundD: rounds the double in $src to a long in $dst by delegating to
// the macro-assembler round_double stub. A general-purpose temporary and RCX
// are reserved for the stub, and the flags register is killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    Register    result  = $dst$$Register;
    XMMRegister input   = $src$$XMMRegister;
    Register    tmp_gpr = $rtmp$$Register;
    Register    tmp_rcx = $rcx$$Register;
    __ round_double(result, input, tmp_gpr, tmp_rcx);
  %}
  ins_pipe(pipe_slow);
%}
|
||||||
|
|
||||||
|
// Scalar RoundF: rounds the float in $src to an int in $dst by delegating to
// the macro-assembler round_float stub. A general-purpose temporary and RCX
// are reserved for the stub, and the flags register is killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  // List the temporaries in the disassembly text, matching round_double_reg,
  // so the printed instruction shows every register the rule reserves.
  format %{ "round_float $dst,$src \t! using $rtmp and $rcx as TEMP" %}
  ins_encode %{
    __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
|
||||||
|
|
||||||
instruct convI2F_reg_reg(regF dst, rRegI src)
|
instruct convI2F_reg_reg(regF dst, rRegI src)
|
||||||
%{
|
%{
|
||||||
predicate(!UseXmmI2F);
|
predicate(!UseXmmI2F);
|
||||||
|
@ -4239,6 +4239,7 @@ bool MatchRule::is_vector() const {
|
|||||||
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "VectorLongToMask",
|
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "VectorLongToMask",
|
||||||
// Next are vector mask ops.
|
// Next are vector mask ops.
|
||||||
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
|
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
|
||||||
|
"RoundVF", "RoundVD",
|
||||||
// Next are not supported currently.
|
// Next are not supported currently.
|
||||||
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",
|
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",
|
||||||
"ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD"
|
"ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD"
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -134,6 +134,7 @@ class methodHandle;
|
|||||||
do_name(log_name,"log") do_name(log10_name,"log10") do_name(pow_name,"pow") \
|
do_name(log_name,"log") do_name(log10_name,"log10") do_name(pow_name,"pow") \
|
||||||
do_name(exp_name,"exp") do_name(min_name,"min") do_name(max_name,"max") \
|
do_name(exp_name,"exp") do_name(min_name,"min") do_name(max_name,"max") \
|
||||||
do_name(floor_name, "floor") do_name(ceil_name, "ceil") do_name(rint_name, "rint") \
|
do_name(floor_name, "floor") do_name(ceil_name, "ceil") do_name(rint_name, "rint") \
|
||||||
|
do_name(round_name, "round") \
|
||||||
\
|
\
|
||||||
do_name(addExact_name,"addExact") \
|
do_name(addExact_name,"addExact") \
|
||||||
do_name(decrementExact_name,"decrementExact") \
|
do_name(decrementExact_name,"decrementExact") \
|
||||||
@ -185,6 +186,8 @@ class methodHandle;
|
|||||||
do_intrinsic(_minF, java_lang_Math, min_name, float2_float_signature, F_S) \
|
do_intrinsic(_minF, java_lang_Math, min_name, float2_float_signature, F_S) \
|
||||||
do_intrinsic(_maxD, java_lang_Math, max_name, double2_double_signature, F_S) \
|
do_intrinsic(_maxD, java_lang_Math, max_name, double2_double_signature, F_S) \
|
||||||
do_intrinsic(_minD, java_lang_Math, min_name, double2_double_signature, F_S) \
|
do_intrinsic(_minD, java_lang_Math, min_name, double2_double_signature, F_S) \
|
||||||
|
do_intrinsic(_roundD, java_lang_Math, round_name, double_long_signature, F_S) \
|
||||||
|
do_intrinsic(_roundF, java_lang_Math, round_name, float_int_signature, F_S) \
|
||||||
do_intrinsic(_dcopySign, java_lang_Math, copySign_name, double2_double_signature, F_S) \
|
do_intrinsic(_dcopySign, java_lang_Math, copySign_name, double2_double_signature, F_S) \
|
||||||
do_intrinsic(_fcopySign, java_lang_Math, copySign_name, float2_float_signature, F_S) \
|
do_intrinsic(_fcopySign, java_lang_Math, copySign_name, float2_float_signature, F_S) \
|
||||||
do_intrinsic(_dsignum, java_lang_Math, signum_name, double_double_signature, F_S) \
|
do_intrinsic(_dsignum, java_lang_Math, signum_name, double_double_signature, F_S) \
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -523,6 +523,8 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
|||||||
case vmIntrinsics::_dlog:
|
case vmIntrinsics::_dlog:
|
||||||
case vmIntrinsics::_dlog10:
|
case vmIntrinsics::_dlog10:
|
||||||
case vmIntrinsics::_dpow:
|
case vmIntrinsics::_dpow:
|
||||||
|
case vmIntrinsics::_roundD:
|
||||||
|
case vmIntrinsics::_roundF:
|
||||||
case vmIntrinsics::_min:
|
case vmIntrinsics::_min:
|
||||||
case vmIntrinsics::_max:
|
case vmIntrinsics::_max:
|
||||||
case vmIntrinsics::_min_strict:
|
case vmIntrinsics::_min_strict:
|
||||||
|
@ -311,6 +311,8 @@ macro(SignumD)
|
|||||||
macro(SignumF)
|
macro(SignumF)
|
||||||
macro(SqrtD)
|
macro(SqrtD)
|
||||||
macro(SqrtF)
|
macro(SqrtF)
|
||||||
|
macro(RoundF)
|
||||||
|
macro(RoundD)
|
||||||
macro(Start)
|
macro(Start)
|
||||||
macro(StartOSR)
|
macro(StartOSR)
|
||||||
macro(StoreB)
|
macro(StoreB)
|
||||||
@ -446,6 +448,8 @@ macro(ReplicateI)
|
|||||||
macro(ReplicateL)
|
macro(ReplicateL)
|
||||||
macro(ReplicateF)
|
macro(ReplicateF)
|
||||||
macro(ReplicateD)
|
macro(ReplicateD)
|
||||||
|
macro(RoundVF)
|
||||||
|
macro(RoundVD)
|
||||||
macro(Extract)
|
macro(Extract)
|
||||||
macro(ExtractB)
|
macro(ExtractB)
|
||||||
macro(ExtractUB)
|
macro(ExtractUB)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -81,6 +81,14 @@ class ConvD2LNode : public Node {
|
|||||||
virtual uint ideal_reg() const { return Op_RegL; }
|
virtual uint ideal_reg() const { return Op_RegL; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class RoundDNode : public Node {
|
||||||
|
public:
|
||||||
|
RoundDNode( Node *dbl ) : Node(0,dbl) {}
|
||||||
|
virtual int Opcode() const;
|
||||||
|
virtual const Type *bottom_type() const { return TypeLong::LONG; }
|
||||||
|
virtual uint ideal_reg() const { return Op_RegL; }
|
||||||
|
};
|
||||||
|
|
||||||
//------------------------------ConvF2DNode------------------------------------
|
//------------------------------ConvF2DNode------------------------------------
|
||||||
// Convert Float to a Double.
|
// Convert Float to a Double.
|
||||||
class ConvF2DNode : public Node {
|
class ConvF2DNode : public Node {
|
||||||
@ -105,6 +113,7 @@ class ConvF2INode : public Node {
|
|||||||
virtual uint ideal_reg() const { return Op_RegI; }
|
virtual uint ideal_reg() const { return Op_RegI; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
//------------------------------ConvF2LNode------------------------------------
|
//------------------------------ConvF2LNode------------------------------------
|
||||||
// Convert float to long
|
// Convert float to long
|
||||||
class ConvF2LNode : public Node {
|
class ConvF2LNode : public Node {
|
||||||
@ -141,6 +150,14 @@ class ConvI2FNode : public Node {
|
|||||||
virtual uint ideal_reg() const { return Op_RegF; }
|
virtual uint ideal_reg() const { return Op_RegF; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class RoundFNode : public Node {
|
||||||
|
public:
|
||||||
|
RoundFNode( Node *in1 ) : Node(0,in1) {}
|
||||||
|
virtual int Opcode() const;
|
||||||
|
virtual const Type *bottom_type() const { return TypeInt::INT; }
|
||||||
|
virtual uint ideal_reg() const { return Op_RegI; }
|
||||||
|
};
|
||||||
|
|
||||||
//------------------------------ConvI2LNode------------------------------------
|
//------------------------------ConvI2LNode------------------------------------
|
||||||
// Convert integer to long
|
// Convert integer to long
|
||||||
class ConvI2LNode : public TypeNode {
|
class ConvI2LNode : public TypeNode {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -269,6 +269,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
|||||||
case vmIntrinsics::_dcopySign:
|
case vmIntrinsics::_dcopySign:
|
||||||
case vmIntrinsics::_fcopySign:
|
case vmIntrinsics::_fcopySign:
|
||||||
case vmIntrinsics::_dsignum:
|
case vmIntrinsics::_dsignum:
|
||||||
|
case vmIntrinsics::_roundF:
|
||||||
|
case vmIntrinsics::_roundD:
|
||||||
case vmIntrinsics::_fsignum: return inline_math_native(intrinsic_id());
|
case vmIntrinsics::_fsignum: return inline_math_native(intrinsic_id());
|
||||||
|
|
||||||
case vmIntrinsics::_notify:
|
case vmIntrinsics::_notify:
|
||||||
@ -1605,6 +1607,7 @@ Node* LibraryCallKit::round_double_node(Node* n) {
|
|||||||
// public static double Math.sqrt(double)
|
// public static double Math.sqrt(double)
|
||||||
// public static double Math.log(double)
|
// public static double Math.log(double)
|
||||||
// public static double Math.log10(double)
|
// public static double Math.log10(double)
|
||||||
|
// public static double Math.round(double)
|
||||||
bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) {
|
bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) {
|
||||||
Node* arg = round_double_node(argument(0));
|
Node* arg = round_double_node(argument(0));
|
||||||
Node* n = NULL;
|
Node* n = NULL;
|
||||||
@ -1616,6 +1619,7 @@ bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) {
|
|||||||
case vmIntrinsics::_ceil: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_ceil); break;
|
case vmIntrinsics::_ceil: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_ceil); break;
|
||||||
case vmIntrinsics::_floor: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_floor); break;
|
case vmIntrinsics::_floor: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_floor); break;
|
||||||
case vmIntrinsics::_rint: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_rint); break;
|
case vmIntrinsics::_rint: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_rint); break;
|
||||||
|
case vmIntrinsics::_roundD: n = new RoundDNode(arg); break;
|
||||||
case vmIntrinsics::_dcopySign: n = CopySignDNode::make(_gvn, arg, round_double_node(argument(2))); break;
|
case vmIntrinsics::_dcopySign: n = CopySignDNode::make(_gvn, arg, round_double_node(argument(2))); break;
|
||||||
case vmIntrinsics::_dsignum: n = SignumDNode::make(_gvn, arg); break;
|
case vmIntrinsics::_dsignum: n = SignumDNode::make(_gvn, arg); break;
|
||||||
default: fatal_unexpected_iid(id); break;
|
default: fatal_unexpected_iid(id); break;
|
||||||
@ -1637,6 +1641,7 @@ bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
|
|||||||
case vmIntrinsics::_labs: n = new AbsLNode( arg); break;
|
case vmIntrinsics::_labs: n = new AbsLNode( arg); break;
|
||||||
case vmIntrinsics::_fcopySign: n = new CopySignFNode(arg, argument(1)); break;
|
case vmIntrinsics::_fcopySign: n = new CopySignFNode(arg, argument(1)); break;
|
||||||
case vmIntrinsics::_fsignum: n = SignumFNode::make(_gvn, arg); break;
|
case vmIntrinsics::_fsignum: n = SignumFNode::make(_gvn, arg); break;
|
||||||
|
case vmIntrinsics::_roundF: n = new RoundFNode(arg); break;
|
||||||
default: fatal_unexpected_iid(id); break;
|
default: fatal_unexpected_iid(id); break;
|
||||||
}
|
}
|
||||||
set_result(_gvn.transform(n));
|
set_result(_gvn.transform(n));
|
||||||
@ -1752,9 +1757,11 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
|
|||||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
|
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
|
||||||
|
|
||||||
// These intrinsics are supported on all hardware
|
// These intrinsics are supported on all hardware
|
||||||
|
case vmIntrinsics::_roundD: return Matcher::match_rule_supported(Op_RoundD) ? inline_double_math(id) : false;
|
||||||
case vmIntrinsics::_ceil:
|
case vmIntrinsics::_ceil:
|
||||||
case vmIntrinsics::_floor:
|
case vmIntrinsics::_floor:
|
||||||
case vmIntrinsics::_rint: return Matcher::match_rule_supported(Op_RoundDoubleMode) ? inline_double_math(id) : false;
|
case vmIntrinsics::_rint: return Matcher::match_rule_supported(Op_RoundDoubleMode) ? inline_double_math(id) : false;
|
||||||
|
|
||||||
case vmIntrinsics::_dsqrt:
|
case vmIntrinsics::_dsqrt:
|
||||||
case vmIntrinsics::_dsqrt_strict:
|
case vmIntrinsics::_dsqrt_strict:
|
||||||
return Matcher::match_rule_supported(Op_SqrtD) ? inline_double_math(id) : false;
|
return Matcher::match_rule_supported(Op_SqrtD) ? inline_double_math(id) : false;
|
||||||
@ -1774,6 +1781,7 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
|
|||||||
case vmIntrinsics::_fcopySign: return inline_math(id);
|
case vmIntrinsics::_fcopySign: return inline_math(id);
|
||||||
case vmIntrinsics::_dsignum: return Matcher::match_rule_supported(Op_SignumD) ? inline_double_math(id) : false;
|
case vmIntrinsics::_dsignum: return Matcher::match_rule_supported(Op_SignumD) ? inline_double_math(id) : false;
|
||||||
case vmIntrinsics::_fsignum: return Matcher::match_rule_supported(Op_SignumF) ? inline_math(id) : false;
|
case vmIntrinsics::_fsignum: return Matcher::match_rule_supported(Op_SignumF) ? inline_math(id) : false;
|
||||||
|
case vmIntrinsics::_roundF: return Matcher::match_rule_supported(Op_RoundF) ? inline_math(id) : false;
|
||||||
|
|
||||||
// These intrinsics are not yet correctly implemented
|
// These intrinsics are not yet correctly implemented
|
||||||
case vmIntrinsics::_datan2:
|
case vmIntrinsics::_datan2:
|
||||||
|
@ -970,6 +970,10 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
|
|||||||
case Op_ModL: body_size += 30; break;
|
case Op_ModL: body_size += 30; break;
|
||||||
case Op_DivL: body_size += 30; break;
|
case Op_DivL: body_size += 30; break;
|
||||||
case Op_MulL: body_size += 10; break;
|
case Op_MulL: body_size += 10; break;
|
||||||
|
case Op_RoundF: body_size += 30; break;
|
||||||
|
case Op_RoundD: body_size += 30; break;
|
||||||
|
case Op_RoundVF: body_size += 30; break;
|
||||||
|
case Op_RoundVD: body_size += 30; break;
|
||||||
case Op_PopCountVI:
|
case Op_PopCountVI:
|
||||||
case Op_PopCountVL: {
|
case Op_PopCountVL: {
|
||||||
const TypeVect* vt = n->bottom_type()->is_vect();
|
const TypeVect* vt = n->bottom_type()->is_vect();
|
||||||
|
@ -2563,6 +2563,7 @@ bool SuperWord::output() {
|
|||||||
opc == Op_AbsF || opc == Op_AbsD ||
|
opc == Op_AbsF || opc == Op_AbsD ||
|
||||||
opc == Op_AbsI || opc == Op_AbsL ||
|
opc == Op_AbsI || opc == Op_AbsL ||
|
||||||
opc == Op_NegF || opc == Op_NegD ||
|
opc == Op_NegF || opc == Op_NegD ||
|
||||||
|
opc == Op_RoundF || opc == Op_RoundD ||
|
||||||
opc == Op_PopCountI || opc == Op_PopCountL) {
|
opc == Op_PopCountI || opc == Op_PopCountL) {
|
||||||
assert(n->req() == 2, "only one input expected");
|
assert(n->req() == 2, "only one input expected");
|
||||||
Node* in = vector_opd(p, 1);
|
Node* in = vector_opd(p, 1);
|
||||||
|
@ -157,6 +157,10 @@ int VectorNode::opcode(int sopc, BasicType bt) {
|
|||||||
return (bt == T_FLOAT ? Op_SqrtVF : 0);
|
return (bt == T_FLOAT ? Op_SqrtVF : 0);
|
||||||
case Op_SqrtD:
|
case Op_SqrtD:
|
||||||
return (bt == T_DOUBLE ? Op_SqrtVD : 0);
|
return (bt == T_DOUBLE ? Op_SqrtVD : 0);
|
||||||
|
case Op_RoundF:
|
||||||
|
return (bt == T_INT ? Op_RoundVF : 0);
|
||||||
|
case Op_RoundD:
|
||||||
|
return (bt == T_LONG ? Op_RoundVD : 0);
|
||||||
case Op_PopCountI:
|
case Op_PopCountI:
|
||||||
// Unimplemented for subword types since bit count changes
|
// Unimplemented for subword types since bit count changes
|
||||||
// depending on size of lane (and sign bit).
|
// depending on size of lane (and sign bit).
|
||||||
@ -585,6 +589,9 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
|
|||||||
case Op_SqrtVF: return new SqrtVFNode(n1, vt);
|
case Op_SqrtVF: return new SqrtVFNode(n1, vt);
|
||||||
case Op_SqrtVD: return new SqrtVDNode(n1, vt);
|
case Op_SqrtVD: return new SqrtVDNode(n1, vt);
|
||||||
|
|
||||||
|
case Op_RoundVF: return new RoundVFNode(n1, vt);
|
||||||
|
case Op_RoundVD: return new RoundVDNode(n1, vt);
|
||||||
|
|
||||||
case Op_PopCountVI: return new PopCountVINode(n1, vt);
|
case Op_PopCountVI: return new PopCountVINode(n1, vt);
|
||||||
case Op_PopCountVL: return new PopCountVLNode(n1, vt);
|
case Op_PopCountVL: return new PopCountVLNode(n1, vt);
|
||||||
case Op_RotateLeftV: return new RotateLeftVNode(n1, n2, vt);
|
case Op_RotateLeftV: return new RotateLeftVNode(n1, n2, vt);
|
||||||
|
@ -1544,6 +1544,14 @@ class VectorCastD2XNode : public VectorCastNode {
|
|||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class RoundVFNode : public VectorNode {
|
||||||
|
public:
|
||||||
|
RoundVFNode(Node* in, const TypeVect* vt) :VectorNode(in, vt) {
|
||||||
|
assert(in->bottom_type()->is_vect()->element_basic_type() == T_FLOAT, "must be float");
|
||||||
|
}
|
||||||
|
virtual int Opcode() const;
|
||||||
|
};
|
||||||
|
|
||||||
class VectorUCastB2XNode : public VectorCastNode {
|
class VectorUCastB2XNode : public VectorCastNode {
|
||||||
public:
|
public:
|
||||||
VectorUCastB2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
|
VectorUCastB2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
|
||||||
@ -1552,6 +1560,14 @@ class VectorUCastB2XNode : public VectorCastNode {
|
|||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class RoundVDNode : public VectorNode {
|
||||||
|
public:
|
||||||
|
RoundVDNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {
|
||||||
|
assert(in->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE, "must be double");
|
||||||
|
}
|
||||||
|
virtual int Opcode() const;
|
||||||
|
};
|
||||||
|
|
||||||
class VectorUCastS2XNode : public VectorCastNode {
|
class VectorUCastS2XNode : public VectorCastNode {
|
||||||
public:
|
public:
|
||||||
VectorUCastS2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
|
VectorUCastS2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
|
||||||
|
@ -753,6 +753,7 @@ public final class Math {
|
|||||||
* @see java.lang.Integer#MAX_VALUE
|
* @see java.lang.Integer#MAX_VALUE
|
||||||
* @see java.lang.Integer#MIN_VALUE
|
* @see java.lang.Integer#MIN_VALUE
|
||||||
*/
|
*/
|
||||||
|
@IntrinsicCandidate
|
||||||
public static int round(float a) {
|
public static int round(float a) {
|
||||||
int intBits = Float.floatToRawIntBits(a);
|
int intBits = Float.floatToRawIntBits(a);
|
||||||
int biasedExp = (intBits & FloatConsts.EXP_BIT_MASK)
|
int biasedExp = (intBits & FloatConsts.EXP_BIT_MASK)
|
||||||
@ -802,6 +803,7 @@ public final class Math {
|
|||||||
* @see java.lang.Long#MAX_VALUE
|
* @see java.lang.Long#MAX_VALUE
|
||||||
* @see java.lang.Long#MIN_VALUE
|
* @see java.lang.Long#MIN_VALUE
|
||||||
*/
|
*/
|
||||||
|
@IntrinsicCandidate
|
||||||
public static long round(double a) {
|
public static long round(double a) {
|
||||||
long longBits = Double.doubleToRawLongBits(a);
|
long longBits = Double.doubleToRawLongBits(a);
|
||||||
long biasedExp = (longBits & DoubleConsts.EXP_BIT_MASK)
|
long biasedExp = (longBits & DoubleConsts.EXP_BIT_MASK)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -44,14 +44,16 @@ public class TestDoubleVect {
|
|||||||
System.out.println("Testing Double vectors");
|
System.out.println("Testing Double vectors");
|
||||||
int errn = test();
|
int errn = test();
|
||||||
if (errn > 0) {
|
if (errn > 0) {
|
||||||
System.err.println("FAILED: " + errn + " errors");
|
System.err.println("FAILED: " + errn + " errors");
|
||||||
System.exit(97);
|
System.exit(97);
|
||||||
}
|
}
|
||||||
System.out.println("PASSED");
|
System.out.println("PASSED");
|
||||||
}
|
}
|
||||||
|
|
||||||
static int test() {
|
static int test() {
|
||||||
double[] a0 = new double[ARRLEN];
|
double[] a0 = new double[ARRLEN];
|
||||||
|
long [] l0 = new long[ARRLEN];
|
||||||
|
|
||||||
double[] a1 = new double[ARRLEN];
|
double[] a1 = new double[ARRLEN];
|
||||||
double[] a2 = new double[ARRLEN];
|
double[] a2 = new double[ARRLEN];
|
||||||
double[] a3 = new double[ARRLEN];
|
double[] a3 = new double[ARRLEN];
|
||||||
@ -91,6 +93,7 @@ public class TestDoubleVect {
|
|||||||
test_ceil(a0, a1);
|
test_ceil(a0, a1);
|
||||||
test_floor(a0, a1);
|
test_floor(a0, a1);
|
||||||
test_sqrt(a0, a1);
|
test_sqrt(a0, a1);
|
||||||
|
test_round(l0, a1);
|
||||||
}
|
}
|
||||||
// Test and verify results
|
// Test and verify results
|
||||||
System.out.println("Verification");
|
System.out.println("Verification");
|
||||||
@ -355,6 +358,7 @@ public class TestDoubleVect {
|
|||||||
errn += verify("test_negc: ", i, a0[i], (double)(-((double)(ADD_INIT+i))));
|
errn += verify("test_negc: ", i, a0[i], (double)(-((double)(ADD_INIT+i))));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// To test -ve and +ve Zero scenarios.
|
// To test -ve and +ve Zero scenarios.
|
||||||
double [] other_corner_cases = { -0.0, 0.0, 9.007199254740992E15 };
|
double [] other_corner_cases = { -0.0, 0.0, 9.007199254740992E15 };
|
||||||
double [] other_corner_cases_res = new double[3];
|
double [] other_corner_cases_res = new double[3];
|
||||||
@ -421,6 +425,35 @@ public class TestDoubleVect {
|
|||||||
for (int i=8; i<ARRLEN; i++) {
|
for (int i=8; i<ARRLEN; i++) {
|
||||||
errn += verify("test_sqrt: ", i, a0[i], Math.sqrt((double)(ADD_INIT+i)));
|
errn += verify("test_sqrt: ", i, a0[i], Math.sqrt((double)(ADD_INIT+i)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
a1[6] = +0x1.fffffffffffffp-2;
|
||||||
|
a1[7] = +0x1.0p-1;
|
||||||
|
a1[8] = +0x1.0000000000001p-1;
|
||||||
|
a1[9] = -0x1.fffffffffffffp-2;
|
||||||
|
a1[10] = -0x1.0p-1;
|
||||||
|
a1[11] = -0x1.0000000000001p-1;
|
||||||
|
a1[12] = 1.7976931348623157E19;
|
||||||
|
a1[13] = -1.7976931348623157E19;
|
||||||
|
|
||||||
|
test_round(l0, a1);
|
||||||
|
errn += verify("test_round: ", 0, l0[0], 0L);
|
||||||
|
errn += verify("test_round: ", 1, l0[1], Long.MAX_VALUE);
|
||||||
|
errn += verify("test_round: ", 2, l0[2], Long.MIN_VALUE);
|
||||||
|
errn += verify("test_round: ", 3, l0[3], Long.MAX_VALUE);
|
||||||
|
errn += verify("test_round: ", 4, l0[4], 0L);
|
||||||
|
errn += verify("test_round: ", 5, l0[5], 0L);
|
||||||
|
|
||||||
|
errn += verify("test_round: ", 6, l0[6], 0L);
|
||||||
|
errn += verify("test_round: ", 7, l0[7], 1L);
|
||||||
|
errn += verify("test_round: ", 8, l0[8], 1L);
|
||||||
|
errn += verify("test_round: ", 9, l0[9], 0L);
|
||||||
|
errn += verify("test_round: ", 10, l0[10], 0L);
|
||||||
|
errn += verify("test_round: ", 11, l0[11], -1L);
|
||||||
|
errn += verify("test_round: ", 12, l0[12], Long.MAX_VALUE);
|
||||||
|
errn += verify("test_round: ", 13, l0[13], Long.MIN_VALUE);
|
||||||
|
for (int i=14; i<ARRLEN; i++) {
|
||||||
|
errn += verify("test_round: ", i, l0[i], Math.round((double)(ADD_INIT+i)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (errn > 0)
|
if (errn > 0)
|
||||||
@ -564,6 +597,12 @@ public class TestDoubleVect {
|
|||||||
end = System.currentTimeMillis();
|
end = System.currentTimeMillis();
|
||||||
System.out.println("test_sqrt_n: " + (end - start));
|
System.out.println("test_sqrt_n: " + (end - start));
|
||||||
|
|
||||||
|
start = System.currentTimeMillis();
|
||||||
|
for (int i=0; i<ITERS; i++) {
|
||||||
|
test_round(l0, a1);
|
||||||
|
}
|
||||||
|
end = System.currentTimeMillis();
|
||||||
|
System.out.println("test_round_n: " + (end - start));
|
||||||
return errn;
|
return errn;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -691,6 +730,20 @@ public class TestDoubleVect {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_round(long[] a0, double[] a1) {
|
||||||
|
for (int i = 0; i < a0.length; i+=1) {
|
||||||
|
a0[i] = Math.round(a1[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int verify(String text, int i, long elem, long val) {
|
||||||
|
if (elem != val) {
|
||||||
|
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int verify(String text, int i, double elem, double val) {
|
static int verify(String text, int i, double elem, double val) {
|
||||||
if (elem != val && !(Double.isNaN(elem) && Double.isNaN(val))) {
|
if (elem != val && !(Double.isNaN(elem) && Double.isNaN(val))) {
|
||||||
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -52,6 +52,7 @@ public class TestFloatVect {
|
|||||||
|
|
||||||
static int test() {
|
static int test() {
|
||||||
float[] a0 = new float[ARRLEN];
|
float[] a0 = new float[ARRLEN];
|
||||||
|
int[] i0 = new int[ARRLEN];
|
||||||
float[] a1 = new float[ARRLEN];
|
float[] a1 = new float[ARRLEN];
|
||||||
float[] a2 = new float[ARRLEN];
|
float[] a2 = new float[ARRLEN];
|
||||||
float[] a3 = new float[ARRLEN];
|
float[] a3 = new float[ARRLEN];
|
||||||
@ -88,7 +89,9 @@ public class TestFloatVect {
|
|||||||
test_diva(a0, a1, a3);
|
test_diva(a0, a1, a3);
|
||||||
test_negc(a0, a1);
|
test_negc(a0, a1);
|
||||||
test_sqrt(a0, a1);
|
test_sqrt(a0, a1);
|
||||||
|
test_round(i0, a1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test and verify results
|
// Test and verify results
|
||||||
System.out.println("Verification");
|
System.out.println("Verification");
|
||||||
int errn = 0;
|
int errn = 0;
|
||||||
@ -369,6 +372,35 @@ public class TestFloatVect {
|
|||||||
errn += verify("test_sqrt: ", i, a0[i], (float)(Math.sqrt((double)(ADD_INIT+i))));
|
errn += verify("test_sqrt: ", i, a0[i], (float)(Math.sqrt((double)(ADD_INIT+i))));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
a1[6] = +0x1.fffffep-2f;
|
||||||
|
a1[7] = +0x1.0p-1f;
|
||||||
|
a1[8] = +0x1.000002p-1f;
|
||||||
|
a1[9] = -0x1.fffffep-2f;
|
||||||
|
a1[10] = -0x1.0p-1f;
|
||||||
|
a1[11] = -0x1.000002p-1f;
|
||||||
|
a1[12] = 3.4028235E10f;
|
||||||
|
a1[13] = -3.4028235E10f;
|
||||||
|
|
||||||
|
test_round(i0, a1);
|
||||||
|
errn += verify("test_round: ", 0, i0[0], 0);
|
||||||
|
errn += verify("test_round: ", 1, i0[1], Integer.MAX_VALUE);
|
||||||
|
errn += verify("test_round: ", 2, i0[2], Integer.MIN_VALUE);
|
||||||
|
errn += verify("test_round: ", 3, i0[3], Integer.MAX_VALUE);
|
||||||
|
errn += verify("test_round: ", 4, i0[4], 0);
|
||||||
|
errn += verify("test_round: ", 5, i0[5], 0);
|
||||||
|
errn += verify("test_round: ", 6, i0[6], 0);
|
||||||
|
errn += verify("test_round: ", 7, i0[7], 1);
|
||||||
|
errn += verify("test_round: ", 8, i0[8], 1);
|
||||||
|
errn += verify("test_round: ", 9, i0[9], 0);
|
||||||
|
errn += verify("test_round: ", 10, i0[10], 0);
|
||||||
|
errn += verify("test_round: ", 11, i0[11], -1);
|
||||||
|
errn += verify("test_round: ", 12, i0[12], Integer.MAX_VALUE);
|
||||||
|
errn += verify("test_round: ", 13, i0[13], Integer.MIN_VALUE);
|
||||||
|
|
||||||
|
for (int i=14; i<ARRLEN; i++) {
|
||||||
|
errn += verify("test_round: ", i, i0[i], Math.round(((float)(ADD_INIT+i))));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (errn > 0)
|
if (errn > 0)
|
||||||
@ -512,6 +544,12 @@ public class TestFloatVect {
|
|||||||
end = System.currentTimeMillis();
|
end = System.currentTimeMillis();
|
||||||
System.out.println("test_sqrt_n: " + (end - start));
|
System.out.println("test_sqrt_n: " + (end - start));
|
||||||
|
|
||||||
|
start = System.currentTimeMillis();
|
||||||
|
for (int i=0; i<ITERS; i++) {
|
||||||
|
test_round(i0, a1);
|
||||||
|
}
|
||||||
|
end = System.currentTimeMillis();
|
||||||
|
System.out.println("test_round_n: " + (end - start));
|
||||||
return errn;
|
return errn;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -609,6 +647,20 @@ public class TestFloatVect {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_round(int[] a0, float[] a1) {
|
||||||
|
for (int i = 0; i < a0.length; i+=1) {
|
||||||
|
a0[i] = Math.round(a1[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int verify(String text, int i, int elem, int val) {
|
||||||
|
if (elem != val) {
|
||||||
|
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int verify(String text, int i, float elem, float val) {
|
static int verify(String text, int i, float elem, float val) {
|
||||||
if (elem != val && !(Float.isNaN(elem) && Float.isNaN(val))) {
|
if (elem != val && !(Float.isNaN(elem) && Float.isNaN(val))) {
|
||||||
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
||||||
|
@ -0,0 +1,72 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8279508
|
||||||
|
* @summary Auto-vectorize Math.round API
|
||||||
|
* @requires vm.compiler2.enabled
|
||||||
|
* @requires vm.cpu.features ~= ".*avx512dq.*"
|
||||||
|
* @requires os.simpleArch == "x64"
|
||||||
|
* @library /test/lib /
|
||||||
|
* @run driver compiler.vectorization.TestRoundVectDouble
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.vectorization;
|
||||||
|
|
||||||
|
import compiler.lib.ir_framework.*;
|
||||||
|
|
||||||
|
public class TestRoundVectDouble {
|
||||||
|
private static final int ARRLEN = 1024;
|
||||||
|
private static final int ITERS = 11000;
|
||||||
|
|
||||||
|
private static double [] dinp;
|
||||||
|
private static long [] lout;
|
||||||
|
|
||||||
|
public static void main(String args[]) {
|
||||||
|
TestFramework.runWithFlags("-XX:-TieredCompilation",
|
||||||
|
"-XX:UseAVX=3",
|
||||||
|
"-XX:CompileThresholdScaling=0.3");
|
||||||
|
System.out.println("PASSED");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(applyIf = {"UseAVX", "3"}, counts = {"RoundVD" , " > 0 "})
|
||||||
|
public void test_round_double(long[] lout, double[] dinp) {
|
||||||
|
for (int i = 0; i < lout.length; i+=1) {
|
||||||
|
lout[i] = Math.round(dinp[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"test_round_double"}, mode = RunMode.STANDALONE)
|
||||||
|
public void kernel_test_round_double() {
|
||||||
|
dinp = new double[ARRLEN];
|
||||||
|
lout = new long[ARRLEN];
|
||||||
|
for(int i = 0 ; i < ARRLEN; i++) {
|
||||||
|
dinp[i] = (double)i*1.4;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < ITERS; i++) {
|
||||||
|
test_round_double(lout , dinp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,71 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8279508
|
||||||
|
* @summary Auto-vectorize Math.round API
|
||||||
|
* @requires vm.compiler2.enabled
|
||||||
|
* @requires vm.cpu.features ~= ".*avx.*"
|
||||||
|
* @requires os.simpleArch == "x64"
|
||||||
|
* @library /test/lib /
|
||||||
|
* @run driver compiler.vectorization.TestRoundVectFloat
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.vectorization;
|
||||||
|
|
||||||
|
import compiler.lib.ir_framework.*;
|
||||||
|
|
||||||
|
public class TestRoundVectFloat {
|
||||||
|
private static final int ARRLEN = 1024;
|
||||||
|
private static final int ITERS = 11000;
|
||||||
|
private static float [] finp;
|
||||||
|
private static int [] iout;
|
||||||
|
|
||||||
|
public static void main(String args[]) {
|
||||||
|
TestFramework.runWithFlags("-XX:-TieredCompilation",
|
||||||
|
"-XX:UseAVX=1",
|
||||||
|
"-XX:CompileThresholdScaling=0.3");
|
||||||
|
System.out.println("PASSED");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(applyIf = {"UseAVX", " > 1"}, counts = {"RoundVF" , " > 0 "})
|
||||||
|
public void test_round_float(int[] iout, float[] finp) {
|
||||||
|
for (int i = 0; i < finp.length; i+=1) {
|
||||||
|
iout[i] = Math.round(finp[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"test_round_float"}, mode = RunMode.STANDALONE)
|
||||||
|
public void kernel_test_round() {
|
||||||
|
finp = new float[ARRLEN];
|
||||||
|
iout = new int[ARRLEN];
|
||||||
|
for(int i = 0 ; i < ARRLEN; i++) {
|
||||||
|
finp[i] = (float)i*1.4f;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < ITERS; i++) {
|
||||||
|
test_round_float(iout , finp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -25,20 +25,22 @@
|
|||||||
* @test
|
* @test
|
||||||
* @bug 6430675 8010430
|
* @bug 6430675 8010430
|
||||||
* @summary Check for correct implementation of {Math, StrictMath}.round
|
* @summary Check for correct implementation of {Math, StrictMath}.round
|
||||||
|
* @run main/othervm -XX:Tier3CompileThreshold=50 -XX:CompileThresholdScaling=0.01 -XX:+TieredCompilation RoundTests
|
||||||
*/
|
*/
|
||||||
public class RoundTests {
|
public class RoundTests {
|
||||||
public static void main(String... args) {
|
public static void main(String... args) {
|
||||||
int failures = 0;
|
int failures = 0;
|
||||||
|
for (int i = 0; i < 500; i++) {
|
||||||
|
failures += testNearFloatHalfCases();
|
||||||
|
failures += testNearDoubleHalfCases();
|
||||||
|
failures += testUnityULPCases();
|
||||||
|
failures += testSpecialCases();
|
||||||
|
|
||||||
failures += testNearFloatHalfCases();
|
if (failures > 0) {
|
||||||
failures += testNearDoubleHalfCases();
|
System.err.println("Testing {Math, StrictMath}.round incurred "
|
||||||
failures += testUnityULPCases();
|
+ failures + " failures.");
|
||||||
failures += testSpecialCases();
|
throw new RuntimeException();
|
||||||
|
}
|
||||||
if (failures > 0) {
|
|
||||||
System.err.println("Testing {Math, StrictMath}.round incurred "
|
|
||||||
+ failures + " failures.");
|
|
||||||
throw new RuntimeException();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
|
// Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
//
|
//
|
||||||
// This code is free software; you can redistribute it and/or modify it
|
// This code is free software; you can redistribute it and/or modify it
|
||||||
@ -26,53 +26,97 @@ package org.openjdk.bench.java.math;
|
|||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import org.openjdk.jmh.annotations.*;
|
import org.openjdk.jmh.annotations.*;
|
||||||
import org.openjdk.jmh.infra.Blackhole;
|
|
||||||
|
|
||||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||||
@State(Scope.Thread)
|
@State(Scope.Thread)
|
||||||
public class FpRoundingBenchmark {
|
public class FpRoundingBenchmark {
|
||||||
|
|
||||||
@Param({"1024"})
|
@Param({"1024", "2048"})
|
||||||
public int TESTSIZE;
|
public int TESTSIZE;
|
||||||
|
|
||||||
public double[] DargV1;
|
public double[] DargV1;
|
||||||
|
public double[] ResD;
|
||||||
public double[] Res;
|
public long[] ResL;
|
||||||
|
public float[] FargV1;
|
||||||
|
public float[] ResF;
|
||||||
|
public int[] ResI;
|
||||||
|
|
||||||
public final double[] DspecialVals = {
|
public final double[] DspecialVals = {
|
||||||
0.0, -0.0, Double.NaN, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY};
|
0.0, -0.0, Double.NaN, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY,
|
||||||
|
Double.MAX_VALUE, -Double.MAX_VALUE, Double.MIN_VALUE, -Double.MIN_VALUE,
|
||||||
|
Double.MIN_NORMAL
|
||||||
|
};
|
||||||
|
|
||||||
|
public final float[] FspecialVals = {
|
||||||
|
0.0f, -0.0f, Float.NaN, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY,
|
||||||
|
Float.MAX_VALUE, -Float.MAX_VALUE, Float.MIN_VALUE, -Float.MIN_VALUE,
|
||||||
|
Float.MIN_NORMAL
|
||||||
|
};
|
||||||
|
|
||||||
@Setup(Level.Trial)
|
@Setup(Level.Trial)
|
||||||
public void BmSetup() {
|
public void BmSetup() {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
Random r = new Random(1024);
|
Random r = new Random(1024);
|
||||||
DargV1 = new double[TESTSIZE];
|
|
||||||
Res = new double[TESTSIZE];
|
|
||||||
|
|
||||||
for (; i < DspecialVals.length; i++) {
|
DargV1 = new double[TESTSIZE];
|
||||||
DargV1[i] = DspecialVals[i];
|
ResD = new double[TESTSIZE];
|
||||||
}
|
|
||||||
|
|
||||||
for (; i < TESTSIZE; i++) {
|
for (; i < DspecialVals.length; i++) {
|
||||||
DargV1[i] = r.nextDouble()*TESTSIZE;
|
DargV1[i] = DspecialVals[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (; i < TESTSIZE; i++) {
|
||||||
|
DargV1[i] = Double.longBitsToDouble(r.nextLong());;
|
||||||
|
}
|
||||||
|
|
||||||
|
FargV1 = new float[TESTSIZE];
|
||||||
|
ResF = new float[TESTSIZE];
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
for (; i < FspecialVals.length; i++) {
|
||||||
|
FargV1[i] = FspecialVals[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < TESTSIZE; i++) {
|
||||||
|
FargV1[i] = Float.intBitsToFloat(r.nextInt());
|
||||||
|
}
|
||||||
|
|
||||||
|
ResI = new int[TESTSIZE];
|
||||||
|
ResL = new long[TESTSIZE];
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public void testceil(Blackhole bh) {
|
public void test_ceil() {
|
||||||
for (int i = 0; i < TESTSIZE; i++)
|
for (int i = 0; i < TESTSIZE; i++) {
|
||||||
Res[i] = Math.ceil(DargV1[i]);
|
ResD[i] = Math.ceil(DargV1[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public void testfloor(Blackhole bh) {
|
public void test_floor() {
|
||||||
for (int i = 0; i < TESTSIZE; i++)
|
for (int i = 0; i < TESTSIZE; i++) {
|
||||||
Res[i] = Math.floor(DargV1[i]);
|
ResD[i] = Math.floor(DargV1[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public void testrint(Blackhole bh) {
|
public void test_rint() {
|
||||||
for (int i = 0; i < TESTSIZE; i++)
|
for (int i = 0; i < TESTSIZE; i++) {
|
||||||
Res[i] = Math.rint(DargV1[i]);
|
ResD[i] = Math.rint(DargV1[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void test_round_double() {
|
||||||
|
for (int i = 0; i < TESTSIZE; i++) {
|
||||||
|
ResL[i] = Math.round(DargV1[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void test_round_float() {
|
||||||
|
for (int i = 0; i < TESTSIZE; i++) {
|
||||||
|
ResI[i] = Math.round(FargV1[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user