8279508: Auto-vectorize Math.round API

Reviewed-by: sviswanathan, aph
This commit is contained in:
Jatin Bhateja 2022-04-02 18:00:33 +00:00
parent c1e67b6603
commit 003ec21f3c
25 changed files with 765 additions and 90 deletions

View File

@ -1995,6 +1995,13 @@ void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
emit_int16(0x2C, (0xC0 | encode));
}
// Emits the register-to-register form of cvtss2si with rex_w cleared:
// F3-prefixed 0F-map opcode 0x2D followed by a ModRM byte that has 0xC0
// OR'ed into the register bits returned by the prefix encoder.
void Assembler::cvtss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attrs(AVX_128bit,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ false);
  // The general-purpose destination is re-typed as an XMM register so the
  // shared SIMD prefix encoder can be used for it.
  XMMRegister dst_as_xmm = as_XMMRegister(dst->encoding());
  const int reg_bits = simd_prefix_and_encode(dst_as_xmm, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attrs);
  emit_int16(0x2D, (0xC0 | reg_bits));
}
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@ -2088,6 +2095,21 @@ void Assembler::vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
emit_int16(0x5B, (0xC0 | encode));
}
// Emits vcvtps2dq dst, src for the given vector length: 66-prefixed 0F-map
// opcode 0x5B with a ModRM byte built from the encoder's register bits.
// Vector lengths up to AVX_256bit are asserted to have AVX available; longer
// ones are asserted to have EVEX available.
void Assembler::vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert((vector_len <= AVX_256bit) ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int16(0x5B, (0xC0 | reg_bits));
}
// Emits evcvtpd2qq dst, src for the given vector length. The instruction is
// forced to EVEX encoding with rex_w set: 66-prefixed 0F-map opcode 0x7B
// followed by a ModRM byte built from the encoder's register bits.
// Asserts UseAVX > 2 and AVX512DQ support.
void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ true,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int16(0x7B, (0xC0 | reg_bits));
}
void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@ -6493,7 +6515,6 @@ void Assembler::vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vec
emit_int8((rmode));
}
void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@ -12285,6 +12306,13 @@ void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
emit_int16(0x2C, (0xC0 | encode));
}
// Emits the register-to-register form of cvtsd2si with rex_w set:
// F2-prefixed 0F-map opcode 0x2D followed by a ModRM byte that has 0xC0
// OR'ed into the register bits returned by the prefix encoder.
void Assembler::cvtsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attrs(AVX_128bit,
                        /* rex_w */ true,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ false);
  // The general-purpose destination is re-typed as an XMM register so the
  // shared SIMD prefix encoder can be used for it.
  XMMRegister dst_as_xmm = as_XMMRegister(dst->encoding());
  const int reg_bits = simd_prefix_and_encode(dst_as_xmm, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attrs);
  emit_int16(0x2D, (0xC0 | reg_bits));
}
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);

View File

@ -1149,6 +1149,7 @@ private:
void cvtss2sd(XMMRegister dst, Address src);
// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
void cvtsd2siq(Register dst, XMMRegister src); // cvtsd2si with rex_w set (opcode 0x2D); not one of the "with Truncation" cvtt* forms
void cvttsd2sil(Register dst, Address src);
void cvttsd2sil(Register dst, XMMRegister src);
void cvttsd2siq(Register dst, Address src);
@ -1157,6 +1158,7 @@ private:
// Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
void cvtss2sil(Register dst, XMMRegister src); // cvtss2si with rex_w clear (opcode 0x2D); not one of the "with Truncation" cvtt* forms
// Convert vector double to int
void cvttpd2dq(XMMRegister dst, XMMRegister src);
@ -1166,6 +1168,7 @@ private:
void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
// Convert vector float and int
void vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len); // 66-prefixed opcode 0x5B; counterpart of vcvttps2dq without the truncating 't'
void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
// Convert vector long to vector FP
@ -1173,6 +1176,7 @@ private:
void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
// Convert vector double to long
void evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len); // always EVEX-encoded; its definition asserts UseAVX > 2 and AVX512DQ
void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
// Evex casts with truncation

View File

@ -4061,41 +4061,18 @@ void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst,
}
/*
* Algorithm for vector D2L and F2I conversions:-
* a) Perform vector D2L/F2I cast.
* b) Choose fast path if none of the result vector lane contains 0x80000000 value.
* It signifies that source value could be any of the special floating point
* values(NaN,-Inf,Inf,Max,-Min).
* c) Set destination to zero if source is NaN value.
* d) Replace 0x80000000 with MaxInt if source lane contains a +ve value.
* Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation.
* If src is NaN, the result is 0.
* If the src is negative infinity or any value less than or equal to the value of Integer.MIN_VALUE,
* the result is equal to the value of Integer.MIN_VALUE.
* If the src is positive infinity or any value greater than or equal to the value of Integer.MAX_VALUE,
* the result is equal to the value of Integer.MAX_VALUE.
*/
// Converts packed doubles in src to packed longs in dst with evcvttpd2qq and
// then patches the lanes whose result equals the double_sign_flip pattern
// (NaN lanes are zeroed; remaining such lanes with a non-negative source get
// the value computed by vpternlogq below). Writes xtmp1, xtmp2, ktmp1, ktmp2.
void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
Register scratch, int vec_enc) {
Label done;
// Raw conversion of every lane.
evcvttpd2qq(dst, src, vec_enc);
// xtmp1 <- sign-flip constant; ktmp1 <- lanes of dst equal to it.
evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch);
evpcmpeqq(ktmp1, xtmp1, dst, vec_enc);
// Fast path: nothing to patch when no lane matched.
kortestwl(ktmp1, ktmp1);
jccb(Assembler::equal, done);
// xtmp2 <- zero; ktmp2 <- lanes where src is unordered with itself (NaN).
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
// Zero the NaN lanes of dst.
evmovdquq(dst, ktmp2, xtmp2, true, vec_enc);
// XOR the NaN lanes out of the matched set (assumes every NaN lane matched).
kxorwl(ktmp1, ktmp1, ktmp2);
// Keep only matched lanes whose src is not less than zero.
evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
// xtmp2 <- ternary function 0x11 of xtmp1 with itself; presumably the bitwise
// complement of the constant (TODO confirm against the vpternlog truth table).
vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
// Store that value into the selected lanes of dst.
evmovdquq(dst, ktmp1, xtmp2, true, vec_enc);
bind(done);
}
void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
void C2_MacroAssembler::vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
AddressLiteral float_sign_flip, Register scratch, int vec_enc) {
Register scratch, AddressLiteral float_sign_flip,
int vec_enc) {
Label done;
vcvttps2dq(dst, src, vec_enc);
vmovdqu(xtmp1, float_sign_flip, scratch, vec_enc);
vpcmpeqd(xtmp2, dst, xtmp1, vec_enc);
vptest(xtmp2, xtmp2, vec_enc);
@ -4120,11 +4097,11 @@ void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMM
bind(done);
}
void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
Register scratch, int vec_enc) {
void C2_MacroAssembler::vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
Register scratch, AddressLiteral float_sign_flip,
int vec_enc) {
Label done;
vcvttps2dq(dst, src, vec_enc);
evmovdqul(xtmp1, k0, float_sign_flip, false, vec_enc, scratch);
Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc);
kortestwl(ktmp1, ktmp1);
@ -4141,6 +4118,115 @@ void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XM
bind(done);
}
/*
* Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation.
* If src is NaN, the result is 0.
* If the src is negative infinity or any value less than or equal to the value of Long.MIN_VALUE,
* the result is equal to the value of Long.MIN_VALUE.
* If the src is positive infinity or any value greater than or equal to the value of Long.MAX_VALUE,
* the result is equal to the value of Long.MAX_VALUE.
*/
// Post-processes an already converted double->long vector held in dst.
// Lanes of dst equal to the double_sign_flip pattern are treated as suspect
// (presumably that pattern is what the conversion produces for NaN and
// out-of-range inputs -- confirm against the constant's definition):
//   - suspect lanes whose src is NaN are set to zero;
//   - remaining suspect lanes whose src is not less than zero are overwritten
//     with the value computed by vpternlogq below;
//   - every other lane keeps its converted value.
// Writes xtmp1 and ktmp1 always, xtmp2 and ktmp2 on the slow path; scratch is
// handed to evmovdqul for loading the constant.
void C2_MacroAssembler::vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
Register scratch, AddressLiteral double_sign_flip,
int vec_enc) {
Label done;
// xtmp1 <- sign-flip constant.
evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch);
// ktmp1 <- lanes of dst that equal the constant.
evpcmpeqq(ktmp1, xtmp1, dst, vec_enc);
// Fast path: skip the fix-up when no lane matched.
kortestwl(ktmp1, ktmp1);
jccb(Assembler::equal, done);
// xtmp2 <- all zeros (also the comparison operand used further down).
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
// ktmp2 <- lanes where src is unordered with itself, i.e. NaN.
evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
// Zero the NaN lanes of dst (masked move under ktmp2).
evmovdquq(dst, ktmp2, xtmp2, true, vec_enc);
// XOR the NaN lanes out of the suspect set (assumes every NaN lane was suspect).
kxorwl(ktmp1, ktmp1, ktmp2);
// Among the remaining suspect lanes keep those whose src is not less than zero.
evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
// xtmp2 <- ternary function 0x11 of xtmp1 with itself; presumably the bitwise
// complement of the constant (TODO confirm against the vpternlog truth table).
vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
// Overwrite the selected lanes of dst with that value.
evmovdquq(dst, ktmp1, xtmp2, true, vec_enc);
bind(done);
}
/*
* Algorithm for vector D2L and F2I conversions:-
* a) Perform vector D2L/F2I cast.
* b) Choose fast path if none of the result vector lanes contains the 0x80000000 value.
* A lane holding that value signifies that the source value could be any of the
* special floating point values(NaN,-Inf,Inf,Max,-Min).
* c) Set destination to zero if source is NaN value.
* d) Replace 0x80000000 with MaxInt if source lane contains a positive value.
*/
// Packed double -> long cast: converts every lane with evcvttpd2qq and then
// runs the shared fix-up for special-case lanes.
void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst,
                                            XMMRegister src,
                                            XMMRegister xtmp1,
                                            XMMRegister xtmp2,
                                            KRegister ktmp1,
                                            KRegister ktmp2,
                                            AddressLiteral double_sign_flip,
                                            Register scratch,
                                            int vec_enc) {
  // Step 1: raw conversion.
  evcvttpd2qq(dst, src, vec_enc);

  // Step 2: special-case fix-up. The helper takes scratch before the
  // sign-flip constant, i.e. in the opposite order to this function.
  vector_cast_double_special_cases_evex(dst, src,
                                        xtmp1, xtmp2,
                                        ktmp1, ktmp2,
                                        scratch, double_sign_flip,
                                        vec_enc);
}
// Packed float -> int cast (AVX flavour, XMM temporaries only): converts every
// lane with vcvttps2dq and then runs the shared fix-up for special-case lanes.
void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst,
                                           XMMRegister src,
                                           XMMRegister xtmp1,
                                           XMMRegister xtmp2,
                                           XMMRegister xtmp3,
                                           XMMRegister xtmp4,
                                           AddressLiteral float_sign_flip,
                                           Register scratch,
                                           int vec_enc) {
  // Step 1: raw conversion.
  vcvttps2dq(dst, src, vec_enc);

  // Step 2: special-case fix-up. The helper takes scratch before the
  // sign-flip constant, i.e. in the opposite order to this function.
  vector_cast_float_special_cases_avx(dst, src,
                                      xtmp1, xtmp2, xtmp3, xtmp4,
                                      scratch, float_sign_flip,
                                      vec_enc);
}
// Packed float -> int cast (EVEX flavour, opmask temporaries): converts every
// lane with vcvttps2dq and then runs the shared fix-up for special-case lanes.
void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst,
                                            XMMRegister src,
                                            XMMRegister xtmp1,
                                            XMMRegister xtmp2,
                                            KRegister ktmp1,
                                            KRegister ktmp2,
                                            AddressLiteral float_sign_flip,
                                            Register scratch,
                                            int vec_enc) {
  // Step 1: raw conversion.
  vcvttps2dq(dst, src, vec_enc);

  // Step 2: special-case fix-up. The helper takes scratch before the
  // sign-flip constant, i.e. in the opposite order to this function.
  vector_cast_float_special_cases_evex(dst, src,
                                       xtmp1, xtmp2,
                                       ktmp1, ktmp2,
                                       scratch, float_sign_flip,
                                       vec_enc);
}
#ifdef _LP64
// Rounds each double lane of src to a long lane of dst as floor(src + 0.5).
//   new_mxcsr        - MXCSR image loaded for the duration of the sequence.
//   double_sign_flip - constant forwarded to the special-case fix-up.
//   scratch          - general-purpose temporary; used for address
//                      materialisation by ldmxcsr and to hold the 0.5 bits.
// Writes xtmp1, xtmp2, ktmp1 and ktmp2.
void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
AddressLiteral new_mxcsr, Register scratch, int vec_enc) {
// Perform the floor(val+0.5) operation with MXCSR.RC set to round towards -inf,
// then load the standard MXCSR image again. Note that the value loaded at the
// end is the standard image, not a saved copy of whatever was active on entry.
ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
// Switch to the caller-supplied MXCSR image.
ldmxcsr(new_mxcsr, scratch);
// scratch <- bit pattern of 0.5 as a double; broadcast it to every lane of xtmp1.
mov64(scratch, julong_cast(0.5L));
evpbroadcastq(xtmp1, scratch, vec_enc);
// xtmp1 <- src + 0.5
vaddpd(xtmp1, src , xtmp1, vec_enc);
// dst <- xtmp1 converted to longs (non-truncating form, so the MXCSR image
// loaded above is what is meant to govern the rounding).
evcvtpd2qq(dst, xtmp1, vec_enc);
// Patch special-case lanes, judged on the original src (xtmp1 is reused as a temp).
vector_cast_double_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, double_sign_flip, vec_enc);
// Back to the standard MXCSR image.
ldmxcsr(mxcsr_std, scratch);
}
// Rounds each float lane of src to an int lane of dst as floor(src + 0.5),
// using opmask temporaries for the special-case fix-up.
//   new_mxcsr       - MXCSR image loaded for the duration of the sequence.
//   float_sign_flip - constant forwarded to the special-case fix-up.
//   scratch         - general-purpose temporary; used for address
//                     materialisation by ldmxcsr and to hold the 0.5 bits.
// Writes xtmp1, xtmp2, ktmp1 and ktmp2.
void C2_MacroAssembler::vector_round_float_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
AddressLiteral new_mxcsr, Register scratch, int vec_enc) {
// Perform the floor(val+0.5) operation with MXCSR.RC set to round towards -inf,
// then load the standard MXCSR image again. Note that the value loaded at the
// end is the standard image, not a saved copy of whatever was active on entry.
ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
// Switch to the caller-supplied MXCSR image.
ldmxcsr(new_mxcsr, scratch);
// scratch <- bit pattern of 0.5 as a float; move it into xtmp1 and broadcast
// the low 32-bit element to every lane.
movl(scratch, jint_cast(0.5));
movq(xtmp1, scratch);
vbroadcastss(xtmp1, xtmp1, vec_enc);
// xtmp1 <- src + 0.5
vaddps(xtmp1, src , xtmp1, vec_enc);
// dst <- xtmp1 converted to ints (non-truncating form, so the MXCSR image
// loaded above is what is meant to govern the rounding).
vcvtps2dq(dst, xtmp1, vec_enc);
// Patch special-case lanes, judged on the original src (xtmp1 is reused as a temp).
vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc);
// Back to the standard MXCSR image.
ldmxcsr(mxcsr_std, scratch);
}
// Rounds each float lane of src to an int lane of dst as floor(src + 0.5),
// using only XMM temporaries for the special-case fix-up.
//   new_mxcsr       - MXCSR image loaded for the duration of the sequence.
//   float_sign_flip - constant forwarded to the special-case fix-up.
//   scratch         - general-purpose temporary; used for address
//                     materialisation by ldmxcsr and to hold the 0.5 bits.
// Writes xtmp1 here and passes xtmp1..xtmp4 to the fix-up helper as temporaries.
void C2_MacroAssembler::vector_round_float_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
XMMRegister xtmp3, XMMRegister xtmp4, AddressLiteral float_sign_flip,
AddressLiteral new_mxcsr, Register scratch, int vec_enc) {
// Perform the floor(val+0.5) operation with MXCSR.RC set to round towards -inf,
// then load the standard MXCSR image again. Note that the value loaded at the
// end is the standard image, not a saved copy of whatever was active on entry.
ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
// Switch to the caller-supplied MXCSR image.
ldmxcsr(new_mxcsr, scratch);
// scratch <- bit pattern of 0.5 as a float; move it into xtmp1 and broadcast
// the low 32-bit element to every lane.
movl(scratch, jint_cast(0.5));
movq(xtmp1, scratch);
vbroadcastss(xtmp1, xtmp1, vec_enc);
// xtmp1 <- src + 0.5
vaddps(xtmp1, src , xtmp1, vec_enc);
// dst <- xtmp1 converted to ints (non-truncating form, so the MXCSR image
// loaded above is what is meant to govern the rounding).
vcvtps2dq(dst, xtmp1, vec_enc);
// Patch special-case lanes, judged on the original src (xtmp1 is reused as a temp).
vector_cast_float_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, scratch, float_sign_flip, vec_enc);
// Back to the standard MXCSR image.
ldmxcsr(mxcsr_std, scratch);
}
#endif
void C2_MacroAssembler::vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
BasicType from_elem_bt, BasicType to_elem_bt) {
switch (from_elem_bt) {

View File

@ -303,6 +303,7 @@ public:
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
Register scratch, int vec_enc);
void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
Register scratch, int vec_enc);
@ -310,6 +311,33 @@ public:
void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
BasicType from_elem_bt, BasicType to_elem_bt);
// Fixes up special-case lanes of an already converted double->long vector in dst.
// Note: scratch comes before the sign-flip constant here, unlike in vector_castD2L_evex.
void vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral double_sign_flip,
int vec_enc);
// Fixes up special-case lanes of an already converted float->int vector in dst (opmask temporaries).
// Note: scratch comes before the sign-flip constant here, unlike in vector_castF2I_evex.
void vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral float_sign_flip,
int vec_enc);
// Fixes up special-case lanes of an already converted float->int vector in dst (XMM temporaries only).
// Note: scratch comes before the sign-flip constant here, unlike in vector_castF2I_avx.
void vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
Register scratch, AddressLiteral float_sign_flip,
int vec_enc);
#ifdef _LP64
// Rounds packed doubles in src to packed longs in dst as floor(src + 0.5);
// new_mxcsr is the MXCSR image loaded while the sequence runs.
void vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
AddressLiteral new_mxcsr, Register scratch, int vec_enc);
// Rounds packed floats in src to packed ints in dst as floor(src + 0.5), using
// opmask temporaries; new_mxcsr is the MXCSR image loaded while the sequence
// runs. The sign-flip argument is the float constant, matching the definition.
void vector_round_float_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                             KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
                             AddressLiteral new_mxcsr, Register scratch, int vec_enc);
// Rounds packed floats in src to packed ints in dst as floor(src + 0.5), using
// XMM temporaries only; new_mxcsr is the MXCSR image loaded while the sequence runs.
void vector_round_float_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
XMMRegister xtmp3, XMMRegister xtmp4, AddressLiteral float_sign_flip,
AddressLiteral new_mxcsr, Register scratch, int vec_enc);
#endif
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
bool merge, BasicType bt, int vlen_enc);

View File

@ -2252,12 +2252,12 @@ void MacroAssembler::fld_x(AddressLiteral src) {
Assembler::fld_x(as_Address(src));
}
void MacroAssembler::ldmxcsr(AddressLiteral src) {
void MacroAssembler::ldmxcsr(AddressLiteral src, Register scratchReg) {
if (reachable(src)) {
Assembler::ldmxcsr(as_Address(src));
} else {
lea(rscratch1, src);
Assembler::ldmxcsr(Address(rscratch1, 0));
lea(scratchReg, src);
Assembler::ldmxcsr(Address(scratchReg, 0));
}
}
@ -9120,6 +9120,80 @@ void MacroAssembler::convert_f2l(Register dst, XMMRegister src) {
bind(done);
}
// Emits scalar code that computes Math.round(float) of src into dst.
//   dst  - 32-bit integer result.
//   src  - float input (left unmodified).
//   rtmp - general-purpose temporary.
//   rcx  - receives the shift count; the one-operand sarl below presumably
//          shifts by CL, so this is expected to be the RCX register (confirm).
void MacroAssembler::round_float(Register dst, XMMRegister src, Register rtmp, Register rcx) {
// Following code is line by line assembly translation rounding algorithm.
// Please refer to java.lang.Math.round(float) algorithm for details.
const int32_t FloatConsts_EXP_BIT_MASK = 0x7F800000;
const int32_t FloatConsts_SIGNIFICAND_WIDTH = 24;
const int32_t FloatConsts_EXP_BIAS = 127;
const int32_t FloatConsts_SIGNIF_BIT_MASK = 0x007FFFFF;
const int32_t MINUS_32 = 0xFFFFFFE0;
Label L_special_case, L_block1, L_exit;
// dst <- biased exponent: (raw bits & EXP_BIT_MASK) >> (SIGNIFICAND_WIDTH - 1)
movl(rtmp, FloatConsts_EXP_BIT_MASK);
movdl(dst, src);
andl(dst, rtmp);
sarl(dst, FloatConsts_SIGNIFICAND_WIDTH - 1);
// rtmp <- shift = (SIGNIFICAND_WIDTH - 2 + EXP_BIAS) - biased exponent
movl(rtmp, FloatConsts_SIGNIFICAND_WIDTH - 2 + FloatConsts_EXP_BIAS);
subl(rtmp, dst);
// Keep a copy of the shift in rcx for the variable shift further down.
movl(rcx, rtmp);
// (shift & -32) != 0 means shift is outside [0, 31]: take the special-case path.
movl(dst, MINUS_32);
testl(rtmp, dst);
jccb(Assembler::notEqual, L_special_case);
// dst <- r = significand bits with the implicit leading one OR'ed in.
movdl(dst, src);
andl(dst, FloatConsts_SIGNIF_BIT_MASK);
orl(dst, FloatConsts_SIGNIF_BIT_MASK + 1);
// Negate r when the raw bits of src are negative (sign bit set).
movdl(rtmp, src);
testl(rtmp, rtmp);
jccb(Assembler::greaterEqual, L_block1);
negl(dst);
bind(L_block1);
// dst <- ((r >> shift) + 1) >> 1
sarl(dst);
addl(dst, 0x1);
sarl(dst, 0x1);
jmp(L_exit);
bind(L_special_case);
// Shift out of range: fall back to the plain float->int conversion helper.
convert_f2i(dst, src);
bind(L_exit);
}
// Emits scalar code that computes Math.round(double) of src into dst.
//   dst  - 64-bit integer result.
//   src  - double input (left unmodified).
//   rtmp - general-purpose temporary.
//   rcx  - receives the shift count; the one-operand sarq below presumably
//          shifts by CL, so this is expected to be the RCX register (confirm).
void MacroAssembler::round_double(Register dst, XMMRegister src, Register rtmp, Register rcx) {
// Following code is line by line assembly translation rounding algorithm.
// Please refer to java.lang.Math.round(double) algorithm for details.
const int64_t DoubleConsts_EXP_BIT_MASK = 0x7FF0000000000000L;
const int64_t DoubleConsts_SIGNIFICAND_WIDTH = 53;
const int64_t DoubleConsts_EXP_BIAS = 1023;
const int64_t DoubleConsts_SIGNIF_BIT_MASK = 0x000FFFFFFFFFFFFFL;
const int64_t MINUS_64 = 0xFFFFFFFFFFFFFFC0L;
Label L_special_case, L_block1, L_exit;
// dst <- biased exponent: (raw bits & EXP_BIT_MASK) >> (SIGNIFICAND_WIDTH - 1)
mov64(rtmp, DoubleConsts_EXP_BIT_MASK);
movq(dst, src);
andq(dst, rtmp);
sarq(dst, DoubleConsts_SIGNIFICAND_WIDTH - 1);
// rtmp <- shift = (SIGNIFICAND_WIDTH - 2 + EXP_BIAS) - biased exponent
mov64(rtmp, DoubleConsts_SIGNIFICAND_WIDTH - 2 + DoubleConsts_EXP_BIAS);
subq(rtmp, dst);
// Keep a copy of the shift in rcx for the variable shift further down.
movq(rcx, rtmp);
// (shift & -64) != 0 means shift is outside [0, 63]: take the special-case path.
mov64(dst, MINUS_64);
testq(rtmp, dst);
jccb(Assembler::notEqual, L_special_case);
// dst <- r = significand bits with the implicit leading one OR'ed in.
// The 64-bit masks are first loaded into rtmp and applied from there.
movq(dst, src);
mov64(rtmp, DoubleConsts_SIGNIF_BIT_MASK);
andq(dst, rtmp);
mov64(rtmp, DoubleConsts_SIGNIF_BIT_MASK + 1);
orq(dst, rtmp);
// Negate r when the raw bits of src are negative (sign bit set).
movq(rtmp, src);
testq(rtmp, rtmp);
jccb(Assembler::greaterEqual, L_block1);
negq(dst);
bind(L_block1);
// dst <- ((r >> shift) + 1) >> 1
sarq(dst);
addq(dst, 0x1);
sarq(dst, 0x1);
jmp(L_exit);
bind(L_special_case);
// Shift out of range: fall back to the plain double->long conversion helper.
convert_d2l(dst, src);
bind(L_exit);
}
void MacroAssembler::convert_d2l(Register dst, XMMRegister src) {
Label done;
cvttsd2siq(dst, src);

View File

@ -906,7 +906,7 @@ public:
void fld_x(AddressLiteral src);
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
void ldmxcsr(AddressLiteral src);
void ldmxcsr(AddressLiteral src, Register scratchReg = rscratch1);
#ifdef _LP64
private:
@ -1994,6 +1994,8 @@ public:
void convert_d2i(Register dst, XMMRegister src);
void convert_f2l(Register dst, XMMRegister src);
void convert_d2l(Register dst, XMMRegister src);
// Math.round(double): dst receives the long result; rtmp is a temporary and rcx holds the shift count.
void round_double(Register dst, XMMRegister src, Register rtmp, Register rcx);
// Math.round(float): dst receives the int result; rtmp is a temporary and rcx holds the shift count.
void round_float(Register dst, XMMRegister src, Register rtmp, Register rcx);
void cache_wb(Address line);
void cache_wbsync(bool is_pre);

View File

@ -1468,6 +1468,16 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_RoundVF:
if (UseAVX < 2) { // enabled for AVX2 only
return false;
}
break;
case Op_RoundVD:
if (UseAVX < 3) {
return false; // enabled for AVX3 only
}
break;
case Op_CompareAndSwapL:
#ifdef _LP64
case Op_CompareAndSwapP:
@ -1572,6 +1582,12 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_RoundF:
case Op_RoundD:
if (!is_LP64) {
return false;
}
break;
case Op_CopySignD:
case Op_CopySignF:
if (UseAVX < 3 || !is_LP64) {
@ -1817,6 +1833,11 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
break;
case Op_RoundVD:
if (!VM_Version::supports_avx512dq()) {
return false;
}
break;
case Op_VectorCastF2X:
if (is_subword_type(bt) || bt == T_LONG) {
return false;
@ -7173,13 +7194,14 @@ instruct vcastFtoD_reg(vec dst, vec src) %{
ins_pipe( pipe_slow );
%}
instruct vcastFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx512vl() &&
Matcher::vector_length_in_bytes(n) < 64 &&
Matcher::vector_element_basic_type(n) == T_INT);
match(Set dst (VectorCastF2X src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr);
format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
__ vector_castF2I_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
@ -7189,13 +7211,13 @@ instruct vcastFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, ve
ins_pipe( pipe_slow );
%}
instruct vcastFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
predicate((VM_Version::supports_avx512vl() ||
Matcher::vector_length_in_bytes(n) == 64) &&
Matcher::vector_element_basic_type(n) == T_INT);
match(Set dst (VectorCastF2X src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
__ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
@ -7216,11 +7238,11 @@ instruct vcastDtoF_reg(vec dst, vec src) %{
ins_pipe( pipe_slow );
%}
instruct vcastDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
instruct castDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
match(Set dst (VectorCastD2X src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
__ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
@ -7246,6 +7268,56 @@ instruct vucast(vec dst, vec src) %{
ins_pipe( pipe_slow );
%}
#ifdef _LP64
// Vector float rounding (RoundVF) producing T_INT lanes, selected when AVX512VL
// is not available and the vector is narrower than 64 bytes. Expands to
// vector_round_float_avx, which needs four XMM temporaries and a scratch GPR.
instruct vround_float_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx512vl() &&
Matcher::vector_length_in_bytes(n) < 64 &&
Matcher::vector_element_basic_type(n) == T_INT);
match(Set dst (RoundVF src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr);
format %{ "vector_round_float $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
// 0x3F80 is the MXCSR image passed as new_mxcsr; the macro assembler routine
// loads it while rounding (its comment describes the mode as round towards -inf).
InternalAddress new_mxcsr = $constantaddress((jint)0x3F80);
__ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
ExternalAddress(vector_float_signflip()), new_mxcsr, $scratch$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Vector float rounding (RoundVF) producing T_INT lanes, selected when AVX512VL
// is available or the vector is exactly 64 bytes. Expands to
// vector_round_float_evex, which needs two XMM and two opmask temporaries plus
// a scratch GPR.
instruct vround_float_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
predicate((VM_Version::supports_avx512vl() ||
Matcher::vector_length_in_bytes(n) == 64) &&
Matcher::vector_element_basic_type(n) == T_INT);
match(Set dst (RoundVF src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
format %{ "vector_round_float $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
// 0x3F80 is the MXCSR image passed as new_mxcsr; the macro assembler routine
// loads it while rounding (its comment describes the mode as round towards -inf).
InternalAddress new_mxcsr = $constantaddress((jint)0x3F80);
__ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
ExternalAddress(vector_float_signflip()), new_mxcsr, $scratch$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Vector double rounding (RoundVD) producing T_LONG lanes. Expands to
// vector_round_double_evex, which needs two XMM and two opmask temporaries plus
// a scratch GPR. Despite the generic name, this rule only covers the
// double -> long case.
instruct vround_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
match(Set dst (RoundVD src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
format %{ "vector_round_long $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
// 0x3F80 is the MXCSR image passed as new_mxcsr; the macro assembler routine
// loads it while rounding (its comment describes the mode as round towards -inf).
InternalAddress new_mxcsr = $constantaddress((jint)0x3F80);
__ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
ExternalAddress(vector_double_signflip()), new_mxcsr, $scratch$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
#endif
// --------------------------------- VectorMaskCmp --------------------------------------
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{

View File

@ -10821,6 +10821,28 @@ instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
ins_pipe(pipe_slow);
%}
// Scalar RoundD: double in src rounded to a long in dst via
// MacroAssembler::round_double. rtmp is a general temporary; the second
// temporary is constrained to the rcx register class, presumably because the
// expansion shifts by a count it stores in rcx (see round_double).
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
match(Set dst (RoundD src));
effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
ins_encode %{
__ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
%}
ins_pipe(pipe_slow);
%}
// Scalar RoundF: float in src rounded to an int in dst via
// MacroAssembler::round_float. rtmp is a general temporary; the second
// temporary is constrained to the rcx register class, presumably because the
// expansion shifts by a count it stores in rcx (see round_float).
// The format string lists the temporaries, as round_double_reg does.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src\t! using $rtmp and $rcx as TEMP" %}
  ins_encode %{
    __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
instruct convI2F_reg_reg(regF dst, rRegI src)
%{
predicate(!UseXmmI2F);

View File

@ -4239,6 +4239,7 @@ bool MatchRule::is_vector() const {
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "VectorLongToMask",
// Next are vector mask ops.
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
"RoundVF", "RoundVD",
// Next are not supported currently.
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",
"ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD"

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -134,6 +134,7 @@ class methodHandle;
do_name(log_name,"log") do_name(log10_name,"log10") do_name(pow_name,"pow") \
do_name(exp_name,"exp") do_name(min_name,"min") do_name(max_name,"max") \
do_name(floor_name, "floor") do_name(ceil_name, "ceil") do_name(rint_name, "rint") \
do_name(round_name, "round") \
\
do_name(addExact_name,"addExact") \
do_name(decrementExact_name,"decrementExact") \
@ -185,6 +186,8 @@ class methodHandle;
do_intrinsic(_minF, java_lang_Math, min_name, float2_float_signature, F_S) \
do_intrinsic(_maxD, java_lang_Math, max_name, double2_double_signature, F_S) \
do_intrinsic(_minD, java_lang_Math, min_name, double2_double_signature, F_S) \
do_intrinsic(_roundD, java_lang_Math, round_name, double_long_signature, F_S) \
do_intrinsic(_roundF, java_lang_Math, round_name, float_int_signature, F_S) \
do_intrinsic(_dcopySign, java_lang_Math, copySign_name, double2_double_signature, F_S) \
do_intrinsic(_fcopySign, java_lang_Math, copySign_name, float2_float_signature, F_S) \
do_intrinsic(_dsignum, java_lang_Math, signum_name, double_double_signature, F_S) \

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -523,6 +523,8 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:
case vmIntrinsics::_dpow:
case vmIntrinsics::_roundD:
case vmIntrinsics::_roundF:
case vmIntrinsics::_min:
case vmIntrinsics::_max:
case vmIntrinsics::_min_strict:

View File

@ -311,6 +311,8 @@ macro(SignumD)
macro(SignumF)
macro(SqrtD)
macro(SqrtF)
macro(RoundF)
macro(RoundD)
macro(Start)
macro(StartOSR)
macro(StoreB)
@ -446,6 +448,8 @@ macro(ReplicateI)
macro(ReplicateL)
macro(ReplicateF)
macro(ReplicateD)
macro(RoundVF)
macro(RoundVD)
macro(Extract)
macro(ExtractB)
macro(ExtractUB)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -81,6 +81,14 @@ class ConvD2LNode : public Node {
virtual uint ideal_reg() const { return Op_RegL; }
};
//------------------------------RoundDNode-------------------------------------
// Round a double to a long; created for the Math.round(double) intrinsic.
class RoundDNode : public Node {
  public:
  // The first input slot is passed as 0; the value to round is the second input.
  RoundDNode(Node* src) : Node(0, src) {}

  virtual int Opcode() const;
  // Result is a long held in a long register.
  virtual uint ideal_reg() const { return Op_RegL; }
  virtual const Type* bottom_type() const { return TypeLong::LONG; }
};
//------------------------------ConvF2DNode------------------------------------
// Convert Float to a Double.
class ConvF2DNode : public Node {
@ -105,6 +113,7 @@ class ConvF2INode : public Node {
virtual uint ideal_reg() const { return Op_RegI; }
};
//------------------------------ConvF2LNode------------------------------------
// Convert float to long
class ConvF2LNode : public Node {
@ -141,6 +150,14 @@ class ConvI2FNode : public Node {
virtual uint ideal_reg() const { return Op_RegF; }
};
//------------------------------RoundFNode-------------------------------------
// Round a float to an int; created for the Math.round(float) intrinsic.
class RoundFNode : public Node {
  public:
  // The first input slot is passed as 0; the value to round is the second input.
  RoundFNode(Node* src) : Node(0, src) {}

  virtual int Opcode() const;
  // Result is an int held in an int register.
  virtual uint ideal_reg() const { return Op_RegI; }
  virtual const Type* bottom_type() const { return TypeInt::INT; }
};
//------------------------------ConvI2LNode------------------------------------
// Convert integer to long
class ConvI2LNode : public TypeNode {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -269,6 +269,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_dcopySign:
case vmIntrinsics::_fcopySign:
case vmIntrinsics::_dsignum:
case vmIntrinsics::_roundF:
case vmIntrinsics::_roundD:
case vmIntrinsics::_fsignum: return inline_math_native(intrinsic_id());
case vmIntrinsics::_notify:
@ -1605,6 +1607,7 @@ Node* LibraryCallKit::round_double_node(Node* n) {
// public static double Math.sqrt(double)
// public static double Math.log(double)
// public static double Math.log10(double)
// public static double Math.round(double)
bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) {
Node* arg = round_double_node(argument(0));
Node* n = NULL;
@ -1616,6 +1619,7 @@ bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) {
case vmIntrinsics::_ceil: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_ceil); break;
case vmIntrinsics::_floor: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_floor); break;
case vmIntrinsics::_rint: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_rint); break;
case vmIntrinsics::_roundD: n = new RoundDNode(arg); break;
case vmIntrinsics::_dcopySign: n = CopySignDNode::make(_gvn, arg, round_double_node(argument(2))); break;
case vmIntrinsics::_dsignum: n = SignumDNode::make(_gvn, arg); break;
default: fatal_unexpected_iid(id); break;
@ -1637,6 +1641,7 @@ bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
case vmIntrinsics::_labs: n = new AbsLNode( arg); break;
case vmIntrinsics::_fcopySign: n = new CopySignFNode(arg, argument(1)); break;
case vmIntrinsics::_fsignum: n = SignumFNode::make(_gvn, arg); break;
case vmIntrinsics::_roundF: n = new RoundFNode(arg); break;
default: fatal_unexpected_iid(id); break;
}
set_result(_gvn.transform(n));
@ -1752,9 +1757,11 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
// These intrinsics are supported on all hardware
case vmIntrinsics::_roundD: return Matcher::match_rule_supported(Op_RoundD) ? inline_double_math(id) : false;
case vmIntrinsics::_ceil:
case vmIntrinsics::_floor:
case vmIntrinsics::_rint: return Matcher::match_rule_supported(Op_RoundDoubleMode) ? inline_double_math(id) : false;
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dsqrt_strict:
return Matcher::match_rule_supported(Op_SqrtD) ? inline_double_math(id) : false;
@ -1774,6 +1781,7 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
case vmIntrinsics::_fcopySign: return inline_math(id);
case vmIntrinsics::_dsignum: return Matcher::match_rule_supported(Op_SignumD) ? inline_double_math(id) : false;
case vmIntrinsics::_fsignum: return Matcher::match_rule_supported(Op_SignumF) ? inline_math(id) : false;
case vmIntrinsics::_roundF: return Matcher::match_rule_supported(Op_RoundF) ? inline_math(id) : false;
// These intrinsics are not yet correctly implemented
case vmIntrinsics::_datan2:

View File

@ -970,6 +970,10 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
case Op_ModL: body_size += 30; break;
case Op_DivL: body_size += 30; break;
case Op_MulL: body_size += 10; break;
case Op_RoundF: body_size += 30; break;
case Op_RoundD: body_size += 30; break;
case Op_RoundVF: body_size += 30; break;
case Op_RoundVD: body_size += 30; break;
case Op_PopCountVI:
case Op_PopCountVL: {
const TypeVect* vt = n->bottom_type()->is_vect();

View File

@ -2563,6 +2563,7 @@ bool SuperWord::output() {
opc == Op_AbsF || opc == Op_AbsD ||
opc == Op_AbsI || opc == Op_AbsL ||
opc == Op_NegF || opc == Op_NegD ||
opc == Op_RoundF || opc == Op_RoundD ||
opc == Op_PopCountI || opc == Op_PopCountL) {
assert(n->req() == 2, "only one input expected");
Node* in = vector_opd(p, 1);

View File

@ -157,6 +157,10 @@ int VectorNode::opcode(int sopc, BasicType bt) {
return (bt == T_FLOAT ? Op_SqrtVF : 0);
case Op_SqrtD:
return (bt == T_DOUBLE ? Op_SqrtVD : 0);
case Op_RoundF:
return (bt == T_INT ? Op_RoundVF : 0);
case Op_RoundD:
return (bt == T_LONG ? Op_RoundVD : 0);
case Op_PopCountI:
// Unimplemented for subword types since bit count changes
// depending on size of lane (and sign bit).
@ -585,6 +589,9 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
case Op_SqrtVF: return new SqrtVFNode(n1, vt);
case Op_SqrtVD: return new SqrtVDNode(n1, vt);
case Op_RoundVF: return new RoundVFNode(n1, vt);
case Op_RoundVD: return new RoundVDNode(n1, vt);
case Op_PopCountVI: return new PopCountVINode(n1, vt);
case Op_PopCountVL: return new PopCountVLNode(n1, vt);
case Op_RotateLeftV: return new RotateLeftVNode(n1, n2, vt);

View File

@ -1544,6 +1544,14 @@ class VectorCastD2XNode : public VectorCastNode {
virtual int Opcode() const;
};
// Vector node for the RoundVF opcode: takes a single vector input and yields
// a vector of type vt.
class RoundVFNode : public VectorNode {
public:
// in: the vector operand to round; vt: the vector type of this node's result.
RoundVFNode(Node* in, const TypeVect* vt) :VectorNode(in, vt) {
// The operand must be a vector whose lanes are floats.
assert(in->bottom_type()->is_vect()->element_basic_type() == T_FLOAT, "must be float");
}
virtual int Opcode() const;
};
class VectorUCastB2XNode : public VectorCastNode {
public:
VectorUCastB2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
@ -1552,6 +1560,14 @@ class VectorUCastB2XNode : public VectorCastNode {
virtual int Opcode() const;
};
// Vector node for the RoundVD opcode: takes a single vector input and yields
// a vector of type vt.
class RoundVDNode : public VectorNode {
public:
// in: the vector operand to round; vt: the vector type of this node's result.
RoundVDNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {
// The operand must be a vector whose lanes are doubles.
assert(in->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE, "must be double");
}
virtual int Opcode() const;
};
class VectorUCastS2XNode : public VectorCastNode {
public:
VectorUCastS2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {

View File

@ -753,6 +753,7 @@ public final class Math {
* @see java.lang.Integer#MAX_VALUE
* @see java.lang.Integer#MIN_VALUE
*/
@IntrinsicCandidate
public static int round(float a) {
int intBits = Float.floatToRawIntBits(a);
int biasedExp = (intBits & FloatConsts.EXP_BIT_MASK)
@ -802,6 +803,7 @@ public final class Math {
* @see java.lang.Long#MAX_VALUE
* @see java.lang.Long#MIN_VALUE
*/
@IntrinsicCandidate
public static long round(double a) {
long longBits = Double.doubleToRawLongBits(a);
long biasedExp = (longBits & DoubleConsts.EXP_BIT_MASK)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -52,6 +52,8 @@ public class TestDoubleVect {
static int test() {
double[] a0 = new double[ARRLEN];
long [] l0 = new long[ARRLEN];
double[] a1 = new double[ARRLEN];
double[] a2 = new double[ARRLEN];
double[] a3 = new double[ARRLEN];
@ -91,6 +93,7 @@ public class TestDoubleVect {
test_ceil(a0, a1);
test_floor(a0, a1);
test_sqrt(a0, a1);
test_round(l0, a1);
}
// Test and verify results
System.out.println("Verification");
@ -355,6 +358,7 @@ public class TestDoubleVect {
errn += verify("test_negc: ", i, a0[i], (double)(-((double)(ADD_INIT+i))));
}
// To test -ve and +ve Zero scenarios.
double [] other_corner_cases = { -0.0, 0.0, 9.007199254740992E15 };
double [] other_corner_cases_res = new double[3];
@ -421,6 +425,35 @@ public class TestDoubleVect {
for (int i=8; i<ARRLEN; i++) {
errn += verify("test_sqrt: ", i, a0[i], Math.sqrt((double)(ADD_INIT+i)));
}
a1[6] = +0x1.fffffffffffffp-2;
a1[7] = +0x1.0p-1;
a1[8] = +0x1.0000000000001p-1;
a1[9] = -0x1.fffffffffffffp-2;
a1[10] = -0x1.0p-1;
a1[11] = -0x1.0000000000001p-1;
a1[12] = 1.7976931348623157E19;
a1[13] = -1.7976931348623157E19;
test_round(l0, a1);
errn += verify("test_round: ", 0, l0[0], 0L);
errn += verify("test_round: ", 1, l0[1], Long.MAX_VALUE);
errn += verify("test_round: ", 2, l0[2], Long.MIN_VALUE);
errn += verify("test_round: ", 3, l0[3], Long.MAX_VALUE);
errn += verify("test_round: ", 4, l0[4], 0L);
errn += verify("test_round: ", 5, l0[5], 0L);
errn += verify("test_round: ", 6, l0[6], 0L);
errn += verify("test_round: ", 7, l0[7], 1L);
errn += verify("test_round: ", 8, l0[8], 1L);
errn += verify("test_round: ", 9, l0[9], 0L);
errn += verify("test_round: ", 10, l0[10], 0L);
errn += verify("test_round: ", 11, l0[11], -1L);
errn += verify("test_round: ", 12, l0[12], Long.MAX_VALUE);
errn += verify("test_round: ", 13, l0[13], Long.MIN_VALUE);
for (int i=14; i<ARRLEN; i++) {
errn += verify("test_round: ", i, l0[i], Math.round((double)(ADD_INIT+i)));
}
}
if (errn > 0)
@ -564,6 +597,12 @@ public class TestDoubleVect {
end = System.currentTimeMillis();
System.out.println("test_sqrt_n: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_round(l0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_round_n: " + (end - start));
return errn;
}
@ -691,6 +730,20 @@ public class TestDoubleVect {
}
}
// Element-wise Math.round(double): a0[k] receives the rounded value of a1[k].
// The loop runs over a0.length, so a1 must be at least as long as a0.
static void test_round(long[] a0, double[] a1) {
    for (int k = 0; k < a0.length; k++) {
        final double src = a1[k];
        a0[k] = Math.round(src);
    }
}
// Compares an actual long result against the expected one.
// Returns 0 on a match; otherwise reports the mismatch on System.err
// (prefixed by text and the element index i) and returns 1.
static int verify(String text, int i, long elem, long val) {
    if (elem == val) {
        return 0;
    }
    System.err.println(text + "[" + i + "] = " + elem + " != " + val);
    return 1;
}
static int verify(String text, int i, double elem, double val) {
if (elem != val && !(Double.isNaN(elem) && Double.isNaN(val))) {
System.err.println(text + "[" + i + "] = " + elem + " != " + val);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -52,6 +52,7 @@ public class TestFloatVect {
static int test() {
float[] a0 = new float[ARRLEN];
int[] i0 = new int[ARRLEN];
float[] a1 = new float[ARRLEN];
float[] a2 = new float[ARRLEN];
float[] a3 = new float[ARRLEN];
@ -88,7 +89,9 @@ public class TestFloatVect {
test_diva(a0, a1, a3);
test_negc(a0, a1);
test_sqrt(a0, a1);
test_round(i0, a1);
}
// Test and verify results
System.out.println("Verification");
int errn = 0;
@ -369,6 +372,35 @@ public class TestFloatVect {
errn += verify("test_sqrt: ", i, a0[i], (float)(Math.sqrt((double)(ADD_INIT+i))));
}
a1[6] = +0x1.fffffep-2f;
a1[7] = +0x1.0p-1f;
a1[8] = +0x1.000002p-1f;
a1[9] = -0x1.fffffep-2f;
a1[10] = -0x1.0p-1f;
a1[11] = -0x1.000002p-1f;
a1[12] = 3.4028235E10f;
a1[13] = -3.4028235E10f;
test_round(i0, a1);
errn += verify("test_round: ", 0, i0[0], 0);
errn += verify("test_round: ", 1, i0[1], Integer.MAX_VALUE);
errn += verify("test_round: ", 2, i0[2], Integer.MIN_VALUE);
errn += verify("test_round: ", 3, i0[3], Integer.MAX_VALUE);
errn += verify("test_round: ", 4, i0[4], 0);
errn += verify("test_round: ", 5, i0[5], 0);
errn += verify("test_round: ", 6, i0[6], 0);
errn += verify("test_round: ", 7, i0[7], 1);
errn += verify("test_round: ", 8, i0[8], 1);
errn += verify("test_round: ", 9, i0[9], 0);
errn += verify("test_round: ", 10, i0[10], 0);
errn += verify("test_round: ", 11, i0[11], -1);
errn += verify("test_round: ", 12, i0[12], Integer.MAX_VALUE);
errn += verify("test_round: ", 13, i0[13], Integer.MIN_VALUE);
for (int i=14; i<ARRLEN; i++) {
errn += verify("test_round: ", i, i0[i], Math.round(((float)(ADD_INIT+i))));
}
}
if (errn > 0)
@ -512,6 +544,12 @@ public class TestFloatVect {
end = System.currentTimeMillis();
System.out.println("test_sqrt_n: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_round(i0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_round_n: " + (end - start));
return errn;
}
@ -609,6 +647,20 @@ public class TestFloatVect {
}
}
// Element-wise Math.round(float): a0[k] receives the rounded value of a1[k].
// The loop runs over a0.length, so a1 must be at least as long as a0.
static void test_round(int[] a0, float[] a1) {
    for (int k = 0; k < a0.length; k++) {
        final float src = a1[k];
        a0[k] = Math.round(src);
    }
}
// Compares an actual int result against the expected one.
// Returns 0 on a match; otherwise reports the mismatch on System.err
// (prefixed by text and the element index i) and returns 1.
static int verify(String text, int i, int elem, int val) {
    if (elem == val) {
        return 0;
    }
    System.err.println(text + "[" + i + "] = " + elem + " != " + val);
    return 1;
}
static int verify(String text, int i, float elem, float val) {
if (elem != val && !(Float.isNaN(elem) && Float.isNaN(val))) {
System.err.println(text + "[" + i + "] = " + elem + " != " + val);

View File

@ -0,0 +1,72 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8279508
* @summary Auto-vectorize Math.round API
* @requires vm.compiler2.enabled
* @requires vm.cpu.features ~= ".*avx512dq.*"
* @requires os.simpleArch == "x64"
* @library /test/lib /
* @run driver compiler.vectorization.TestRoundVectDouble
*/
package compiler.vectorization;
import compiler.lib.ir_framework.*;
// IR test for Math.round(double): runs a simple rounding loop and checks, via
// the @IR rule on test_round_double, that RoundVD appears in the compiled
// method. The numeric results in lout are not checked here.
public class TestRoundVectDouble {
// Length of the input/output arrays used by the kernel.
private static final int ARRLEN = 1024;
// Number of times the run method invokes the test method.
private static final int ITERS = 11000;
private static double [] dinp;
private static long [] lout;
// Starts the test framework with tiered compilation off, UseAVX=3 and a
// reduced compile threshold scaling, then prints PASSED.
public static void main(String args[]) {
TestFramework.runWithFlags("-XX:-TieredCompilation",
"-XX:UseAVX=3",
"-XX:CompileThresholdScaling=0.3");
System.out.println("PASSED");
}
// Kernel under test: lout[i] = Math.round(dinp[i]) over lout.length elements.
// The IR rule is conditioned on UseAVX being 3 (the value main passes) and
// requires a RoundVD count greater than zero.
@Test
@IR(applyIf = {"UseAVX", "3"}, counts = {"RoundVD" , " > 0 "})
public void test_round_double(long[] lout, double[] dinp) {
for (int i = 0; i < lout.length; i+=1) {
lout[i] = Math.round(dinp[i]);
}
}
// Driver for test_round_double: allocates the arrays, fills the input with
// i * 1.4, and calls the kernel ITERS times.
// NOTE(review): with RunMode.STANDALONE the repeated calls here are presumably
// what triggers compilation of the kernel - confirm against the framework docs.
@Run(test = {"test_round_double"}, mode = RunMode.STANDALONE)
public void kernel_test_round_double() {
dinp = new double[ARRLEN];
lout = new long[ARRLEN];
for(int i = 0 ; i < ARRLEN; i++) {
dinp[i] = (double)i*1.4;
}
for (int i = 0; i < ITERS; i++) {
test_round_double(lout , dinp);
}
}
}

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8279508
* @summary Auto-vectorize Math.round API
* @requires vm.compiler2.enabled
* @requires vm.cpu.features ~= ".*avx.*"
* @requires os.simpleArch == "x64"
* @library /test/lib /
* @run driver compiler.vectorization.TestRoundVectFloat
*/
package compiler.vectorization;
import compiler.lib.ir_framework.*;
/**
 * IR test for Math.round(float): runs a simple rounding loop and checks, via
 * the @IR rule on test_round_float, that RoundVF appears in the compiled
 * method. The numeric results in iout are not checked here.
 */
public class TestRoundVectFloat {
    // Length of the input/output arrays used by the kernel.
    private static final int ARRLEN = 1024;
    // Number of times the run method invokes the test method.
    private static final int ITERS = 11000;
    private static float [] finp;
    private static int [] iout;

    /**
     * Starts the test framework with tiered compilation off and a reduced
     * compile threshold scaling, then prints PASSED.
     */
    public static void main(String args[]) {
        // UseAVX is deliberately not pinned: the IR rule below is guarded by
        // "UseAVX > 1", so forcing -XX:UseAVX=1 would keep the rule from ever
        // being applied and the test would verify nothing.
        TestFramework.runWithFlags("-XX:-TieredCompilation",
                                   "-XX:CompileThresholdScaling=0.3");
        System.out.println("PASSED");
    }

    /**
     * Kernel under test: iout[i] = Math.round(finp[i]) over finp.length elements.
     * When UseAVX is greater than 1 the IR rule requires a RoundVF count
     * greater than zero.
     */
    @Test
    @IR(applyIf = {"UseAVX", " > 1"}, counts = {"RoundVF" , " > 0 "})
    public void test_round_float(int[] iout, float[] finp) {
        for (int i = 0; i < finp.length; i+=1) {
            iout[i] = Math.round(finp[i]);
        }
    }

    /**
     * Driver for test_round_float: allocates the arrays, fills the input with
     * i * 1.4f, and calls the kernel ITERS times.
     */
    @Run(test = {"test_round_float"}, mode = RunMode.STANDALONE)
    public void kernel_test_round() {
        finp = new float[ARRLEN];
        iout = new int[ARRLEN];
        for (int i = 0; i < ARRLEN; i++) {
            finp[i] = (float)i*1.4f;
        }
        for (int i = 0; i < ITERS; i++) {
            test_round_float(iout , finp);
        }
    }
}

View File

@ -25,11 +25,12 @@
* @test
* @bug 6430675 8010430
* @summary Check for correct implementation of {Math, StrictMath}.round
* @run main/othervm -XX:Tier3CompileThreshold=50 -XX:CompileThresholdScaling=0.01 -XX:+TieredCompilation RoundTests
*/
public class RoundTests {
public static void main(String... args) {
int failures = 0;
for (int i = 0; i < 500; i++) {
failures += testNearFloatHalfCases();
failures += testNearDoubleHalfCases();
failures += testUnityULPCases();
@ -41,6 +42,7 @@ public class RoundTests {
throw new RuntimeException();
}
}
}
private static int testNearDoubleHalfCases() {
int failures = 0;

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -26,53 +26,97 @@ package org.openjdk.bench.java.math;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
public class FpRoundingBenchmark {
@Param({"1024"})
@Param({"1024", "2048"})
public int TESTSIZE;
public double[] DargV1;
public double[] Res;
public double[] ResD;
public long[] ResL;
public float[] FargV1;
public float[] ResF;
public int[] ResI;
public final double[] DspecialVals = {
0.0, -0.0, Double.NaN, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY};
0.0, -0.0, Double.NaN, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY,
Double.MAX_VALUE, -Double.MAX_VALUE, Double.MIN_VALUE, -Double.MIN_VALUE,
Double.MIN_NORMAL
};
public final float[] FspecialVals = {
0.0f, -0.0f, Float.NaN, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY,
Float.MAX_VALUE, -Float.MAX_VALUE, Float.MIN_VALUE, -Float.MIN_VALUE,
Float.MIN_NORMAL
};
@Setup(Level.Trial)
public void BmSetup() {
int i = 0;
Random r = new Random(1024);
DargV1 = new double[TESTSIZE];
Res = new double[TESTSIZE];
ResD = new double[TESTSIZE];
for (; i < DspecialVals.length; i++) {
DargV1[i] = DspecialVals[i];
}
for (; i < TESTSIZE; i++) {
DargV1[i] = r.nextDouble()*TESTSIZE;
DargV1[i] = Double.longBitsToDouble(r.nextLong());;
}
FargV1 = new float[TESTSIZE];
ResF = new float[TESTSIZE];
i = 0;
for (; i < FspecialVals.length; i++) {
FargV1[i] = FspecialVals[i];
}
for (; i < TESTSIZE; i++) {
FargV1[i] = Float.intBitsToFloat(r.nextInt());
}
ResI = new int[TESTSIZE];
ResL = new long[TESTSIZE];
}
// Benchmark kernel: stores Math.ceil of each of the first TESTSIZE entries of
// DargV1 into the matching slot of ResD.
@Benchmark
public void test_ceil() {
    for (int idx = 0; idx < TESTSIZE; idx++) {
        final double src = DargV1[idx];
        ResD[idx] = Math.ceil(src);
    }
}
@Benchmark
public void testceil(Blackhole bh) {
for (int i = 0; i < TESTSIZE; i++)
Res[i] = Math.ceil(DargV1[i]);
public void test_floor() {
for (int i = 0; i < TESTSIZE; i++) {
ResD[i] = Math.floor(DargV1[i]);
}
}
@Benchmark
public void testfloor(Blackhole bh) {
for (int i = 0; i < TESTSIZE; i++)
Res[i] = Math.floor(DargV1[i]);
public void test_rint() {
for (int i = 0; i < TESTSIZE; i++) {
ResD[i] = Math.rint(DargV1[i]);
}
}
@Benchmark
public void testrint(Blackhole bh) {
for (int i = 0; i < TESTSIZE; i++)
Res[i] = Math.rint(DargV1[i]);
public void test_round_double() {
for (int i = 0; i < TESTSIZE; i++) {
ResL[i] = Math.round(DargV1[i]);
}
}
// Benchmark kernel: stores Math.round of each of the first TESTSIZE entries of
// FargV1 into the matching slot of ResI.
@Benchmark
public void test_round_float() {
    for (int idx = 0; idx < TESTSIZE; idx++) {
        final float src = FargV1[idx];
        ResI[idx] = Math.round(src);
    }
}
}