8282541: AArch64: Auto-vectorize Math.round API
Reviewed-by: njian, ngasson, adinn
parent 8de3c65545
commit a7b5157375

Changed files:
src/hotspot/cpu/aarch64: aarch64.ad, aarch64_neon.ad, aarch64_neon_ad.m4, aarch64_sve.ad, aarch64_sve_ad.m4, assembler_aarch64.hpp, c2_MacroAssembler_aarch64.cpp, c2_MacroAssembler_aarch64.hpp, macroAssembler_aarch64.cpp, macroAssembler_aarch64.hpp, matcher_aarch64.hpp
src/hotspot/cpu/arm
src/hotspot/cpu/ppc
src/hotspot/cpu/riscv
src/hotspot/cpu/s390
src/hotspot/cpu/x86
src/hotspot/share/opto
test/hotspot
src/hotspot/cpu/aarch64/aarch64.ad
@@ -15141,6 +15141,30 @@ instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  ins_pipe(fp_l2d);
%}

instruct round_double_reg(iRegLNoSp dst, vRegD src, vRegD ftmp, rFlagsReg cr)
%{
  match(Set dst (RoundD src));
  effect(TEMP_DEF dst, TEMP ftmp, KILL cr);
  format %{ "java_round_double $dst,$src" %}
  ins_encode %{
    __ java_round_double($dst$$Register, as_FloatRegister($src$$reg),
                         as_FloatRegister($ftmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct round_float_reg(iRegINoSp dst, vRegF src, vRegF ftmp, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP_DEF dst, TEMP ftmp, KILL cr);
  format %{ "java_round_float $dst,$src" %}
  ins_encode %{
    __ java_round_float($dst$$Register, as_FloatRegister($src$$reg),
                        as_FloatRegister($ftmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// stack <-> reg and reg <-> reg shuffles with no conversion

instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
src/hotspot/cpu/aarch64/aarch64_neon.ad
@@ -570,6 +570,52 @@ instruct vcvt2Dto2F(vecD dst, vecX src)
  ins_pipe(pipe_class_default);
%}


instruct vroundvecD2Fto2I(vecD dst, vecD src, vecD tmp1, vecD tmp2, vecD tmp3)
%{
  predicate(UseSVE == 0 &&
            n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
  format %{ "vround $dst, T2S, $src\t# round vecD 2F to 2I vector" %}
  ins_encode %{
    __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                         as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                         as_FloatRegister($tmp3$$reg), __ T2S);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vroundvecX4Fto4I(vecX dst, vecX src, vecX tmp1, vecX tmp2, vecX tmp3)
%{
  predicate(UseSVE == 0 &&
            n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
  format %{ "vround $dst, T4S, $src\t# round vecX 4F to 4I vector" %}
  ins_encode %{
    __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                         as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                         as_FloatRegister($tmp3$$reg), __ T4S);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vroundvecX2Dto2L(vecX dst, vecX src, vecX tmp1, vecX tmp2, vecX tmp3)
%{
  predicate(UseSVE == 0 &&
            n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
  format %{ "vround $dst, T2D, $src\t# round vecX 2D to 2L vector" %}
  ins_encode %{
    __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                         as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                         as_FloatRegister($tmp3$$reg), __ T2D);
  %}
  ins_pipe(pipe_class_default);
%}

// ------------------------------ Reduction -------------------------------

instruct reduce_add8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp)
src/hotspot/cpu/aarch64/aarch64_neon_ad.m4
@@ -349,6 +349,25 @@ VECTOR_CAST_F2F(F, D, X, D, fcvtl, 2S, 2D)
VECTOR_CAST_F2F(D, F, D, X, fcvtn, 2D, 2S)
dnl

define(`VECTOR_JAVA_FROUND', `
instruct vround$7$2to$5$3($7 dst, $7 src, $7 tmp1, $7 tmp2, $7 tmp3)
%{
  predicate(UseSVE == 0 &&
            n->as_Vector()->length() == $5 && n->bottom_type()->is_vect()->element_basic_type() == T_$6);
  match(Set dst (RoundV$1 src));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
  format %{ "vround $dst, $4, $src\t# round $7 $2 to $5$3 vector" %}
  ins_encode %{
    __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                         as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                         as_FloatRegister($tmp3$$reg), __ $4);
  %}
  ins_pipe(pipe_class_default);
%}')dnl $1 $2 $3 $4 $5 $6 $7
VECTOR_JAVA_FROUND(F, 2F, I, T2S, 2, INT, vecD)
VECTOR_JAVA_FROUND(F, 4F, I, T4S, 4, INT, vecX)
VECTOR_JAVA_FROUND(D, 2D, L, T2D, 2, LONG, vecX)

// ------------------------------ Reduction -------------------------------
dnl
define(`REDUCE_ADD_BORS', `
src/hotspot/cpu/aarch64/aarch64_sve.ad
@@ -162,7 +162,6 @@ source %{
    }
    return op_sve_supported(opcode, vlen, bt);
  }

%}

definitions %{
@@ -3277,6 +3276,54 @@ instruct vroundD(vReg dst, vReg src, immI rmode) %{
  ins_pipe(pipe_slow);
%}

instruct vroundFtoI(vReg dst, vReg src, vReg tmp1, vReg tmp2, vReg tmp3, pRegGov ptmp)
%{
  predicate(UseSVE > 0);
  match(Set dst (RoundVF src));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp);
  format %{ "sve_vround $dst, S, $src\t# round F to I vector" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this);
    if (vlen > 16) {
      __ vector_round_sve(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                          as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                          as_PRegister($ptmp$$reg), __ S);
    } else {
      __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                           as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                           as_FloatRegister($tmp3$$reg),
                           __ esize2arrangement(type2aelembytes(bt),
                                                /*isQ*/ vlen == 16));
    }
  %}
  ins_pipe(pipe_class_default);
%}

instruct vroundDtoL(vReg dst, vReg src, vReg tmp1, vReg tmp2, vReg tmp3, pRegGov ptmp)
%{
  predicate(UseSVE > 0);
  match(Set dst (RoundVD src));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp);
  format %{ "sve_vround $dst, D, $src\t# round D to L vector" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this);
    if (vlen > 16) {
      __ vector_round_sve(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                          as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                          as_PRegister($ptmp$$reg), __ D);
    } else {
      __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                           as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                           as_FloatRegister($tmp3$$reg),
                           __ esize2arrangement(type2aelembytes(bt),
                                                /*isQ*/ vlen == 16));
    }
  %}
  ins_pipe(pipe_class_default);
%}

// vector replicate

instruct replicateB(vReg dst, iRegIorL2I src) %{
src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
@@ -157,7 +157,6 @@ source %{
    }
    return op_sve_supported(opcode, vlen, bt);
  }

%}

definitions %{
@@ -1793,6 +1792,32 @@ instruct vroundD(vReg dst, vReg src, immI rmode) %{
  %}
  ins_pipe(pipe_slow);
%}
define(`VECTOR_JAVA_FROUND', `
instruct vround$1to$3($7 dst, $7 src, $7 tmp1, $7 tmp2, $7 tmp3, pRegGov ptmp)
%{
  predicate(UseSVE > 0);
  match(Set dst (RoundV$1 src));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp);
  format %{ "sve_vround $dst, $4, $src\t# round $1 to $3 vector" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this);
    if (vlen > 16) {
      __ vector_round_sve(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                          as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                          as_PRegister($ptmp$$reg), __ $4);
    } else {
      __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                           as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                           as_FloatRegister($tmp3$$reg),
                           __ esize2arrangement(type2aelembytes(bt),
                                                /*isQ*/ vlen == 16));
    }
  %}
  ins_pipe(pipe_class_default);
%}')dnl $1 $2 $3 $4 $5 $6 $7
VECTOR_JAVA_FROUND(F, 8F, I, S, 8, INT, vReg)
VECTOR_JAVA_FROUND(D, 4D, L, D, 4, LONG, vReg)
dnl
dnl REPLICATE($1, $2, $3, $4, $5 )
dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -306,14 +306,6 @@ public:
    assert_cond((bits & mask) == mask);
    return (insn & mask) >> lsb;
  }

  void fixed(unsigned value, unsigned mask) {
    assert_cond ((mask & bits) == 0);
#ifdef ASSERT
    bits |= mask;
#endif
    insn |= value;
  }
};

#define starti Instruction_aarch64 current_insn(this);
@@ -698,7 +690,6 @@ public:
#define zrf current_insn.zrf
#define prf current_insn.prf
#define pgrf current_insn.pgrf
#define fixed current_insn.fixed

typedef void (Assembler::* uncond_branch_insn)(address dest);
typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest);
@@ -1085,7 +1076,7 @@ public:

  // A more convenient access to dmb for our purposes
  enum Membar_mask_bits {
    // We can use ISH for a barrier because the ARM ARM says "This
    // We can use ISH for a barrier because the Arm ARM says "This
    // architecture assumes that all Processing Elements that use the
    // same operating system or hypervisor are in the same Inner
    // Shareable shareability domain."
@@ -2082,46 +2073,55 @@ public:
#undef INSN

  // Floating-point<->integer conversions
  void float_int_convert(unsigned op31, unsigned type,
  void float_int_convert(unsigned sflag, unsigned ftype,
                         unsigned rmode, unsigned opcode,
                         Register Rd, Register Rn) {
    starti;
    f(op31, 31, 29);
    f(sflag, 31);
    f(0b00, 30, 29);
    f(0b11110, 28, 24);
    f(type, 23, 22), f(1, 21), f(rmode, 20, 19);
    f(ftype, 23, 22), f(1, 21), f(rmode, 20, 19);
    f(opcode, 18, 16), f(0b000000, 15, 10);
    zrf(Rn, 5), zrf(Rd, 0);
  }

#define INSN(NAME, op31, type, rmode, opcode) \
  void NAME(Register Rd, FloatRegister Vn) { \
    float_int_convert(op31, type, rmode, opcode, Rd, as_Register(Vn)); \
#define INSN(NAME, sflag, ftype, rmode, opcode) \
  void NAME(Register Rd, FloatRegister Vn) { \
    float_int_convert(sflag, ftype, rmode, opcode, Rd, as_Register(Vn)); \
  }

  INSN(fcvtzsw, 0b000, 0b00, 0b11, 0b000);
  INSN(fcvtzs, 0b100, 0b00, 0b11, 0b000);
  INSN(fcvtzdw, 0b000, 0b01, 0b11, 0b000);
  INSN(fcvtzd, 0b100, 0b01, 0b11, 0b000);
  INSN(fcvtzsw, 0b0, 0b00, 0b11, 0b000);
  INSN(fcvtzs, 0b1, 0b00, 0b11, 0b000);
  INSN(fcvtzdw, 0b0, 0b01, 0b11, 0b000);
  INSN(fcvtzd, 0b1, 0b01, 0b11, 0b000);

  INSN(fmovs, 0b000, 0b00, 0b00, 0b110);
  INSN(fmovd, 0b100, 0b01, 0b00, 0b110);
  // RoundToNearestTiesAway
  INSN(fcvtassw, 0b0, 0b00, 0b00, 0b100); // float -> signed word
  INSN(fcvtasd, 0b1, 0b01, 0b00, 0b100); // double -> signed xword

  INSN(fmovhid, 0b100, 0b10, 0b01, 0b110);
  // RoundTowardsNegative
  INSN(fcvtmssw, 0b0, 0b00, 0b10, 0b000); // float -> signed word
  INSN(fcvtmsd, 0b1, 0b01, 0b10, 0b000); // double -> signed xword

  INSN(fmovs, 0b0, 0b00, 0b00, 0b110);
  INSN(fmovd, 0b1, 0b01, 0b00, 0b110);

  INSN(fmovhid, 0b1, 0b10, 0b01, 0b110);

#undef INSN

#define INSN(NAME, op31, type, rmode, opcode) \
#define INSN(NAME, sflag, type, rmode, opcode) \
  void NAME(FloatRegister Vd, Register Rn) { \
    float_int_convert(op31, type, rmode, opcode, as_Register(Vd), Rn); \
    float_int_convert(sflag, type, rmode, opcode, as_Register(Vd), Rn); \
  }

  INSN(fmovs, 0b000, 0b00, 0b00, 0b111);
  INSN(fmovd, 0b100, 0b01, 0b00, 0b111);
  INSN(fmovs, 0b0, 0b00, 0b00, 0b111);
  INSN(fmovd, 0b1, 0b01, 0b00, 0b111);

  INSN(scvtfws, 0b000, 0b00, 0b00, 0b010);
  INSN(scvtfs, 0b100, 0b00, 0b00, 0b010);
  INSN(scvtfwd, 0b000, 0b01, 0b00, 0b010);
  INSN(scvtfd, 0b100, 0b01, 0b00, 0b010);
  INSN(scvtfws, 0b0, 0b00, 0b00, 0b010);
  INSN(scvtfs, 0b1, 0b00, 0b00, 0b010);
  INSN(scvtfwd, 0b0, 0b01, 0b00, 0b010);
  INSN(scvtfd, 0b1, 0b01, 0b00, 0b010);

  // INSN(fmovhid, 0b100, 0b10, 0b01, 0b111);

@@ -2510,6 +2510,7 @@ public:

#undef INSN

  // Advanced SIMD modified immediate
#define INSN(NAME, op0, cmode0) \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) { \
    unsigned cmode = cmode0; \
@@ -2537,7 +2538,22 @@ public:

#undef INSN

#define INSN(NAME, op1, op2, op3) \
#define INSN(NAME, op, cmode) \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, double imm) { \
    unsigned imm8 = pack(imm); \
    starti; \
    f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19); \
    f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5); \
    rf(Vd, 0); \
  }

  INSN(fmovs, 0, 0b1111);
  INSN(fmovd, 1, 0b1111);

#undef INSN

  // Advanced SIMD three same
#define INSN(NAME, op1, op2, op3) \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
    starti; \
    assert(T == T2S || T == T4S || T == T2D, "invalid arrangement"); \
@@ -2984,7 +3000,9 @@ public:
  INSN(frintn, 0, 0b00, 0b01, 0b11000);
  INSN(frintm, 0, 0b00, 0b01, 0b11001);
  INSN(frintp, 0, 0b10, 0b01, 0b11000);
  INSN(fcvtas, 0, 0b00, 0b01, 0b11100);
  INSN(fcvtzs, 0, 0b10, 0b01, 0b11011);
  INSN(fcvtms, 0, 0b00, 0b01, 0b11011);
#undef ASSERTION

#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
@@ -3154,6 +3172,7 @@ public:
  INSN(sve_fneg, 0b00000100, 0b011101101);
  INSN(sve_frintm, 0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
  INSN(sve_frintn, 0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
  INSN(sve_frinta, 0b01100101, 0b000100101); // floating-point round to integral value, nearest with ties to away
  INSN(sve_frintp, 0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
  INSN(sve_fsqrt, 0b01100101, 0b001101101);
  INSN(sve_fsub, 0b01100101, 0b000001100);
@@ -3449,8 +3468,9 @@ public:
    pgrf(Pg, 10), srf(Rn, 5), rf(Zd, 0);
  }

  // SVE copy signed integer immediate to vector elements (predicated)
  void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8, bool isMerge) {
private:
  void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8,
               bool isMerge, bool isFloat) {
    starti;
    assert(T != Q, "invalid size");
    int sh = 0;
@@ -3464,7 +3484,17 @@ public:
    }
    int m = isMerge ? 1 : 0;
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b01, 21, 20);
    prf(Pg, 16), f(0b0, 15), f(m, 14), f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
    prf(Pg, 16), f(isFloat ? 1 : 0, 15), f(m, 14), f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
  }

public:
  // SVE copy signed integer immediate to vector elements (predicated)
  void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8, bool isMerge) {
    sve_cpy(Zd, T, Pg, imm8, isMerge, /*isFloat*/false);
  }
  // SVE copy floating-point immediate to vector elements (predicated)
  void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, double d) {
    sve_cpy(Zd, T, Pg, checked_cast<int8_t>(pack(d)), /*isMerge*/true, /*isFloat*/true);
  }

  // SVE conditionally select elements from two vectors
@@ -3528,6 +3558,29 @@ void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
    f(cond_op & 0x1, 4), prf(Pd, 0);
  }

  // SVE Floating-point compare vector with zero
  void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
               PRegister Pg, FloatRegister Zn, double d) {
    starti;
    assert(T != Q, "invalid size");
    guarantee(d == 0.0, "invalid immediate");
    int cond_op;
    switch(cond) {
      case EQ: cond_op = 0b100; break;
      case GT: cond_op = 0b001; break;
      case GE: cond_op = 0b000; break;
      case LT: cond_op = 0b010; break;
      case LE: cond_op = 0b011; break;
      case NE: cond_op = 0b110; break;
      default:
        ShouldNotReachHere();
    }
    f(0b01100101, 31, 24), f(T, 23, 22), f(0b0100, 21, 18),
    f((cond_op >> 1) & 0x3, 17, 16), f(0b001, 15, 13),
    pgrf(Pg, 10), rf(Zn, 5);
    f(cond_op & 0x1, 4), prf(Pd, 0);
  }

  // SVE unpack vector elements
#define INSN(NAME, op) \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn) { \
src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -1267,3 +1267,74 @@ void C2_MacroAssembler::sve_ptrue_lanecnt(PRegister dst, SIMD_RegVariant size, i
      ShouldNotReachHere();
  }
}

// java.lang.Math::round intrinsics

void C2_MacroAssembler::vector_round_neon(FloatRegister dst, FloatRegister src, FloatRegister tmp1,
                                          FloatRegister tmp2, FloatRegister tmp3, SIMD_Arrangement T) {
  assert_different_registers(tmp1, tmp2, tmp3, src, dst);
  switch (T) {
    case T2S:
    case T4S:
      fmovs(tmp1, T, 0.5f);
      mov(rscratch1, jint_cast(0x1.0p23f));
      break;
    case T2D:
      fmovd(tmp1, T, 0.5);
      mov(rscratch1, julong_cast(0x1.0p52));
      break;
    default:
      assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
  }
  fadd(tmp1, T, tmp1, src);
  fcvtms(tmp1, T, tmp1);
  // tmp1 = floor(src + 0.5, ties to even)

  fcvtas(dst, T, src);
  // dst = round(src), ties to away

  fneg(tmp3, T, src);
  dup(tmp2, T, rscratch1);
  cmhs(tmp3, T, tmp3, tmp2);
  // tmp3 is now a set of flags

  bif(dst, T16B, tmp1, tmp3);
  // result in dst
}

void C2_MacroAssembler::vector_round_sve(FloatRegister dst, FloatRegister src, FloatRegister tmp1,
                                         FloatRegister tmp2, PRegister ptmp, SIMD_RegVariant T) {
  assert_different_registers(tmp1, tmp2, src, dst);

  switch (T) {
    case S:
      mov(rscratch1, jint_cast(0x1.0p23f));
      break;
    case D:
      mov(rscratch1, julong_cast(0x1.0p52));
      break;
    default:
      assert(T == S || T == D, "invalid arrangement");
  }

  sve_frinta(dst, T, ptrue, src);
  // dst = round(src), ties to away

  Label none;

  sve_fneg(tmp1, T, ptrue, src);
  sve_dup(tmp2, T, rscratch1);
  sve_cmp(HS, ptmp, T, ptrue, tmp2, tmp1);
  br(EQ, none);
  {
    sve_cpy(tmp1, T, ptmp, 0.5);
    sve_fadd(tmp1, T, ptmp, src);
    sve_frintm(dst, T, ptmp, tmp1);
    // dst = floor(src + 0.5, ties to even)
  }
  bind(none);

  sve_fcvtzs(dst, T, ptrue, dst, T);
  // result in dst
}
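The two helpers above compute Math.round element-wise by producing two candidates, a ties-to-away rounding (fcvtas / sve_frinta) and floor(x + 0.5) (fadd + fcvtms / sve_frintm), and then selecting per lane: the floor(x + 0.5) path is used only for lanes that are negative with magnitude below 2^52 (2^23 for float). A minimal scalar sketch of that selection in plain Java follows; it is an illustration only, not JDK code, and the helper names roundTiesAway and floorAfterHalf are invented here to mirror the instruction sequences.

// Scalar model (illustration only) of the per-lane selection done by
// vector_round_neon / vector_round_sve for doubles.
public class RoundBlendSketch {
    // Models fcvtas / sve_frinta: round to nearest, ties away from zero.
    static long roundTiesAway(double x) {
        return (long) Math.copySign(Math.floor(Math.abs(x) + 0.5), x);
    }

    // Models fadd 0.5 followed by fcvtms / sve_frintm: floor(x + 0.5).
    static long floorAfterHalf(double x) {
        return (long) Math.floor(x + 0.5);
    }

    static long javaRound(double x) {
        // Keep the ties-away result unless x is negative with |x| < 2^52; this mirrors
        // the unsigned compare of -x against 0x1.0p52 (cmhs / sve_cmp HS) that drives
        // the bif blend or the predicated merge.
        boolean useFloorPath = (x < 0.0) && (-x < 0x1.0p52);
        return useFloorPath ? floorAfterHalf(x) : roundTiesAway(x);
    }

    public static void main(String[] args) {
        double[] samples = {2.5, -2.5, -0.5, 1.4, -1.6, 0x1.0p52, -0x1.0p52};
        for (double d : samples) {
            System.out.println(d + ": sketch=" + javaRound(d) + ", Math.round=" + Math.round(d));
        }
    }
}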
src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -103,4 +103,12 @@
    sve_lastb(dst, size, pg, src);
  }

  // java.lang.Math::round intrinsics
  void vector_round_neon(FloatRegister dst, FloatRegister src, FloatRegister tmp1,
                         FloatRegister tmp2, FloatRegister tmp3,
                         SIMD_Arrangement T);
  void vector_round_sve(FloatRegister dst, FloatRegister src, FloatRegister tmp1,
                        FloatRegister tmp2, PRegister ptmp,
                        SIMD_RegVariant T);

#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -5178,6 +5178,56 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
  csel(res, res, zr, EQ);
}

// java.lang.Math.round(double a)
// Returns the closest long to the argument, with ties rounding to
// positive infinity. This requires some fiddling for corner
// cases. We take care to avoid double rounding in e.g. (jlong)(a + 0.5).
void MacroAssembler::java_round_double(Register dst, FloatRegister src,
                                       FloatRegister ftmp) {
  Label DONE;
  BLOCK_COMMENT("java_round_double: { ");
  fmovd(rscratch1, src);
  // Use RoundToNearestTiesAway unless src small and -ve.
  fcvtasd(dst, src);
  // Test if src >= 0 || abs(src) >= 0x1.0p52
  eor(rscratch1, rscratch1, UCONST64(1) << 63); // flip sign bit
  mov(rscratch2, julong_cast(0x1.0p52));
  cmp(rscratch1, rscratch2);
  br(HS, DONE); {
    // src < 0 && abs(src) < 0x1.0p52
    // src may have a fractional part, so add 0.5
    fmovd(ftmp, 0.5);
    faddd(ftmp, src, ftmp);
    // Convert double to jlong, use RoundTowardsNegative
    fcvtmsd(dst, ftmp);
  }
  bind(DONE);
  BLOCK_COMMENT("} java_round_double");
}

void MacroAssembler::java_round_float(Register dst, FloatRegister src,
                                      FloatRegister ftmp) {
  Label DONE;
  BLOCK_COMMENT("java_round_float: { ");
  fmovs(rscratch1, src);
  // Use RoundToNearestTiesAway unless src small and -ve.
  fcvtassw(dst, src);
  // Test if src >= 0 || abs(src) >= 0x1.0p23
  eor(rscratch1, rscratch1, 0x80000000); // flip sign bit
  mov(rscratch2, jint_cast(0x1.0p23f));
  cmp(rscratch1, rscratch2);
  br(HS, DONE); {
    // src < 0 && |src| < 0x1.0p23
    // src may have a fractional part, so add 0.5
    fmovs(ftmp, 0.5f);
    fadds(ftmp, src, ftmp);
    // Convert float to jint, use RoundTowardsNegative
    fcvtmssw(dst, ftmp);
  }
  bind(DONE);
  BLOCK_COMMENT("} java_round_float");
}

// get_thread() can be called anywhere inside generated code so we
// need to save whatever non-callee save context might get clobbered
// by the call to JavaThread::aarch64_get_thread_helper() or, indeed,
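The scalar stubs guard the same corner case: Java's Math.round rounds halfway values toward positive infinity, while fcvtas rounds them away from zero, so the two disagree only for small negative inputs, which is exactly the branch taken when the magnitude is below the 0x1.0p52 / 0x1.0p23f threshold. A quick plain-Java illustration of that disagreement (not part of the patch):

// Illustration only: where ties-to-away differs from Math.round's ties-toward-+infinity.
public class RoundCornerCases {
    public static void main(String[] args) {
        double[] samples = {2.5, -0.5, -2.5, -3.5};
        for (double d : samples) {
            long tiesAway = (long) Math.copySign(Math.floor(Math.abs(d) + 0.5), d); // fcvtas-like
            System.out.printf("%6.1f  Math.round=%4d  ties-away=%4d%n", d, Math.round(d), tiesAway);
        }
        // Once |x| >= 2^52 a double has no fractional part, so adding 0.5 is unnecessary
        // (and could introduce double rounding); hence the threshold test in the stubs.
        System.out.println(Math.round(0x1.0p52) == (long) 0x1.0p52);
    }
}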
src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -877,6 +877,10 @@ public:
  // Round up to a power of two
  void round_to(Register reg, int modulus);

  // java.lang.Math::round intrinsics
  void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
  void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);

  // allocation
  void eden_allocate(
    Register obj,   // result: pointer to object after successful allocation
src/hotspot/cpu/aarch64/matcher_aarch64.hpp
@@ -165,8 +165,23 @@

  // Returns pre-selection estimated size of a vector operation.
  static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
    return 0;
    switch(vopc) {
      default: return 0;
      case Op_RoundVF: // fall through
      case Op_RoundVD: {
        return 15;
      }
    }
  }
  // Returns pre-selection estimated size of a scalar operation.
  static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
    switch(vopc) {
      default: return 0;
      case Op_RoundF: // fall through
      case Op_RoundD: {
        return 15;
      }
    }
  }

#endif // CPU_AARCH64_MATCHER_AARCH64_HPP
src/hotspot/cpu/arm/matcher_arm.hpp
@@ -155,9 +155,25 @@
  // Implements a variant of EncodeISOArrayNode that encode ASCII only
  static const bool supports_encode_ascii_array = false;

  // Returns pre-selection estimated cost of a vector operation.
  // Returns pre-selection estimated size of a vector operation.
  static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
    return 0;
    switch(vopc) {
      default: return 0;
      case Op_RoundVF: // fall through
      case Op_RoundVD: {
        return 30;
      }
    }
  }
  // Returns pre-selection estimated size of a scalar operation.
  static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
    switch(vopc) {
      default: return 0;
      case Op_RoundF: // fall through
      case Op_RoundD: {
        return 30;
      }
    }
  }

#endif // CPU_ARM_MATCHER_ARM_HPP
src/hotspot/cpu/ppc/matcher_ppc.hpp
@@ -164,10 +164,25 @@
  // Implements a variant of EncodeISOArrayNode that encode ASCII only
  static const bool supports_encode_ascii_array = true;

  // Returns pre-selection estimated cost of a vector operation.
  // Returns pre-selection estimated size of a vector operation.
  static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
    return 0;
    switch(vopc) {
      default: return 0;
      case Op_RoundVF: // fall through
      case Op_RoundVD: {
        return 30;
      }
    }
  }
  // Returns pre-selection estimated size of a scalar operation.
  static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
    switch(vopc) {
      default: return 0;
      case Op_RoundF: // fall through
      case Op_RoundD: {
        return 30;
      }
    }
  }

#endif // CPU_PPC_MATCHER_PPC_HPP
src/hotspot/cpu/riscv/matcher_riscv.hpp
@@ -163,7 +163,23 @@

  // Returns pre-selection estimated size of a vector operation.
  static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
    return 0;
    switch(vopc) {
      default: return 0;
      case Op_RoundVF: // fall through
      case Op_RoundVD: {
        return 30;
      }
    }
  }
  // Returns pre-selection estimated size of a scalar operation.
  static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
    switch(vopc) {
      default: return 0;
      case Op_RoundF: // fall through
      case Op_RoundD: {
        return 30;
      }
    }
  }

#endif // CPU_RISCV_MATCHER_RISCV_HPP
src/hotspot/cpu/s390/matcher_s390.hpp
@@ -153,9 +153,25 @@
  // Implements a variant of EncodeISOArrayNode that encode ASCII only
  static const bool supports_encode_ascii_array = true;

  // Returns pre-selection estimated cost of a vector operation.
  // Returns pre-selection estimated size of a vector operation.
  static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
    return 0;
    switch(vopc) {
      default: return 0;
      case Op_RoundVF: // fall through
      case Op_RoundVD: {
        return 30;
      }
    }
  }
  // Returns pre-selection estimated size of a scalar operation.
  static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
    switch(vopc) {
      default: return 0;
      case Op_RoundF: // fall through
      case Op_RoundD: {
        return 30;
      }
    }
  }

#endif // CPU_S390_MATCHER_S390_HPP
src/hotspot/cpu/x86/matcher_x86.hpp
@@ -183,12 +183,26 @@
  // Implements a variant of EncodeISOArrayNode that encode ASCII only
  static const bool supports_encode_ascii_array = true;

  // Returns pre-selection estimated cost of a vector operation.
  // Returns pre-selection estimated size of a vector operation.
  static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
    switch(vopc) {
      default: return 0;
      case Op_PopCountVI: return VM_Version::supports_avx512_vpopcntdq() ? 0 : 50;
      case Op_PopCountVL: return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40;
      case Op_RoundVF: // fall through
      case Op_RoundVD: {
        return 30;
      }
    }
  }
  // Returns pre-selection estimated size of a scalar operation.
  static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
    switch(vopc) {
      default: return 0;
      case Op_RoundF: // fall through
      case Op_RoundD: {
        return 30;
      }
    }
  }

src/hotspot/share/opto/loopTransform.cpp
@@ -970,10 +970,12 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
      case Op_ModL: body_size += 30; break;
      case Op_DivL: body_size += 30; break;
      case Op_MulL: body_size += 10; break;
      case Op_RoundF: body_size += 30; break;
      case Op_RoundD: body_size += 30; break;
      case Op_RoundVF: body_size += 30; break;
      case Op_RoundVD: body_size += 30; break;
      case Op_RoundF:
      case Op_RoundD: {
        body_size += Matcher::scalar_op_pre_select_sz_estimate(n->Opcode(), n->bottom_type()->basic_type());
      } break;
      case Op_RoundVF:
      case Op_RoundVD:
      case Op_PopCountVI:
      case Op_PopCountVL: {
        const TypeVect* vt = n->bottom_type()->is_vect();
test/hotspot/gtest/aarch64/aarch64-asmtest.py
@@ -462,6 +462,29 @@ class SVEBinaryImmOp(Instruction):
        return (formatStr
                % tuple([Instruction.astr(self)] + Regs + [self.immed]))

class SVEComparisonWithZero(Instruction):
    def __init__(self, arg):
        Instruction.__init__(self, "fcm")
        self.condition = arg
        self.dest = OperandFactory.create('p').generate()
        self.reg = SVEVectorRegister().generate()
        self._width = RegVariant(2, 3)
        self.preg = OperandFactory.create('P').generate()

    def generate(self):
        return Instruction.generate(self)

    def cstr(self):
        return ("%s(%s, %s, %s, %s, %s, 0.0);"
                % ("__ sve_" + self._name, "Assembler::" + self.condition,
                   str(self.dest), self._width.cstr(), str(self.preg), str(self.reg)))

    def astr(self):
        val = ("%s%s\t%s%s, %s/z, %s%s, #0.0"
               % (self._name, self.condition.lower(), str(self.dest), self._width.astr(),
                  str(self.preg), str(self.reg), self._width.astr()))
        return val

class MultiOp():

    def multipleForms(self):
@@ -1444,6 +1467,8 @@ generate(FloatConvertOp, [["fcvtzsw", "fcvtzs", "ws"], ["fcvtzs", "fcvtzs", "xs"
        ["fcvtzdw", "fcvtzs", "wd"], ["fcvtzd", "fcvtzs", "xd"],
        ["scvtfws", "scvtf", "sw"], ["scvtfs", "scvtf", "sx"],
        ["scvtfwd", "scvtf", "dw"], ["scvtfd", "scvtf", "dx"],
        ["fcvtassw", "fcvtas", "ws"], ["fcvtasd", "fcvtas", "xd"],
        ["fcvtmssw", "fcvtms", "ws"], ["fcvtmsd", "fcvtms", "xd"],
        ["fmovs", "fmov", "ws"], ["fmovd", "fmov", "xd"],
        ["fmovs", "fmov", "sw"], ["fmovd", "fmov", "dx"]])

@@ -1590,6 +1615,8 @@ generate(ThreeRegNEONOp,
        ["fcmge", "fcmge", "2D"],
        ])

generate(SVEComparisonWithZero, ["EQ", "GT", "GE", "LT", "LE", "NE"])

generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"],
        ["ccmnw", "__ ccmnw(zr, zr, 5u, Assembler::EQ);", "ccmn\twzr, wzr, #5, EQ"],
        ["ccmp", "__ ccmp(zr, 1, 4u, Assembler::NE);", "ccmp\txzr, 1, #4, NE"],
@@ -1613,8 +1640,12 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
        ["umov", "__ umov(r0, v1, __ H, 2);", "umov\tw0, v1.h[2]"],
        ["umov", "__ umov(r0, v1, __ B, 3);", "umov\tw0, v1.b[3]"],
        ["fmov", "__ fmovhid(r0, v1);", "fmov\tx0, v1.d[1]"],
        ["fmov", "__ fmovs(v9, __ T2S, 0.5f);", "fmov\tv9.2s, 0.5"],
        ["fmov", "__ fmovd(v14, __ T2D, 0.5f);", "fmov\tv14.2d, 0.5"],
        ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"],
        ["fcvtzs", "__ fcvtzs(v0, __ T4S, v1);", "fcvtzs\tv0.4s, v1.4s"],
        ["fcvtzs", "__ fcvtzs(v0, __ T2S, v1);", "fcvtzs\tv0.2s, v1.2s"],
        ["fcvtas", "__ fcvtas(v2, __ T4S, v3);", "fcvtas\tv2.4s, v3.4s"],
        ["fcvtms", "__ fcvtms(v4, __ T2D, v5);", "fcvtms\tv4.2d, v5.2d"],
        # SVE instructions
        ["cpy", "__ sve_cpy(z0, __ S, p0, v1);", "mov\tz0.s, p0/m, s1"],
        ["cpy", "__ sve_cpy(z0, __ B, p0, 127, true);", "mov\tz0.b, p0/m, 127"],
(One file's diff is suppressed because it is too large.)
test/hotspot/jtreg/compiler/vectorization/TestRoundVectAArch64.java (new file)
@@ -0,0 +1,94 @@
/*
 * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/**
 * @test
 * @bug 8282541
 * @summary Auto-vectorize Math.round API
 * @requires vm.compiler2.enabled
 * @requires os.simpleArch == "aarch64"
 * @library /test/lib /
 * @run driver compiler.vectorization.TestRoundVectAArch64
 */

package compiler.vectorization;

import compiler.lib.ir_framework.*;

public class TestRoundVectAArch64 {
  private static final int ARRLEN = 1024;
  private static final int ITERS = 11000;

  private static double[] dinp;
  private static long[] lout;
  private static float[] finp;
  private static int[] iout;

  public static void main(String args[]) {
    if (System.getProperty("os.arch").equals("aarch64")) {
      TestFramework.runWithFlags("-XX:-TieredCompilation",
                                 "-XX:CompileThresholdScaling=0.3");
    }
    System.out.println("PASSED");
  }

  @Test
  @IR(counts = {"RoundVD" , " > 0 "})
  public void test_round_double(long[] lout, double[] dinp) {
    for (int i = 0; i < lout.length; i += 1) {
      lout[i] = Math.round(dinp[i]);
    }
  }

  @Run(test = {"test_round_double"}, mode = RunMode.STANDALONE)
  public void kernel_test_round_double() {
    dinp = new double[ARRLEN];
    lout = new long[ARRLEN];
    for (int i = 0; i < ARRLEN; i++) {
      dinp[i] = (double)i * 1.4;
    }
    for (int i = 0; i < ITERS; i++) {
      test_round_double(lout, dinp);
    }
  }

  @Test
  @IR(counts = {"RoundVF" , " > 0 "})
  public void test_round_float(int[] iout, float[] finp) {
    for (int i = 0; i < finp.length; i += 1) {
      iout[i] = Math.round(finp[i]);
    }
  }

  @Run(test = {"test_round_float"}, mode = RunMode.STANDALONE)
  public void kernel_test_round() {
    finp = new float[ARRLEN];
    iout = new int[ARRLEN];
    for (int i = 0; i < ARRLEN; i++) {
      finp[i] = (float)i * 1.4f;
    }
    for (int i = 0; i < ITERS; i++) {
      test_round_float(iout, finp);
    }
  }
}