8318158: RISC-V: implement roundD/roundF intrinsics

Co-authored-by: Vladimir Kempik <vkempik@openjdk.org>
Reviewed-by: luhenry, fyang, mli
This commit is contained in:
Olga Mikhaltsova 2023-12-29 18:33:43 +00:00 committed by Vladimir Kempik
parent 2a59243cba
commit 19147f326c
3 changed files with 82 additions and 0 deletions

View File

@ -4481,6 +4481,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
bge(cnt, tmp1, loop);
}
// java.lang.Math.round(float a)
// Returns the closest int to the argument, with ties rounding to positive infinity.
//
// Emitted computation: dst = 0 if src is NaN, otherwise dst = floor(src + 0.5f), where both
// the addition and the float-to-int conversion use the round-down (RDN) rounding mode.
//   dst  - integer register that receives the result
//   src  - float register holding the argument
//   ftmp - scratch float register; overwritten with 0.5f and then with src + 0.5f
// Clobbers t0, which first carries the bit pattern of 0.5f and then the NaN test result.
void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
// This exact instruction sequence was measured to perform best on all tested devices;
// do not reorder or replace instructions without re-verifying performance.
Label done;
// Materialize the constant 0.5f in ftmp by moving its raw bit pattern through t0.
mv(t0, jint_cast(0.5f));
fmv_w_x(ftmp, t0);
// dst = 0 if NaN
// feq.s of src with itself yields 0 only when src is NaN, so t0 == 0 selects the NaN path.
// NOTE(review): the explicit NaN case is presumably needed because fcvt.w.s on a NaN input
// does not produce 0 - confirm against the RISC-V ISA manual.
feq_s(t0, src, src); // replacing fclass with feq as performance optimization
// Preset the result to 0 so the NaN path can branch straight to 'done'.
mv(dst, zr);
beqz(t0, done);
// dst = (src + 0.5f) rounded down towards negative infinity
// Adding 0.5f to some floats exceeds the precision limits for a float, so the sum itself is rounded.
// RDN is required for fadd_s; RNE (round to nearest, ties to even) gives incorrect results.
// Example with src = 8388609.0, whose exact sum 8388609.5 is not representable as a float:
// --------------------------------------------------------------------
// fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
// fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610   (wrong)
// --------------------------------------------------------------------
// fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
// fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609   (correct)
// --------------------------------------------------------------------
fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
// Converting with RDN takes the floor of the sum, completing floor(src + 0.5f).
fcvt_w_s(dst, ftmp, RoundingMode::rdn);
bind(done);
}
// java.lang.Math.round(double a)
// Returns the closest long to the argument, with ties rounding to positive infinity.
//
// Emitted computation: dst = 0 if src is NaN, otherwise dst = floor(src + 0.5), where both
// the addition and the double-to-long conversion use the round-down (RDN) rounding mode.
//   dst  - integer register that receives the result
//   src  - double register holding the argument
//   ftmp - scratch double register; overwritten with 0.5 and then with src + 0.5
// Clobbers t0, which first carries the bit pattern of 0.5 and then the NaN test result.
void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
// This exact instruction sequence was measured to perform best on all tested devices;
// do not reorder or replace instructions without re-verifying performance.
Label done;
// Materialize the constant 0.5 in ftmp by moving its raw 64-bit pattern through t0.
mv(t0, julong_cast(0.5));
fmv_d_x(ftmp, t0);
// dst = 0 if NaN
// feq.d of src with itself yields 0 only when src is NaN, so t0 == 0 selects the NaN path.
feq_d(t0, src, src); // replacing fclass with feq as performance optimization
// Preset the result to 0 so the NaN path can branch straight to 'done'.
mv(dst, zr);
beqz(t0, done);
// dst = (src + 0.5) rounded down towards negative infinity
// As in the float variant, the sum may not be exactly representable and is itself rounded;
// rounding it to nearest could push it up to the next integer and yield a result one too large.
fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
// Converting with RDN takes the floor of the sum, completing floor(src + 0.5).
fcvt_l_d(dst, ftmp, RoundingMode::rdn);
bind(done);
}
#define FCVT_SAFE(FLOATCVT, FLOATSIG) \
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
Label done; \

View File

@ -1265,6 +1265,9 @@ public:
void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
// Emit the java.lang.Math.round(float) sequence: dst = 0 for NaN, otherwise floor(src + 0.5f) as an int.
// ftmp is a scratch FP register that gets overwritten; t0 is clobbered as well.
void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
// Emit the java.lang.Math.round(double) sequence: dst = 0 for NaN, otherwise floor(src + 0.5) as a long.
// ftmp is a scratch FP register that gets overwritten; t0 is clobbered as well.
void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
// vector load/store unit-stride instructions
void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
switch (sew) {

View File

@ -8417,6 +8417,34 @@ instruct convN2I(iRegINoSp dst, iRegN src)
ins_pipe(ialu_reg);
%}
// Math.round(double): matches the RoundD node and expands it to
// MacroAssembler::java_round_double, which leaves the rounded long in $dst.
// $ftmp is declared TEMP because the emitted sequence overwrites it as scratch;
// it is not shown in the format string.
// NOTE(review): the cost presumably models the int<->FP register transfer plus the
// NaN-check branch in the expansion - confirm against the cost definitions in this file.
instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{
match(Set dst (RoundD src));
ins_cost(XFER_COST + BRANCH_COST);
effect(TEMP ftmp);
format %{ "java_round_double $dst, $src\t#@round_double_reg" %}
ins_encode %{
__ java_round_double($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
%}
ins_pipe(pipe_slow);
%}
// Math.round(float): matches the RoundF node and expands it to
// MacroAssembler::java_round_float, which leaves the rounded int in $dst.
// $ftmp is declared TEMP because the emitted sequence overwrites it as scratch;
// it is not shown in the format string.
// NOTE(review): the cost presumably models the int<->FP register transfer plus the
// NaN-check branch in the expansion - confirm against the cost definitions in this file.
instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{
match(Set dst (RoundF src));
ins_cost(XFER_COST + BRANCH_COST);
effect(TEMP ftmp);
format %{ "java_round_float $dst, $src\t#@round_float_reg" %}
ins_encode %{
__ java_round_float($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
%}
ins_pipe(pipe_slow);
%}
// Convert oop pointer into compressed form
instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
match(Set dst (EncodeP src));