diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index edf2e0b26ca..50ad3efbdb4 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -4481,6 +4481,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm bge(cnt, tmp1, loop); } +// java.lang.Math.round(float a) +// Returns the closest int to the argument, with ties rounding to positive infinity. +void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) { + // this instructions calling sequence provides performance improvement on all tested devices; + // don't change it without re-verification + Label done; + mv(t0, jint_cast(0.5f)); + fmv_w_x(ftmp, t0); + + // dst = 0 if NaN + feq_s(t0, src, src); // replacing fclass with feq as performance optimization + mv(dst, zr); + beqz(t0, done); + + // dst = (src + 0.5f) rounded down towards negative infinity + // Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place. + // RDN is required for fadd_s, RNE gives incorrect results: + // -------------------------------------------------------------------- + // fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000 + // fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610 + // -------------------------------------------------------------------- + // fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000 + // fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609 + // -------------------------------------------------------------------- + fadd_s(ftmp, src, ftmp, RoundingMode::rdn); + fcvt_w_s(dst, ftmp, RoundingMode::rdn); + + bind(done); +} + +// java.lang.Math.round(double a) +// Returns the closest long to the argument, with ties rounding to positive infinity. +void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) { + // this instructions calling sequence provides performance improvement on all tested devices; + // don't change it without re-verification + Label done; + mv(t0, julong_cast(0.5)); + fmv_d_x(ftmp, t0); + + // dst = 0 if NaN + feq_d(t0, src, src); // replacing fclass with feq as performance optimization + mv(dst, zr); + beqz(t0, done); + + // dst = (src + 0.5) rounded down towards negative infinity + fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results + fcvt_l_d(dst, ftmp, RoundingMode::rdn); + + bind(done); +} + #define FCVT_SAFE(FLOATCVT, FLOATSIG) \ void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ Label done; \ diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 042570fd0c6..3e020674624 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -1265,6 +1265,9 @@ public: void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); + void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp); + void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp); + // vector load/store unit-stride instructions void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { switch (sew) { diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 5af25b26a16..d5a95b2e3ba 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -8417,6 +8417,34 @@ instruct convN2I(iRegINoSp dst, iRegN src) ins_pipe(ialu_reg); %} +instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{ + match(Set dst (RoundD src)); + + ins_cost(XFER_COST + BRANCH_COST); + effect(TEMP ftmp); + format %{ "java_round_double $dst, $src\t#@round_double_reg" %} + + ins_encode %{ + __ java_round_double($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg)); + %} + + ins_pipe(pipe_slow); +%} + +instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{ + match(Set dst (RoundF src)); + + ins_cost(XFER_COST + BRANCH_COST); + effect(TEMP ftmp); + format %{ "java_round_float $dst, $src\t#@round_float_reg" %} + + ins_encode %{ + __ java_round_float($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg)); + %} + + ins_pipe(pipe_slow); +%} + // Convert oop pointer into compressed form instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{ match(Set dst (EncodeP src));