8318158: RISC-V: implement roundD/roundF intrinsics

Co-authored-by: Vladimir Kempik <vkempik@openjdk.org>
Reviewed-by: luhenry, fyang, mli
2023-12-29 18:33:43 +00:00 · 2023-12-29 18:33:43 +00:00 · 19147f326c
commit 19147f326c
parent 2a59243cba
3 changed files with 82 additions and 0 deletions
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@ -4481,6 +4481,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
  bge(cnt, tmp1, loop);
 }
 // java.lang.Math.round(float a)
 // Returns the closest int to the argument, with ties rounding to positive infinity.
 //
 // Emitted computation: dst = (src is NaN) ? 0 : convert_rdn(add_rdn(src, 0.5f)),
 // i.e. both the addition and the float->int conversion round towards negative infinity.
 //
 //   dst  - integer result register; zeroed first, then overwritten unless src is NaN
 //   src  - float input register; only read by this sequence
 //   ftmp - float scratch register; overwritten with 0.5f and then with (src + 0.5f)
 //
 // The integer scratch register t0 is overwritten as well.
 // NOTE(review): infinite / out-of-range inputs are not special-cased here; this presumably
 // relies on fcvt.w.s saturating to the int range - confirm against the RISC-V F extension spec.
 void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
  // This instruction sequence was measured to be faster on all tested devices;
  // don't change it without re-verification.
  Label done;
  // Materialize the bit pattern of 0.5f in t0 and move it, unconverted, into ftmp.
  mv(t0, jint_cast(0.5f));
  fmv_w_x(ftmp, t0);
  // dst = 0 if NaN
  // (src == src) is false only for NaN, so t0 == 0 identifies a NaN input. t0 can be reused
  // here because the 0.5f bits have already been copied into ftmp.
  feq_s(t0, src, src); // replacing fclass with feq as performance optimization
  // Preload the NaN result before branching so that the early exit needs no extra store.
  mv(dst, zr);
  beqz(t0, done);
  // dst = (src + 0.5f) rounded down towards negative infinity
  //   Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
  //   RDN is required for fadd_s, RNE gives incorrect results:
  //     --------------------------------------------------------------------
  //     fadd.s rne (src + 0.5f): src = 8388609.000000  ftmp = 8388610.000000
  //     fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
  //     --------------------------------------------------------------------
  //     fadd.s rdn (src + 0.5f): src = 8388609.000000  ftmp = 8388609.000000
  //     fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
  //     --------------------------------------------------------------------
  fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
  // Converting with RDN implements the floor() step of floor(src + 0.5f).
  fcvt_w_s(dst, ftmp, RoundingMode::rdn);
  bind(done);
 }
 // java.lang.Math.round(double a)
 // Returns the closest long to the argument, with ties rounding to positive infinity.
 //
 // Emitted computation: dst = (src is NaN) ? 0 : convert_rdn(add_rdn(src, 0.5)),
 // i.e. both the addition and the double->long conversion round towards negative infinity.
 //
 //   dst  - integer result register; zeroed first, then overwritten unless src is NaN
 //   src  - double input register; only read by this sequence
 //   ftmp - floating-point scratch register; overwritten with 0.5 and then with (src + 0.5)
 //
 // The integer scratch register t0 is overwritten as well.
 // NOTE(review): infinite / out-of-range inputs are not special-cased here; this presumably
 // relies on fcvt.l.d saturating to the long range - confirm against the RISC-V D extension spec.
 void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
  // This instruction sequence was measured to be faster on all tested devices;
  // don't change it without re-verification.
  Label done;
  // Materialize the bit pattern of 0.5 in t0 and move it, unconverted, into ftmp.
  mv(t0, julong_cast(0.5));
  fmv_d_x(ftmp, t0);
  // dst = 0 if NaN
  // (src == src) is false only for NaN, so t0 == 0 identifies a NaN input. t0 can be reused
  // here because the 0.5 bits have already been copied into ftmp.
  feq_d(t0, src, src); // replacing fclass with feq as performance optimization
  // Preload the NaN result before branching so that the early exit needs no extra store.
  mv(dst, zr);
  beqz(t0, done);
  // dst = (src + 0.5) rounded down towards negative infinity
  // The sum is not always exactly representable; see the worked example in java_round_float
  // for how a round-to-nearest-even addition can push the result up to the next integer.
  fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
  // Converting with RDN implements the floor() step of floor(src + 0.5).
  fcvt_l_d(dst, ftmp, RoundingMode::rdn);
  bind(done);
 }
 #define FCVT_SAFE(FLOATCVT, FLOATSIG)                                                     \
 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) {     \
  Label done;                                                                             \
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@ -1265,6 +1265,9 @@ public:
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  // java.lang.Math.round(float): dst = closest int to src, ties rounding to positive infinity,
  // 0 for NaN. ftmp is a floating-point scratch register and is overwritten; the implementation
  // also uses t0 as an integer scratch register.
  void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
  // java.lang.Math.round(double): dst = closest long to src, ties rounding to positive infinity,
  // 0 for NaN. ftmp is a floating-point scratch register and is overwritten; the implementation
  // also uses t0 as an integer scratch register.
  void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
  // vector load/store unit-stride instructions
  void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@ -8417,6 +8417,34 @@ instruct convN2I(iRegINoSp dst, iRegN src)
  ins_pipe(ialu_reg);
 %}
 // Math.round(double) intrinsic: matches the ideal RoundD node (double in, long out) and
 // expands to MacroAssembler::java_round_double.
 // ftmp is declared TEMP because that routine overwrites it while computing src + 0.5.
 // NOTE(review): the routine also writes t0, which is not listed as an effect here -
 // presumably t0 is a scratch register that is never handed out by the allocator; confirm.
 instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{
  match(Set dst (RoundD src));
  ins_cost(XFER_COST + BRANCH_COST);
  effect(TEMP ftmp);
  format %{ "java_round_double $dst, $src\t#@round_double_reg" %}
  ins_encode %{
    __ java_round_double($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
  %}
  ins_pipe(pipe_slow);
 %}
 // Math.round(float) intrinsic: matches the ideal RoundF node (float in, int out) and
 // expands to MacroAssembler::java_round_float.
 // ftmp is declared TEMP because that routine overwrites it while computing src + 0.5f.
 // NOTE(review): the routine also writes t0, which is not listed as an effect here -
 // presumably t0 is a scratch register that is never handed out by the allocator; confirm.
 instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{
  match(Set dst (RoundF src));
  ins_cost(XFER_COST + BRANCH_COST);
  effect(TEMP ftmp);
  format %{ "java_round_float $dst, $src\t#@round_float_reg" %}
  ins_encode %{
    __ java_round_float($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
  %}
  ins_pipe(pipe_slow);
 %}
 // Convert oop pointer into compressed form
 instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodeP src));