8318158: RISC-V: implement roundD/roundF intrinsics
Co-authored-by: Vladimir Kempik <vkempik@openjdk.org> Reviewed-by: luhenry, fyang, mli
This commit is contained in:
parent
2a59243cba
commit
19147f326c
@ -4481,6 +4481,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
|
||||
bge(cnt, tmp1, loop);
|
||||
}
|
||||
|
||||
// java.lang.Math.round(float a)
|
||||
// Returns the closest int to the argument, with ties rounding to positive infinity.
|
||||
void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
|
||||
// this instructions calling sequence provides performance improvement on all tested devices;
|
||||
// don't change it without re-verification
|
||||
Label done;
|
||||
mv(t0, jint_cast(0.5f));
|
||||
fmv_w_x(ftmp, t0);
|
||||
|
||||
// dst = 0 if NaN
|
||||
feq_s(t0, src, src); // replacing fclass with feq as performance optimization
|
||||
mv(dst, zr);
|
||||
beqz(t0, done);
|
||||
|
||||
// dst = (src + 0.5f) rounded down towards negative infinity
|
||||
// Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
|
||||
// RDN is required for fadd_s, RNE gives incorrect results:
|
||||
// --------------------------------------------------------------------
|
||||
// fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
|
||||
// fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
|
||||
// --------------------------------------------------------------------
|
||||
// fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
|
||||
// fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
|
||||
// --------------------------------------------------------------------
|
||||
fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
|
||||
fcvt_w_s(dst, ftmp, RoundingMode::rdn);
|
||||
|
||||
bind(done);
|
||||
}
|
||||
|
||||
// java.lang.Math.round(double a)
|
||||
// Returns the closest long to the argument, with ties rounding to positive infinity.
|
||||
void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
|
||||
// this instructions calling sequence provides performance improvement on all tested devices;
|
||||
// don't change it without re-verification
|
||||
Label done;
|
||||
mv(t0, julong_cast(0.5));
|
||||
fmv_d_x(ftmp, t0);
|
||||
|
||||
// dst = 0 if NaN
|
||||
feq_d(t0, src, src); // replacing fclass with feq as performance optimization
|
||||
mv(dst, zr);
|
||||
beqz(t0, done);
|
||||
|
||||
// dst = (src + 0.5) rounded down towards negative infinity
|
||||
fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
|
||||
fcvt_l_d(dst, ftmp, RoundingMode::rdn);
|
||||
|
||||
bind(done);
|
||||
}
|
||||
|
||||
#define FCVT_SAFE(FLOATCVT, FLOATSIG) \
|
||||
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
|
||||
Label done; \
|
||||
|
@ -1265,6 +1265,9 @@ public:
|
||||
void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
|
||||
void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
|
||||
|
||||
// java.lang.Math.round(float): dst = closest int to src, ties rounding to positive
// infinity, 0 for NaN. ftmp is a float scratch register that gets overwritten.
void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
|
||||
// java.lang.Math.round(double): dst = closest long to src, ties rounding to positive
// infinity, 0 for NaN. ftmp is a float scratch register that gets overwritten.
void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
|
||||
|
||||
// vector load/store unit-stride instructions
|
||||
void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
|
||||
switch (sew) {
|
||||
|
@ -8417,6 +8417,34 @@ instruct convN2I(iRegINoSp dst, iRegN src)
|
||||
ins_pipe(ialu_reg);
|
||||
%}
|
||||
|
||||
// Math.round(double): matches the RoundD node and expands to
// MacroAssembler::java_round_double. ftmp is a scratch FP register (TEMP).
instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{
  match(Set dst (RoundD src));
  effect(TEMP ftmp);

  ins_cost(XFER_COST + BRANCH_COST);
  format %{ "java_round_double $dst, $src\t#@round_double_reg" %}

  ins_encode %{
    __ java_round_double($dst$$Register, $src$$FloatRegister, $ftmp$$FloatRegister);
  %}

  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// Math.round(float): matches the RoundF node and expands to
// MacroAssembler::java_round_float. ftmp is a scratch FP register (TEMP).
instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{
  match(Set dst (RoundF src));
  effect(TEMP ftmp);

  ins_cost(XFER_COST + BRANCH_COST);
  format %{ "java_round_float $dst, $src\t#@round_float_reg" %}

  ins_encode %{
    __ java_round_float($dst$$Register, $src$$FloatRegister, $ftmp$$FloatRegister);
  %}

  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// Convert oop pointer into compressed form
|
||||
instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
|
||||
match(Set dst (EncodeP src));
|
||||
|
Loading…
x
Reference in New Issue
Block a user