8318158: RISC-V: implement roundD/roundF intrinsics
Co-authored-by: Vladimir Kempik <vkempik@openjdk.org> Reviewed-by: luhenry, fyang, mli
This commit is contained in:
parent
2a59243cba
commit
19147f326c
@ -4481,6 +4481,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
|
|||||||
bge(cnt, tmp1, loop);
|
bge(cnt, tmp1, loop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// java.lang.Math.round(float a)
|
||||||

// Returns the closest int to the argument, with ties rounding to positive infinity.
//
// dst  - integer register that receives the rounded value; set to 0 when src is NaN.
// src  - float register holding the value to round.
// ftmp - float scratch register; overwritten with 0.5f and then with (src + 0.5f).
// The integer register t0 is used as scratch and is overwritten as well.
// NOTE(review): inputs outside the int range (including infinities) are handed straight
// to fcvt.w.s; presumably its saturating result gives the clamping Math.round requires -
// confirm against the RISC-V specification.
|
||||||

void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
|
||||||

// This instruction sequence provides a performance improvement on all tested devices;
|
||||||

// do not change or reorder it without re-verification.
|
||||||

Label done;
|
||||||

// Load the constant 0.5f into ftmp by way of the integer register t0.
mv(t0, jint_cast(0.5f));
|
||||||

fmv_w_x(ftmp, t0);
|
||||||

|
||||||

// dst = 0 if NaN
// (src compared with itself is false only for NaN, so t0 == 0 identifies NaN)
|
||||||

feq_s(t0, src, src); // replacing fclass with feq as performance optimization
|
||||||

// Preset the result to 0 so the NaN path can branch straight to done.
mv(dst, zr);
|
||||||

beqz(t0, done);
|
||||||

|
||||||

// dst = (src + 0.5f) rounded down towards negative infinity
|
||||||

// Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
|
||||||

// RDN is required for fadd_s, RNE gives incorrect results:
|
||||||

// --------------------------------------------------------------------
|
||||||

// fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
|
||||||

// fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
|
||||||

// --------------------------------------------------------------------
|
||||||

// fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
|
||||||

// fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
|
||||||

// --------------------------------------------------------------------
|
||||||

fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
|
||||||

// Convert with RDN as well, i.e. take the floor of the sum.
fcvt_w_s(dst, ftmp, RoundingMode::rdn);
|
||||||

|
||||||

bind(done);
|
||||||

}
|
||||||
|
|
||||||
|
// java.lang.Math.round(double a)
|
||||||

// Returns the closest long to the argument, with ties rounding to positive infinity.
//
// dst  - integer register that receives the rounded value; set to 0 when src is NaN.
// src  - float register holding the double value to round.
// ftmp - float scratch register; overwritten with 0.5 and then with (src + 0.5).
// The integer register t0 is used as scratch and is overwritten as well.
// NOTE(review): inputs outside the long range (including infinities) are handed straight
// to fcvt.l.d; presumably its saturating result gives the clamping Math.round requires -
// confirm against the RISC-V specification.
|
||||||

void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
|
||||||

// This instruction sequence provides a performance improvement on all tested devices;
|
||||||

// do not change or reorder it without re-verification.
|
||||||

Label done;
|
||||||

// Load the constant 0.5 into ftmp by way of the integer register t0.
mv(t0, julong_cast(0.5));
|
||||||

fmv_d_x(ftmp, t0);
|
||||||

|
||||||

// dst = 0 if NaN
// (src compared with itself is false only for NaN, so t0 == 0 identifies NaN)
|
||||||

feq_d(t0, src, src); // replacing fclass with feq as performance optimization
|
||||||

// Preset the result to 0 so the NaN path can branch straight to done.
mv(dst, zr);
|
||||||

beqz(t0, done);
|
||||||

|
||||||

// dst = (src + 0.5) rounded down towards negative infinity
|
||||||

fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
|
||||||

// Convert with RDN as well, i.e. take the floor of the sum.
fcvt_l_d(dst, ftmp, RoundingMode::rdn);
|
||||||

|
||||||

bind(done);
|
||||||

}
|
||||||
|
|
||||||
#define FCVT_SAFE(FLOATCVT, FLOATSIG) \
|
#define FCVT_SAFE(FLOATCVT, FLOATSIG) \
|
||||||
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
|
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
|
||||||
Label done; \
|
Label done; \
|
||||||
|
@ -1265,6 +1265,9 @@ public:
|
|||||||
void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
|
void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
|
||||||
void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
|
void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
|
||||||
|
|
||||||
|
// java.lang.Math.round(float): dst = closest int to src (0 for NaN); ftmp is a float scratch register.
void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
|
||||||
|
// java.lang.Math.round(double): dst = closest long to src (0 for NaN); ftmp is a float scratch register.
void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
|
||||||
|
|
||||||
// vector load/store unit-stride instructions
|
// vector load/store unit-stride instructions
|
||||||
void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
|
void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
|
||||||
switch (sew) {
|
switch (sew) {
|
||||||
|
@ -8417,6 +8417,34 @@ instruct convN2I(iRegINoSp dst, iRegN src)
|
|||||||
ins_pipe(ialu_reg);
|
ins_pipe(ialu_reg);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// Matches a RoundD node (double -> long) and encodes it with java_round_double.
// ftmp is an extra double register handed to the encoding as scratch; it is declared
// TEMP because it is written by the encoding and is not an input or result of the match.
instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{
|
||||||

match(Set dst (RoundD src));
|
||||||

|
||||||

ins_cost(XFER_COST + BRANCH_COST);
|
||||||

effect(TEMP ftmp);
|
||||||

format %{ "java_round_double $dst, $src\t#@round_double_reg" %}
|
||||||

|
||||||

ins_encode %{
|
||||||

__ java_round_double($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
|
||||||

%}
|
||||||

|
||||||

ins_pipe(pipe_slow);
|
||||||

%}
|
||||||
|
|
||||||
|
// Matches a RoundF node (float -> int) and encodes it with java_round_float.
// ftmp is an extra float register handed to the encoding as scratch; it is declared
// TEMP because it is written by the encoding and is not an input or result of the match.
instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{
|
||||||

match(Set dst (RoundF src));
|
||||||

|
||||||

ins_cost(XFER_COST + BRANCH_COST);
|
||||||

effect(TEMP ftmp);
|
||||||

format %{ "java_round_float $dst, $src\t#@round_float_reg" %}
|
||||||

|
||||||

ins_encode %{
|
||||||

__ java_round_float($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
|
||||||

%}
|
||||||

|
||||||

ins_pipe(pipe_slow);
|
||||||

%}
|
||||||
|
|
||||||
// Convert oop pointer into compressed form
|
// Convert oop pointer into compressed form
|
||||||
instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
|
instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
|
||||||
match(Set dst (EncodeP src));
|
match(Set dst (EncodeP src));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user