8314056: Remove runtime platform check from frem/drem

Reviewed-by: sviswanathan, jbhateja
This commit is contained in:
Scott Gibbons 2023-08-30 01:28:27 +00:00 committed by Jatin Bhateja
parent 1c598c2245
commit ce2a7ea40a
8 changed files with 110 additions and 129 deletions

@ -6232,11 +6232,17 @@ void Assembler::subss(XMMRegister dst, Address src) {
emit_operand(dst, src, 0);
}
void Assembler::testb(Register dst, int imm8) {
void Assembler::testb(Register dst, int imm8, bool use_ral) {
NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
if (dst == rax) {
emit_int8((unsigned char)0xA8);
emit_int8(imm8);
if (use_ral) {
emit_int8((unsigned char)0xA8);
emit_int8(imm8);
} else {
emit_int8((unsigned char)0xF6);
emit_int8((unsigned char)0xC4);
emit_int8(imm8);
}
} else {
(void) prefix_and_encode(dst->encoding(), true);
emit_arith_b(0xF6, 0xC0, dst, imm8);
@ -10967,6 +10973,36 @@ void Assembler::emit_operand32(Register reg, Address adr, int post_addr_length)
emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec, post_addr_length);
}
// Emits x87 FLD m64fp (opcode byte 0xDD followed by a memory operand):
// pushes the double stored at adr onto the x87 register stack.
// This definition sits before the `#ifndef _LP64` section, so it is also
// available to 64-bit code (the fmod stub's non-AVX path uses it).
void Assembler::fld_d(Address adr) {
InstructionMark im(this);
emit_int8((unsigned char)0xDD);
// rax is not a real operand; presumably only its encoding (0) is used to
// fill the ModRM reg field, i.e. the /0 opcode extension of FLD m64fp.
emit_operand32(rax, adr, 0);
}
// Emits x87 FPREM (bytes 0xD9 0xF8), the partial-remainder instruction.
// FPREM may need several iterations; the fmod stub loops on it until bit 0x4
// of AH (the x87 C2 flag after FNSTSW AX) is clear.
void Assembler::fprem() {
emit_int16((unsigned char)0xD9, (unsigned char)0xF8);
}
// Emits x87 FNSTSW AX (bytes 0xDF 0xE0): copies the x87 status word into AX
// so the condition flags can be tested with ordinary integer instructions.
void Assembler::fnstsw_ax() {
emit_int16((unsigned char)0xDF, (unsigned char)0xE0);
}
// Emits x87 FSTP m64fp (opcode byte 0xDD followed by a memory operand):
// stores ST(0) as a double at adr and pops the x87 register stack.
void Assembler::fstp_d(Address adr) {
InstructionMark im(this);
emit_int8((unsigned char)0xDD);
// rbx is not a real operand; presumably only its encoding (3) is used to
// fill the ModRM reg field, i.e. the /3 opcode extension of FSTP m64fp.
emit_operand32(rbx, adr, 0);
}
// Emits x87 FSTP ST(index) (bytes 0xDD, 0xD8 + index): copies ST(0) into
// ST(index) and pops the x87 register stack.
// index must be in [0, 8); emit_farith asserts this.
void Assembler::fstp_d(int index) {
emit_farith(0xDD, 0xD8, index);
}
// Emits a two-byte x87 register-stack instruction: b1 followed by (b2 + i).
//   b1, b2 - opcode bytes; each must fit in a byte.
//   i      - x87 stack slot ST(i) folded into the second byte; must be in [0, 8).
void Assembler::emit_farith(int b1, int b2, int i) {
assert(isByte(b1) && isByte(b2), "wrong opcode");
assert(0 <= i && i < 8, "illegal stack offset");
emit_int16(b1, b2 + i);
}
#ifndef _LP64
// 32bit only pieces of the assembler
@ -11010,12 +11046,6 @@ void Assembler::decl(Register dst) {
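// 64bit doesn't use the x87, apart from the few instructions (fld_d, fprem, fnstsw_ax, fstp_d) defined before this 32bit-only section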
// Emits a two-byte x87 register-stack instruction: b1 followed by (b2 + i).
// Both opcode bytes must fit in a byte and i (the ST(i) slot) must be in [0, 8).
void Assembler::emit_farith(int b1, int b2, int i) {
assert(isByte(b1) && isByte(b2), "wrong opcode");
assert(0 <= i && i < 8, "illegal stack offset");
emit_int16(b1, b2 + i);
}
// Emits x87 FABS (bytes 0xD9 0xE1): replaces ST(0) with its absolute value.
void Assembler::fabs() {
emit_int16((unsigned char)0xD9, (unsigned char)0xE1);
}
@ -11177,12 +11207,6 @@ void Assembler::fld1() {
emit_int16((unsigned char)0xD9, (unsigned char)0xE8);
}
// Emits x87 FLD m64fp (opcode byte 0xDD followed by a memory operand):
// pushes the double stored at adr onto the x87 register stack.
void Assembler::fld_d(Address adr) {
InstructionMark im(this);
emit_int8((unsigned char)0xDD);
// rax presumably supplies only the ModRM reg field (the /0 opcode extension).
emit_operand32(rax, adr, 0);
}
void Assembler::fld_s(Address adr) {
InstructionMark im(this);
emit_int8((unsigned char)0xD9);
@ -11266,14 +11290,6 @@ void Assembler::fnstcw(Address src) {
emit_operand32(rdi, src, 0);
}
// Emits x87 FNSTSW AX (bytes 0xDF 0xE0): copies the x87 status word into AX.
void Assembler::fnstsw_ax() {
emit_int16((unsigned char)0xDF, (unsigned char)0xE0);
}
// Emits x87 FPREM (bytes 0xD9 0xF8), the partial-remainder instruction.
void Assembler::fprem() {
emit_int16((unsigned char)0xD9, (unsigned char)0xF8);
}
// Emits x87 FPREM1 (bytes 0xD9 0xF5), the IEEE-style partial-remainder
// instruction (distinct from FPREM, which is encoded as 0xD9 0xF8).
void Assembler::fprem1() {
emit_int16((unsigned char)0xD9, (unsigned char)0xF5);
}
@ -11304,16 +11320,6 @@ void Assembler::fst_s(Address adr) {
emit_operand32(rdx, adr, 0);
}
// Emits x87 FSTP m64fp (opcode byte 0xDD followed by a memory operand):
// stores ST(0) as a double at adr and pops the x87 register stack.
void Assembler::fstp_d(Address adr) {
InstructionMark im(this);
emit_int8((unsigned char)0xDD);
// rbx presumably supplies only the ModRM reg field (the /3 opcode extension).
emit_operand32(rbx, adr, 0);
}
// Emits x87 FSTP ST(index) (bytes 0xDD, 0xD8 + index): copies ST(0) into
// ST(index) and pops the x87 register stack. index must be in [0, 8).
void Assembler::fstp_d(int index) {
emit_farith(0xDD, 0xD8, index);
}
void Assembler::fstp_s(Address adr) {
InstructionMark im(this);
emit_int8((unsigned char)0xD9);

@ -1245,12 +1245,18 @@ private:
void divss(XMMRegister dst, XMMRegister src);
#ifndef _LP64
void fnstsw_ax();
void fprem();
void fld_d(Address adr);
void fstp_d(Address adr);
void fstp_d(int index);
private:
void emit_farith(int b1, int b2, int i);
public:
#ifndef _LP64
void emms();
void fabs();
@ -1309,7 +1315,6 @@ private:
void fld1();
void fld_d(Address adr);
void fld_s(Address adr);
void fld_s(int index);
@ -1338,10 +1343,6 @@ private:
void fnsave(Address dst);
void fnstcw(Address src);
void fnstsw_ax();
void fprem();
void fprem1();
void frstor(Address src);
@ -1353,8 +1354,6 @@ private:
void fst_d(Address adr);
void fst_s(Address adr);
void fstp_d(Address adr);
void fstp_d(int index);
void fstp_s(Address adr);
void fsub(int i);
@ -2184,7 +2183,7 @@ private:
void subss(XMMRegister dst, XMMRegister src);
void testb(Address dst, int imm8);
void testb(Register dst, int imm8);
void testb(Register dst, int imm8, bool use_ral = true);
void testl(Address dst, int32_t imm32);
void testl(Register dst, int32_t imm32);

@ -84,52 +84,18 @@ void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* mas
}
#endif //COMPILER1
#if defined(TARGET_COMPILER_gcc) && !defined(_WIN64)
JRT_LEAF(jfloat, SharedRuntime::frem(jfloat x, jfloat y))
jfloat retval;
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
if (!is_LP64 || UseAVX < 1 || !UseFMA) {
asm ("\
1: \n\
fprem \n\
fnstsw %%ax \n\
test $0x4,%%ah \n\
jne 1b \n\
"
:"=t"(retval)
:"0"(x), "u"(y)
:"cc", "ax");
} else {
assert(StubRoutines::fmod() != nullptr, "");
jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod();
jdouble dx = (jdouble) x;
jdouble dy = (jdouble) y;
assert(StubRoutines::fmod() != nullptr, "");
jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod();
jdouble dx = (jdouble) x;
jdouble dy = (jdouble) y;
retval = (jfloat) (*addr)(dx, dy);
}
return retval;
return (jfloat) (*addr)(dx, dy);
JRT_END
JRT_LEAF(jdouble, SharedRuntime::drem(jdouble x, jdouble y))
jdouble retval;
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
if (!is_LP64 || UseAVX < 1 || !UseFMA) {
asm ("\
1: \n\
fprem \n\
fnstsw %%ax \n\
test $0x4,%%ah \n\
jne 1b \n\
"
:"=t"(retval)
:"0"(x), "u"(y)
:"cc", "ax");
} else {
assert(StubRoutines::fmod() != nullptr, "");
jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod();
assert(StubRoutines::fmod() != nullptr, "");
jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod();
retval = (*addr)(x, y);
}
return retval;
return (*addr)(x, y);
JRT_END
#endif // TARGET_COMPILER_gcc && !_WIN64

@ -3897,6 +3897,8 @@ address StubGenerator::generate_throw_exception(const char* name,
// Initializes the MXCSR images that generated stubs load with ldmxcsr.
// Both values keep the exception-mask bits (0x1F80) set; _mxcsr_rz additionally
// sets the 0x6000 rounding-control bits (0x7F80 == 0x1F80 | 0x6000).
// The fmod stub loads _mxcsr_rz for its slow path and restores _mxcsr_std
// before returning.
void StubGenerator::create_control_words() {
// Round to nearest, 64-bit mode, exceptions masked
StubRoutines::x86::_mxcsr_std = 0x1F80;
// Round to zero, 64-bit mode, exceptions masked
// NOTE(review): "64-bit mode" looks carried over from x87 control-word
// wording; MXCSR has no precision-control field - confirm the intent.
StubRoutines::x86::_mxcsr_rz = 0x7F80;
}
// Initialization
@ -3979,9 +3981,7 @@ void StubGenerator::generate_initial_stubs() {
generate_libm_stubs();
if ((UseAVX >= 1) && (VM_Version::supports_avx512vlbwdq() || VM_Version::supports_fma())) {
StubRoutines::_fmod = generate_libmFmod(); // from stubGenerator_x86_64_fmod.cpp
}
StubRoutines::_fmod = generate_libmFmod(); // from stubGenerator_x86_64_fmod.cpp
}
void StubGenerator::generate_continuation_stubs() {

@ -27,6 +27,7 @@
#include "precompiled.hpp"
#include "macroAssembler_x86.hpp"
#include "stubGenerator_x86_64.hpp"
#include "runtime/stubRoutines.hpp"
/******************************************************************************/
// ALGORITHM DESCRIPTION - FMOD()
@ -72,6 +73,7 @@ ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_e307[] = {
};
address StubGenerator::generate_libmFmod() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "libmFmod");
address start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
@ -287,21 +289,11 @@ address StubGenerator::generate_libmFmod() {
// {
// double a, b, sgn_a, q, bs, bs2, corr, res;
// unsigned eq;
// unsigned mxcsr, mxcsr_rz;
// __asm { stmxcsr DWORD PTR[mxcsr] }
// mxcsr_rz = 0x7f80 | mxcsr;
__ push(rax);
__ stmxcsr(Address(rsp, 0));
__ movl(rax, Address(rsp, 0));
__ movl(rcx, rax);
__ orl(rcx, 0x7f80);
__ movl(Address(rsp, 0x04), rcx);
// // |x|, |y|
// a = DP_AND(x, DP_CONST(7fffffffffffffff));
__ movq(xmm2, xmm0);
__ vmovdqu(xmm3, ExternalAddress((address)CONST_NaN), rcx);
__ movdqu(xmm3, ExternalAddress((address)CONST_NaN), rcx);
__ vpand(xmm4, xmm2, xmm3, Assembler::AVX_128bit);
// b = DP_AND(y, DP_CONST(7fffffffffffffff));
__ vpand(xmm3, xmm1, xmm3, Assembler::AVX_128bit);
@ -313,18 +305,16 @@ address StubGenerator::generate_libmFmod() {
// if (a < b) return x + sgn_a;
__ ucomisd(xmm3, xmm4);
__ jcc(Assembler::belowEqual, L_104a);
__ jccb(Assembler::belowEqual, L_104a);
__ vaddsd(xmm0, xmm2, xmm0);
__ jmp(L_11bd);
// if (((mxcsr & 0x6000)!=0x2000) && (a < b * 0x1p+260))
// if (a < b * 0x1p+260)
__ bind(L_104a);
__ andl(rax, 0x6000);
__ cmpl(rax, 0x2000);
__ jcc(Assembler::equal, L_10c1);
__ vmulsd(xmm0, xmm3, ExternalAddress((address)CONST_1p260), rax);
__ ucomisd(xmm0, xmm4);
__ jcc(Assembler::belowEqual, L_10c1);
__ jccb(Assembler::belowEqual, L_10c1);
// {
// q = DP_DIV(a, b);
__ vdivpd(xmm0, xmm4, xmm3, Assembler::AVX_128bit);
@ -340,7 +330,7 @@ address StubGenerator::generate_libmFmod() {
__ vroundsd(xmm0, xmm0, xmm0, 3);
// a = DP_FNMA(b, q, a);
__ vfnmadd213sd(xmm0, xmm3, xmm4);
__ align32();
__ align(16);
// while (b <= a)
__ bind(L_1090);
__ ucomisd(xmm0, xmm3);
@ -359,14 +349,14 @@ address StubGenerator::generate_libmFmod() {
__ vroundsd(xmm4, xmm4, xmm4, 3);
// a = DP_FNMA(b, q, a);
__ vfnmadd231sd(xmm0, xmm3, xmm4);
__ jmp(L_1090);
__ jmpb(L_1090);
// }
// return DP_XOR(a, sgn_a);
// }
// __asm { ldmxcsr DWORD PTR [mxcsr_rz] }
__ bind(L_10c1);
__ ldmxcsr(Address(rsp, 0x04));
__ ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_rz()), rax /*rscratch*/);
// q = DP_DIV(a, b);
__ vdivpd(xmm0, xmm4, xmm3, Assembler::AVX_128bit);
@ -378,7 +368,7 @@ address StubGenerator::generate_libmFmod() {
// if (__builtin_expect((eq >= 0x7fefffffu), (0==1))) goto SPECIAL_FMOD;
__ cmpl(rax, 0x7feffffe);
__ jcc(Assembler::above, L_10e7);
__ jccb(Assembler::above, L_10e7);
// a = DP_FNMA(b, q, a);
__ vfnmadd213sd(xmm0, xmm3, xmm4);
@ -391,31 +381,31 @@ address StubGenerator::generate_libmFmod() {
__ bind(L_10e7);
__ vpxor(xmm5, xmm5, xmm5, Assembler::AVX_128bit);
__ ucomisd(xmm3, xmm5);
__ jcc(Assembler::notEqual, L_10f3);
__ jcc(Assembler::noParity, L_111c);
__ jccb(Assembler::notEqual, L_10f3);
__ jccb(Assembler::noParity, L_111c);
__ bind(L_10f3);
__ movsd(xmm5, ExternalAddress((address)CONST_MAX), rax);
__ ucomisd(xmm5, xmm4);
__ jcc(Assembler::below, L_111c);
__ jccb(Assembler::below, L_111c);
// return res;
// }
// // y is NaN?
// if (!(b <= DP_CONST(7ff0000000000000))) {
__ movsd(xmm0, ExternalAddress((address)CONST_INF), rax);
__ ucomisd(xmm0, xmm3);
__ jcc(Assembler::aboveEqual, L_112a);
__ jccb(Assembler::aboveEqual, L_112a);
// res = y + y;
__ vaddsd(xmm0, xmm1, xmm1);
// __asm { ldmxcsr DWORD PTR[mxcsr] }
__ ldmxcsr(Address(rsp, 0));
__ ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), rax /*rscratch*/);
__ jmp(L_11bd);
// {
// res = DP_FNMA(b, q, a); // NaN
__ bind(L_111c);
__ vfnmadd213sd(xmm0, xmm3, xmm4);
// __asm { ldmxcsr DWORD PTR[mxcsr] }
__ ldmxcsr(Address(rsp, 0));
__ ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), rax /*rscratch*/);
__ jmp(L_11bd);
// return res;
// }
@ -435,14 +425,14 @@ address StubGenerator::generate_libmFmod() {
// if (eq >= 0x7fefffffu)
__ cmpl(rax, 0x7fefffff);
__ jcc(Assembler::below, L_116e);
__ jccb(Assembler::below, L_116e);
// {
// // b* 2*1023 * 2^1023
// bs2 = bs * DP_CONST(7fe0000000000000);
__ vmulsd(xmm0, xmm1, ExternalAddress((address)CONST_e307), rax);
// while (bs2 <= a)
__ ucomisd(xmm4, xmm0);
__ jcc(Assembler::below, L_1173);
__ jccb(Assembler::below, L_1173);
// {
// q = DP_DIV(a, bs2);
__ bind(L_1157);
@ -453,8 +443,8 @@ address StubGenerator::generate_libmFmod() {
__ vfnmadd231sd(xmm4, xmm0, xmm5);
// while (bs2 <= a)
__ ucomisd(xmm4, xmm0);
__ jcc(Assembler::aboveEqual, L_1157);
__ jmp(L_1173);
__ jccb(Assembler::aboveEqual, L_1157);
__ jmpb(L_1173);
// }
// }
// else
@ -465,9 +455,9 @@ address StubGenerator::generate_libmFmod() {
// while (bs <= a)
__ bind(L_1173);
__ ucomisd(xmm4, xmm1);
__ jcc(Assembler::aboveEqual, L_117f);
__ jccb(Assembler::aboveEqual, L_117f);
__ movapd(xmm0, xmm4);
__ jmp(L_11af);
__ jmpb(L_11af);
// {
// q = DP_DIV(a, bs);
__ bind(L_117f);
@ -480,9 +470,9 @@ address StubGenerator::generate_libmFmod() {
// while (bs <= a)
__ ucomisd(xmm0, xmm1);
__ movapd(xmm4, xmm0);
__ jcc(Assembler::aboveEqual, L_117f);
__ jmp(L_11af);
__ align32();
__ jccb(Assembler::aboveEqual, L_117f);
__ jmpb(L_11af);
__ align(16);
// {
// q = DP_DIV(a, b);
__ bind(L_11a0);
@ -496,11 +486,11 @@ address StubGenerator::generate_libmFmod() {
// while (b <= a)
__ bind(L_11af);
__ ucomisd(xmm0, xmm3);
__ jcc(Assembler::aboveEqual, L_11a0);
__ jccb(Assembler::aboveEqual, L_11a0);
// }
// __asm { ldmxcsr DWORD PTR[mxcsr] }
__ ldmxcsr(Address(rsp, 0));
__ ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), rax /*rscratch*/);
__ bind(L_11b9);
__ vpxor(xmm0, xmm2, xmm0, Assembler::AVX_128bit);
// }
@ -509,10 +499,23 @@ address StubGenerator::generate_libmFmod() {
// }
__ bind(L_11bd);
__ pop(rax);
} else { // SSE version
assert(false, "SSE not implemented");
Label x87_loop;
__ movsd(Address(rbp, -8), xmm1);
__ movsd(Address(rbp, -16), xmm0);
__ fld_d(Address(rbp, -8));
__ fld_d(Address(rbp, -16));
__ bind(x87_loop);
__ fprem();
__ fnstsw_ax();
__ testb(rax, 0x4, false);
__ jcc(Assembler::notZero, x87_loop);
__ fstp_d(1);
__ fstp_d(Address(rbp, -8));
__ movsd(xmm0, Address(rbp, -8));
}
__ leave(); // required for proper stackwalking of RuntimeStub frame

@ -126,6 +126,9 @@ class x86 {
private:
static jint _mxcsr_std;
#ifdef _LP64
static jint _mxcsr_rz;
#endif // _LP64
static address _verify_mxcsr_entry;
@ -207,6 +210,9 @@ class x86 {
public:
// Address of the _mxcsr_std value, for generated code that loads it from
// memory (e.g. as an ldmxcsr operand).
static address addr_mxcsr_std() { return (address)&_mxcsr_std; }
#ifdef _LP64
// Address of the _mxcsr_rz value (round-toward-zero MXCSR image), for
// generated code that loads it from memory. Only declared for 64-bit builds.
static address addr_mxcsr_rz() { return (address)&_mxcsr_rz; }
#endif // _LP64
// Returns the address stored in _verify_mxcsr_entry.
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
// Returns the address of the _crc_by128_masks table.
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
#ifdef _LP64

@ -32,6 +32,7 @@
// a description of how to extend it, see the stubRoutines.hpp file.
jint StubRoutines::x86::_mxcsr_std = 0;
#ifdef _LP64
// Round-toward-zero MXCSR image; the real value is assigned when the stub
// generator creates its control words. The member is declared only under
// _LP64 in class x86, so the definition carries the same guard to keep
// non-LP64 builds of this file compiling.
jint StubRoutines::x86::_mxcsr_rz = 0;
#endif // _LP64
address StubRoutines::x86::_get_previous_sp_entry = nullptr;

@ -238,7 +238,7 @@ const julong double_sign_mask = CONST64(0x7FFFFFFFFFFFFFFF);
const julong double_infinity = CONST64(0x7FF0000000000000);
#endif
#if !defined(X86) || !defined(TARGET_COMPILER_gcc) || defined(_WIN64)
#if !defined(X86)
JRT_LEAF(jfloat, SharedRuntime::frem(jfloat x, jfloat y))
#ifdef _WIN64
// 64-bit Windows on amd64 returns the wrong values for
@ -270,7 +270,7 @@ JRT_LEAF(jdouble, SharedRuntime::drem(jdouble x, jdouble y))
return ((jdouble)fmod((double)x,(double)y));
#endif
JRT_END
#endif // !X86 || !TARGET_COMPILER_gcc || _WIN64
#endif // !X86
JRT_LEAF(jfloat, SharedRuntime::i2f(jint x))
return (jfloat)x;