8308966: Add intrinsic for float/double modulo for x86 AVX2 and AVX512
Co-authored-by: Marius Cornea <marius.cornea@intel.com> Reviewed-by: jbhateja, sviswanathan
This commit is contained in:
parent
8e4e6b056c
commit
5d5ae35288
src/hotspot
cpu/x86
assembler_x86.cppassembler_x86.hppc1_LIRGenerator_x86.cppsharedRuntime_x86.cppstubGenerator_x86_64.cppstubGenerator_x86_64.hppstubGenerator_x86_64_fmod.cpp
share/runtime
test/hotspot/jtreg/compiler/floatingpoint
@ -3555,6 +3555,14 @@ void Assembler::movsd(Address dst, XMMRegister src) {
|
||||
emit_operand(src, dst, 0);
|
||||
}
|
||||
|
||||
// Three-operand scalar double move (register form of VMOVSD, VEX F2 0F 11):
// dst[63:0] = src2[63:0], dst[127:64] = src[127:64].
void Assembler::vmovsd(XMMRegister dst, XMMRegister src, XMMRegister src2) {
  assert(UseAVX > 0, "Requires some form of AVX");
  // No InstructionMark here: this is a pure register-register encoding with no
  // memory operand or relocation, matching the other reg-reg encoders in this file.
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src2->encoding(), src->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int16(0x11, (0xC0 | encode));
}
|
||||
|
||||
void Assembler::movss(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
@ -6531,6 +6539,29 @@ void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2)
|
||||
emit_int16((unsigned char)0xB9, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Scalar double fused negative multiply-add, FNMADD213 form:
// dst = -(src1 * dst) + src2, encoded with an EVEX embedded (static)
// rounding mode. The rmode value rides in the InstructionAttr vector-length
// slot, which EVEX reuses as rounding control for scalar static rounding.
void Assembler::evfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2, EvexRoundPrefix rmode) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(rmode, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  attributes.set_extended_context();
  int prefix_encoding = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16((unsigned char)0xAD, (0xC0 | prefix_encoding));
}
|
||||
|
||||
// Scalar double fused negative multiply-add, FNMADD213 form (VEX-encoded):
// dst = -(src1 * dst) + src2.
void Assembler::vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int prefix_encoding = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16((unsigned char)0xAD, (0xC0 | prefix_encoding));
}
|
||||
|
||||
// Scalar double fused negative multiply-add, FNMADD231 form (VEX-encoded):
// dst = -(src1 * src2) + dst.
void Assembler::vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int prefix_encoding = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16((unsigned char)0xBD, (0xC0 | prefix_encoding));
}
|
||||
|
||||
void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
assert(VM_Version::supports_fma(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
@ -6892,6 +6923,22 @@ void Assembler::vroundpd(XMMRegister dst, Address src, int32_t rmode, int vecto
|
||||
emit_int8((rmode));
|
||||
}
|
||||
|
||||
// Scalar double round with immediate control (VROUNDSD, 66 0F 3A 0B /r ib):
// dst[63:0] = round(src2[63:0]) per the low nibble of rmode; the upper
// quadword of dst is copied from src. Only imm8[3:0] is defined, hence the
// 0x0f bound on rmode.
void Assembler::vroundsd(XMMRegister dst, XMMRegister src, XMMRegister src2, int32_t rmode) {
  assert(VM_Version::supports_avx(), "");
  assert(rmode <= 0x0f, "rmode 0x%x", rmode);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int prefix_encoding = vex_prefix_and_encode(dst->encoding(), src->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x0B, (0xC0 | prefix_encoding), (rmode));
}
|
||||
|
||||
// EVEX scalar double round-to-scale (VRNDSCALESD). Per the Intel SDM,
// imm8[3:0] selects rounding behavior and imm8[7:4] is the number of
// fraction bits (M) to preserve; callers pass the full imm8 via rmode.
void Assembler::vrndscalesd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int32_t rmode) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  int prefix_encoding = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x0B, (0xC0 | prefix_encoding), (rmode));
}
|
||||
|
||||
void Assembler::vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "requires EVEX support");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
@ -8857,6 +8904,19 @@ void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
// Extract one 32-bit lane of src into a general-purpose register (EXTRACTPS).
// imm8 selects the lane:
//   0x00 - bits  31:0
//   0x01 - bits  63:32
//   0x02 - bits  95:64
//   0x03 - bits 127:96
void Assembler::extractps(Register dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // The GPR destination lives in the ModRM r/m field, so its encoding is
  // passed through an XMMRegister view for the prefix helper.
  int prefix_encoding = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x17, (0xC0 | prefix_encoding), imm8 & 0x03);
}
|
||||
|
||||
// duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
|
||||
void Assembler::vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
@ -9531,6 +9591,15 @@ void Assembler::evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, Addres
|
||||
emit_operand(dst, src, 0);
|
||||
}
|
||||
|
||||
// Scalar double divide with EVEX embedded (static) rounding:
// dst = nds / src, rounded per rmode. The rounding mode is carried in the
// InstructionAttr vector-length slot, which EVEX reuses as rounding control.
void Assembler::evdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src, EvexRoundPrefix rmode) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(rmode, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  attributes.set_extended_context();
  int prefix_encoding = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int16(0x5E, (0xC0 | prefix_encoding));
}
|
||||
|
||||
void Assembler::evpabsb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
|
||||
|
@ -528,6 +528,13 @@ class Assembler : public AbstractAssembler {
|
||||
EVEX_Z = 0x80
|
||||
};
|
||||
|
||||
  // EVEX embedded rounding-control (RC) values for instructions using static
  // rounding, as defined by the Intel SDM EVEX prefix encoding.
  enum EvexRoundPrefix {
    EVEX_RNE = 0x0, // round to nearest, ties to even
    EVEX_RD = 0x1,  // round down (toward -infinity)
    EVEX_RU = 0x2,  // round up (toward +infinity)
    EVEX_RZ = 0x3   // round toward zero (truncate)
  };
|
||||
|
||||
enum VexSimdPrefix {
|
||||
VEX_SIMD_NONE = 0x0,
|
||||
VEX_SIMD_66 = 0x1,
|
||||
@ -886,6 +893,8 @@ private:
|
||||
void movsd(Address dst, XMMRegister src);
|
||||
void movlpd(XMMRegister dst, Address src);
|
||||
|
||||
void vmovsd(XMMRegister dst, XMMRegister src, XMMRegister src2);
|
||||
|
||||
// New cpus require use of movaps and movapd to avoid partial register stall
|
||||
// when moving between registers.
|
||||
void movaps(XMMRegister dst, XMMRegister src);
|
||||
@ -2242,9 +2251,13 @@ private:
|
||||
void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
|
||||
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void evdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src, EvexRoundPrefix rmode);
|
||||
void vdivss(XMMRegister dst, XMMRegister nds, Address src);
|
||||
void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vfnmadd213sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void evfnmadd213sd(XMMRegister dst, XMMRegister nds, XMMRegister src, EvexRoundPrefix rmode);
|
||||
void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
|
||||
void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
@ -2334,8 +2347,11 @@ private:
|
||||
// Round Packed Double precision value.
|
||||
void vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
|
||||
void vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
|
||||
void vrndscalesd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int32_t rmode);
|
||||
void vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
|
||||
void vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
|
||||
void vroundsd(XMMRegister dst, XMMRegister src, XMMRegister src2, int32_t rmode);
|
||||
void vroundsd(XMMRegister dst, XMMRegister src, Address src2, int32_t rmode);
|
||||
|
||||
// Bitwise Logical AND of Packed Floating-Point Values
|
||||
void andpd(XMMRegister dst, XMMRegister src);
|
||||
@ -2719,6 +2735,8 @@ private:
|
||||
void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||
void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
|
||||
|
||||
void extractps(Register dst, XMMRegister src, uint8_t imm8);
|
||||
|
||||
// xmm/mem sourced byte/word/dword/qword replicate
|
||||
void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vpbroadcastb(XMMRegister dst, Address src, int vector_len);
|
||||
@ -2952,6 +2970,8 @@ public:
|
||||
_embedded_opmask_register_specifier = mask->encoding() & 0x7;
|
||||
}
|
||||
|
||||
void set_extended_context(void) { _is_extended_context = true; }
|
||||
|
||||
};
|
||||
|
||||
#endif // CPU_X86_ASSEMBLER_X86_HPP
|
||||
|
@ -968,7 +968,7 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
||||
break;
|
||||
case vmIntrinsics::_dpow:
|
||||
if (StubRoutines::dpow() != nullptr) {
|
||||
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
|
@ -87,6 +87,8 @@ void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* mas
|
||||
#if defined(TARGET_COMPILER_gcc) && !defined(_WIN64)
|
||||
JRT_LEAF(jfloat, SharedRuntime::frem(jfloat x, jfloat y))
|
||||
jfloat retval;
|
||||
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
|
||||
if (!is_LP64 || UseAVX < 1 || !UseFMA) {
|
||||
asm ("\
|
||||
1: \n\
|
||||
fprem \n\
|
||||
@ -97,11 +99,21 @@ jne 1b \n\
|
||||
:"=t"(retval)
|
||||
:"0"(x), "u"(y)
|
||||
:"cc", "ax");
|
||||
} else {
|
||||
assert(StubRoutines::fmod() != nullptr, "");
|
||||
jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod();
|
||||
jdouble dx = (jdouble) x;
|
||||
jdouble dy = (jdouble) y;
|
||||
|
||||
retval = (jfloat) (*addr)(dx, dy);
|
||||
}
|
||||
return retval;
|
||||
JRT_END
|
||||
|
||||
JRT_LEAF(jdouble, SharedRuntime::drem(jdouble x, jdouble y))
|
||||
jdouble retval;
|
||||
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
|
||||
if (!is_LP64 || UseAVX < 1 || !UseFMA) {
|
||||
asm ("\
|
||||
1: \n\
|
||||
fprem \n\
|
||||
@ -112,6 +124,12 @@ jne 1b \n\
|
||||
:"=t"(retval)
|
||||
:"0"(x), "u"(y)
|
||||
:"cc", "ax");
|
||||
} else {
|
||||
assert(StubRoutines::fmod() != nullptr, "");
|
||||
jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod();
|
||||
|
||||
retval = (*addr)(x, y);
|
||||
}
|
||||
return retval;
|
||||
JRT_END
|
||||
#endif // TARGET_COMPILER_gcc && !_WIN64
|
||||
|
@ -3937,6 +3937,10 @@ void StubGenerator::generate_initial_stubs() {
|
||||
}
|
||||
|
||||
generate_libm_stubs();
|
||||
|
||||
if ((UseAVX >= 1) && (VM_Version::supports_avx512vlbwdq() || VM_Version::supports_fma())) {
|
||||
StubRoutines::_fmod = generate_libmFmod(); // from stubGenerator_x86_64_fmod.cpp
|
||||
}
|
||||
}
|
||||
|
||||
void StubGenerator::generate_continuation_stubs() {
|
||||
|
@ -486,6 +486,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
address generate_libmPow();
|
||||
address generate_libmLog();
|
||||
address generate_libmLog10();
|
||||
address generate_libmFmod();
|
||||
|
||||
// Shared constants
|
||||
static address ZERO;
|
||||
|
524
src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp
Normal file
524
src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp
Normal file
@ -0,0 +1,524 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Intel Corporation. All rights reserved.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
#include "stubGenerator_x86_64.hpp"
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - FMOD()
|
||||
// ---------------------
|
||||
//
|
||||
// If either value1 or value2 is NaN, the result is NaN.
|
||||
//
|
||||
// If neither value1 nor value2 is NaN, the sign of the result equals the sign of the dividend.
|
||||
//
|
||||
// If the dividend is an infinity or the divisor is a zero or both, the result is NaN.
|
||||
//
|
||||
// If the dividend is finite and the divisor is an infinity, the result equals the dividend.
|
||||
//
|
||||
// If the dividend is a zero and the divisor is finite, the result equals the dividend.
|
||||
//
|
||||
// In the remaining cases, where neither operand is an infinity, a zero, or NaN, the floating-point
|
||||
// remainder result from a dividend value1 and a divisor value2 is defined by the mathematical
|
||||
// relation result = value1 - (value2 * q), where q is an integer that is negative only if
|
||||
// value1 / value2 is negative, and positive only if value1 / value2 is positive, and whose magnitude
|
||||
// is as large as possible without exceeding the magnitude of the true mathematical quotient of value1 and value2.
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#define __ _masm->
|
||||
|
||||
// Constant tables for the fmod stub. Each value is a raw IEEE-754 double bit
// pattern; ATTRIBUTE_ALIGNED(32) keeps the loads AVX-friendly.
ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_NaN[] = {
    0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL // NaN vector; used as an abs() mask (clears the sign bit)
};
ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_1p260[] = {
    0x5030000000000000ULL, // 0x1p+260
};

ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_MAX[] = {
    0x7FEFFFFFFFFFFFFFULL, // Max finite double (DBL_MAX)
};

ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_INF[] = {
    0x7FF0000000000000ULL, // +Inf
};

ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_e307[] = {
    0x7FE0000000000000ULL // 0x1p+1023 (~8.99e307), used to scale the divisor for huge quotients
};
|
||||
|
||||
// Generates the double-precision fmod stub. Inputs: x in xmm0, y in xmm1;
// result returned in xmm0. Two code paths are emitted depending on CPU
// features: an AVX-512 path using EVEX static round-toward-zero arithmetic,
// and an AVX2/FMA path that switches MXCSR to round-toward-zero instead.
// Both are mechanical ports of the Intel reference C source quoted in the
// comments below.
address StubGenerator::generate_libmFmod() {
  StubCodeMark mark(this, "StubRoutines", "libmFmod");
  address start = __ pc();
  __ enter(); // required for proper stackwalking of RuntimeStub frame

  if (VM_Version::supports_avx512vlbwdq()) {     // AVX512 version

    // Source used to generate the AVX512 fmod assembly below:
    //
    // #include <ia32intrin.h>
    // #include <emmintrin.h>
    // #pragma float_control(precise, on)
    //
    // #define UINT32 unsigned int
    // #define SINT32 int
    // #define UINT64 unsigned __int64
    // #define SINT64 __int64
    //
    // #define DP_FMA(a, b, c)    __fence(_mm_cvtsd_f64(_mm_fmadd_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c))))
    // #define DP_FMA_RN(a, b, c) _mm_cvtsd_f64(_mm_fmadd_round_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c), (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)))
    // #define DP_FMA_RZ(a, b, c) __fence(_mm_cvtsd_f64(_mm_fmadd_round_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c), (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC))))
    //
    // #define DP_ROUND_RZ(a) _mm_cvtsd_f64(_mm_roundscale_sd(_mm_setzero_pd(), _mm_set_sd(a), (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)))
    //
    // #define DP_CONST(C)  _castu64_f64(0x##C##ull)
    // #define DP_AND(X, Y) _mm_cvtsd_f64(_mm_and_pd(_mm_set_sd(X), _mm_set_sd(Y)))
    // #define DP_XOR(X, Y) _mm_cvtsd_f64(_mm_xor_pd(_mm_set_sd(X), _mm_set_sd(Y)))
    // #define DP_OR(X, Y)  _mm_cvtsd_f64(_mm_or_pd(_mm_set_sd(X), _mm_set_sd(Y)))
    // #define DP_DIV_RZ(a, b)  __fence(_mm_cvtsd_f64(_mm_div_round_sd(_mm_set_sd(a), _mm_set_sd(b), (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC))))
    // #define DP_FNMA(a, b, c)    __fence(_mm_cvtsd_f64(_mm_fnmadd_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c))))
    // #define DP_FNMA_RZ(a, b, c) __fence(_mm_cvtsd_f64(_mm_fnmadd_round_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c), (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC))))
    //
    // #define D2L(x) _mm_castpd_si128(x)
    // // transfer highest 32 bits (of low 64b) to GPR
    // #define TRANSFER_HIGH_INT32(X) _mm_extract_epi32(D2L(_mm_set_sd(X)), 1)
    //
    // double fmod(double x, double y)
    // {
    //   double a, b, sgn_a, q, bs, bs2;
    //   unsigned eq;

    Label L_5280, L_52a0, L_5256, L_5300, L_5320, L_52c0, L_52d0, L_5360, L_5380, L_53b0, L_5390;
    Label L_53c0, L_52a6, L_53d0, L_exit;

    __ movdqa(xmm2, xmm0);
    // // |x|, |y|
    // a = DP_AND(x, DP_CONST(7fffffffffffffff));
    __ movq(xmm0, xmm0);
    __ mov64(rax, 0x7FFFFFFFFFFFFFFFULL);
    __ evpbroadcastq(xmm3, rax, Assembler::AVX_128bit);
    __ vpand(xmm6, xmm0, xmm3, Assembler::AVX_128bit);
    // b = DP_AND(y, DP_CONST(7fffffffffffffff));
    __ vpand(xmm4, xmm1, xmm3, Assembler::AVX_128bit);
    // // sign(x)
    // sgn_a = DP_XOR(x, a);
    __ vpxor(xmm3, xmm6, xmm0, Assembler::AVX_128bit);
    // q = DP_DIV_RZ(a, b);
    __ movq(xmm5, xmm4);
    __ evdivsd(xmm0, xmm6, xmm5, Assembler::EVEX_RZ);
    // q = DP_ROUND_RZ(q);
    __ movq(xmm0, xmm0);
    // zero xmm7: it is the merge source for the vroundsd below
    __ vxorpd(xmm7, xmm7, xmm7, Assembler::AVX_128bit);
    // q = DP_ROUND_RZ(q);
    __ vroundsd(xmm0, xmm7, xmm0, 0xb);
    // eq = TRANSFER_HIGH_INT32(q);
    __ extractps(rax, xmm0, 1);
    // if (!eq) return x + sgn_a;
    __ testl(rax, rax);
    __ jcc(Assembler::equal, L_5280);
    // if (eq >= 0x7fefffffu) goto SPECIAL_FMOD;
    __ cmpl(rax, 0x7feffffe);
    __ jcc(Assembler::belowEqual, L_52a0);
    __ vpxor(xmm2, xmm2, xmm2, Assembler::AVX_128bit);
    // SPECIAL_FMOD:
    //
    // // y==0 or x==Inf?
    // if ((b == 0.0) || (!(a <= DP_CONST(7fefffffffffffff))))
    __ ucomisd(xmm4, xmm2);
    __ jcc(Assembler::notEqual, L_5256);
    __ jcc(Assembler::noParity, L_5300);
    __ bind(L_5256);
    __ movsd(xmm2, ExternalAddress((address)CONST_MAX), rax);
    __ ucomisd(xmm2, xmm6);
    __ jcc(Assembler::below, L_5300);
    __ movsd(xmm0, ExternalAddress((address)CONST_INF), rax);
    //   return DP_FNMA(b, q, a); // NaN
    // // y is NaN?
    // if (!(b <= DP_CONST(7ff0000000000000))) return y + y;
    __ ucomisd(xmm0, xmm4);
    __ jcc(Assembler::aboveEqual, L_5320);
    __ vaddsd(xmm0, xmm1, xmm1);
    __ jmp(L_exit);
    // if (!eq) return x + sgn_a;
    __ align32();
    __ bind(L_5280);
    __ vaddsd(xmm0, xmm3, xmm2);
    __ jmp(L_exit);
    // a = DP_FNMA_RZ(b, q, a);
    __ align(8);
    __ bind(L_52a0);
    __ evfnmadd213sd(xmm0, xmm4, xmm6, Assembler::EVEX_RZ);
    // while (b <= a)
    __ bind(L_52a6);
    __ ucomisd(xmm0, xmm4);
    __ jcc(Assembler::aboveEqual, L_52c0);
    // a = DP_XOR(a, sgn_a);
    __ vpxor(xmm0, xmm3, xmm0, Assembler::AVX_128bit);
    __ jmp(L_exit);
    __ bind(L_52c0);
    __ movq(xmm6, xmm0);
    // q = DP_ROUND_RZ(q);
    __ vpxor(xmm1, xmm1, xmm1, Assembler::AVX_128bit);
    __ align32();
    __ bind(L_52d0);
    // q = DP_DIV_RZ(a, b);
    __ evdivsd(xmm2, xmm6, xmm5, Assembler::EVEX_RZ);
    // q = DP_ROUND_RZ(q);
    __ movq(xmm2, xmm2);
    __ vroundsd(xmm2, xmm1, xmm2, 0xb);
    // a = DP_FNMA_RZ(b, q, a);
    __ evfnmadd213sd(xmm2, xmm4, xmm0, Assembler::EVEX_RZ);
    // while (b <= a)
    __ ucomisd(xmm2, xmm4);
    __ movq(xmm6, xmm2);
    __ movapd(xmm0, xmm2);
    __ jcc(Assembler::aboveEqual, L_52d0);
    // a = DP_XOR(a, sgn_a);
    __ vpxor(xmm0, xmm3, xmm2, Assembler::AVX_128bit);
    __ jmp(L_exit);
    // return DP_FNMA(b, q, a); // NaN
    __ bind(L_5300);
    __ vfnmadd213sd(xmm0, xmm4, xmm6);
    __ jmp(L_exit);
    // bs = b * DP_CONST(7fe0000000000000);
    __ bind(L_5320);
    __ vmulsd(xmm1, xmm4, ExternalAddress((address)CONST_e307), rax);
    // q = DP_DIV_RZ(a, bs);
    __ movq(xmm2, xmm1);
    __ evdivsd(xmm0, xmm6, xmm2, Assembler::EVEX_RZ);
    // q = DP_ROUND_RZ(q);
    __ movq(xmm0, xmm0);
    __ vroundsd(xmm7, xmm7, xmm0, 0xb);
    // eq = TRANSFER_HIGH_INT32(q);
    __ extractps(rax, xmm7, 1);
    // if (eq >= 0x7fefffffu)
    __ cmpl(rax, 0x7fefffff);
    __ jcc(Assembler::below, L_5360);
    // // b* 2*1023 * 2^1023
    // bs2 = bs * DP_CONST(7fe0000000000000);
    __ vmulsd(xmm0, xmm1, ExternalAddress((address)CONST_e307), rax);
    // while (bs2 <= a)
    __ ucomisd(xmm6, xmm0);
    __ jcc(Assembler::aboveEqual, L_5380);
    __ movapd(xmm7, xmm6);
    __ jmp(L_53b0);
    // a = DP_FNMA_RZ(b, q, a);
    __ bind(L_5360);
    __ evfnmadd213sd(xmm7, xmm1, xmm6, Assembler::EVEX_RZ);
    __ jmp(L_53b0);
    // q = DP_ROUND_RZ(q);
    __ bind(L_5380);
    __ vxorpd(xmm8, xmm8, xmm8, Assembler::AVX_128bit);
    // q = DP_DIV_RZ(a, bs2);
    __ align32();
    __ bind(L_5390);
    __ evdivsd(xmm7, xmm6, xmm0, Assembler::EVEX_RZ);
    // q = DP_ROUND_RZ(q);
    __ movq(xmm7, xmm7);
    __ vroundsd(xmm7, xmm8, xmm7, 0xb);
    // a = DP_FNMA_RZ(bs2, q, a);
    __ evfnmadd213sd(xmm7, xmm0, xmm6, Assembler::EVEX_RZ);
    // while (bs2 <= a)
    __ ucomisd(xmm7, xmm0);
    __ movapd(xmm6, xmm7);
    __ jcc(Assembler::aboveEqual, L_5390);
    // while (bs <= a)
    __ bind(L_53b0);
    __ ucomisd(xmm7, xmm1);
    __ jcc(Assembler::aboveEqual, L_53c0);
    __ movapd(xmm0, xmm7);
    __ jmp(L_52a6);
    // q = DP_ROUND_RZ(q);
    __ bind(L_53c0);
    __ vxorpd(xmm6, xmm6, xmm6, Assembler::AVX_128bit);
    // q = DP_DIV_RZ(a, bs);
    __ align32();
    __ bind(L_53d0);
    __ evdivsd(xmm0, xmm7, xmm2, Assembler::EVEX_RZ);
    // q = DP_ROUND_RZ(q);
    __ movq(xmm0, xmm0);
    __ vroundsd(xmm0, xmm6, xmm0, 0xb);
    // a = DP_FNMA_RZ(bs, q, a);
    __ evfnmadd213sd(xmm0, xmm1, xmm7, Assembler::EVEX_RZ);
    // while (bs <= a)
    __ ucomisd(xmm0, xmm1);
    __ movapd(xmm7, xmm0);
    __ jcc(Assembler::aboveEqual, L_53d0);
    __ jmp(L_52a6);

    __ bind(L_exit);

    ////////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////////
    //                                    AVX2 code
    ////////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////////
  } else if (VM_Version::supports_fma()) {       // AVX2 version

    Label L_104a, L_11bd, L_10c1, L_1090, L_11b9, L_10e7, L_11af, L_111c, L_10f3, L_116e, L_112a;
    Label L_1173, L_1157, L_117f, L_11a0;

    // double fmod(double x, double y)
    // {
    //   double a, b, sgn_a, q, bs, bs2, corr, res;
    //   unsigned eq;
    //   unsigned mxcsr, mxcsr_rz;

    //   __asm { stmxcsr DWORD PTR[mxcsr] }
    //   mxcsr_rz = 0x7f80 | mxcsr;
    // The saved MXCSR lives at [rsp, 0]; the round-toward-zero variant
    // (exceptions masked, RC=11b) at [rsp, 4].
    __ push(rax);
    __ stmxcsr(Address(rsp, 0));
    __ movl(rax, Address(rsp, 0));
    __ movl(rcx, rax);
    __ orl(rcx, 0x7f80);
    __ movl(Address(rsp, 0x04), rcx);

    // // |x|, |y|
    // a = DP_AND(x, DP_CONST(7fffffffffffffff));
    __ movq(xmm2, xmm0);
    __ vmovdqu(xmm3, ExternalAddress((address)CONST_NaN), rcx);
    __ vpand(xmm4, xmm2, xmm3, Assembler::AVX_128bit);
    // b = DP_AND(y, DP_CONST(7fffffffffffffff));
    __ vpand(xmm3, xmm1, xmm3, Assembler::AVX_128bit);
    // // sign(x)
    // sgn_a = DP_XOR(x, a);
    __ mov64(rcx, 0x8000000000000000ULL);
    __ movq(xmm5, rcx);
    __ vpand(xmm2, xmm2, xmm5, Assembler::AVX_128bit);

    // if (a < b) return x + sgn_a;
    __ ucomisd(xmm3, xmm4);
    __ jcc(Assembler::belowEqual, L_104a);
    __ vaddsd(xmm0, xmm2, xmm0);
    __ jmp(L_11bd);

    // if (((mxcsr & 0x6000)!=0x2000) && (a < b * 0x1p+260))
    __ bind(L_104a);
    __ andl(rax, 0x6000);
    __ cmpl(rax, 0x2000);
    __ jcc(Assembler::equal, L_10c1);
    __ vmulsd(xmm0, xmm3, ExternalAddress((address)CONST_1p260), rax);
    __ ucomisd(xmm0, xmm4);
    __ jcc(Assembler::belowEqual, L_10c1);
    // {
    //   q = DP_DIV(a, b);
    __ vdivpd(xmm0, xmm4, xmm3, Assembler::AVX_128bit);
    //   corr = DP_SHR(DP_FNMA(b, q, a), 63);
    __ movapd(xmm1, xmm0);
    __ vfnmadd213sd(xmm1, xmm3, xmm4);
    __ movq(xmm5, xmm1);
    __ vpxor(xmm1, xmm1, xmm1, Assembler::AVX_128bit);
    __ vpcmpgtq(xmm5, xmm1, xmm5, Assembler::AVX_128bit);
    //   q = DP_PSUBQ(q, corr);
    __ vpaddq(xmm0, xmm5, xmm0, Assembler::AVX_128bit);
    //   q = DP_TRUNC(q);
    __ vroundsd(xmm0, xmm0, xmm0, 3);
    //   a = DP_FNMA(b, q, a);
    __ vfnmadd213sd(xmm0, xmm3, xmm4);
    __ align32();
    //   while (b <= a)
    __ bind(L_1090);
    __ ucomisd(xmm0, xmm3);
    __ jcc(Assembler::below, L_11b9);
    //   {
    //     q = DP_DIV(a, b);
    __ vdivsd(xmm4, xmm0, xmm3);
    //     corr = DP_SHR(DP_FNMA(b, q, a), 63);
    __ movapd(xmm5, xmm4);
    __ vfnmadd213sd(xmm5, xmm3, xmm0);
    __ movq(xmm5, xmm5);
    __ vpcmpgtq(xmm5, xmm1, xmm5, Assembler::AVX_128bit);
    //     q = DP_PSUBQ(q, corr);
    __ vpaddq(xmm4, xmm5, xmm4, Assembler::AVX_128bit);
    //     q = DP_TRUNC(q);
    __ vroundsd(xmm4, xmm4, xmm4, 3);
    //     a = DP_FNMA(b, q, a);
    __ vfnmadd231sd(xmm0, xmm3, xmm4);
    __ jmp(L_1090);
    //   }
    //   return DP_XOR(a, sgn_a);
    // }

    // __asm { ldmxcsr DWORD PTR [mxcsr_rz] }
    __ bind(L_10c1);
    __ ldmxcsr(Address(rsp, 0x04));

    // q = DP_DIV(a, b);
    __ vdivpd(xmm0, xmm4, xmm3, Assembler::AVX_128bit);
    // q = DP_TRUNC(q);
    __ vroundsd(xmm0, xmm0, xmm0, 3);

    // eq = TRANSFER_HIGH_INT32(q);
    __ extractps(rax, xmm0, 1);

    // if (__builtin_expect((eq >= 0x7fefffffu), (0==1))) goto SPECIAL_FMOD;
    __ cmpl(rax, 0x7feffffe);
    __ jcc(Assembler::above, L_10e7);

    // a = DP_FNMA(b, q, a);
    __ vfnmadd213sd(xmm0, xmm3, xmm4);
    __ jmp(L_11af);

    // SPECIAL_FMOD:

    // // y==0 or x==Inf?
    // if ((b == 0.0) || (!(a <= DP_CONST(7fefffffffffffff))))
    __ bind(L_10e7);
    __ vpxor(xmm5, xmm5, xmm5, Assembler::AVX_128bit);
    __ ucomisd(xmm3, xmm5);
    __ jcc(Assembler::notEqual, L_10f3);
    __ jcc(Assembler::noParity, L_111c);

    __ bind(L_10f3);
    __ movsd(xmm5, ExternalAddress((address)CONST_MAX), rax);
    __ ucomisd(xmm5, xmm4);
    __ jcc(Assembler::below, L_111c);
    //   return res;
    // }
    // // y is NaN?
    // if (!(b <= DP_CONST(7ff0000000000000))) {
    __ movsd(xmm0, ExternalAddress((address)CONST_INF), rax);
    __ ucomisd(xmm0, xmm3);
    __ jcc(Assembler::aboveEqual, L_112a);
    //   res = y + y;
    __ vaddsd(xmm0, xmm1, xmm1);
    //   __asm { ldmxcsr DWORD PTR[mxcsr] }
    __ ldmxcsr(Address(rsp, 0));
    __ jmp(L_11bd);
    // {
    //   res = DP_FNMA(b, q, a); // NaN
    __ bind(L_111c);
    __ vfnmadd213sd(xmm0, xmm3, xmm4);
    //   __asm { ldmxcsr DWORD PTR[mxcsr] }
    __ ldmxcsr(Address(rsp, 0));
    __ jmp(L_11bd);
    //   return res;
    // }

    // // b* 2*1023
    // bs = b * DP_CONST(7fe0000000000000);
    __ bind(L_112a);
    __ vmulsd(xmm1, xmm3, ExternalAddress((address)CONST_e307), rax);

    // q = DP_DIV(a, bs);
    __ vdivsd(xmm0, xmm4, xmm1);
    // q = DP_TRUNC(q);
    __ vroundsd(xmm0, xmm0, xmm0, 3);

    // eq = TRANSFER_HIGH_INT32(q);
    __ extractps(rax, xmm0, 1);

    // if (eq >= 0x7fefffffu)
    __ cmpl(rax, 0x7fefffff);
    __ jcc(Assembler::below, L_116e);
    // {
    //   // b* 2*1023 * 2^1023
    //   bs2 = bs * DP_CONST(7fe0000000000000);
    __ vmulsd(xmm0, xmm1, ExternalAddress((address)CONST_e307), rax);
    //   while (bs2 <= a)
    __ ucomisd(xmm4, xmm0);
    __ jcc(Assembler::below, L_1173);
    //   {
    //     q = DP_DIV(a, bs2);
    __ bind(L_1157);
    __ vdivsd(xmm5, xmm4, xmm0);
    //     q = DP_TRUNC(q);
    __ vroundsd(xmm5, xmm5, xmm5, 3);
    //     a = DP_FNMA(bs2, q, a);
    __ vfnmadd231sd(xmm4, xmm0, xmm5);
    //   while (bs2 <= a)
    __ ucomisd(xmm4, xmm0);
    __ jcc(Assembler::aboveEqual, L_1157);
    __ jmp(L_1173);
    //   }
    // }
    // else
    //   a = DP_FNMA(bs, q, a);
    __ bind(L_116e);
    __ vfnmadd231sd(xmm4, xmm1, xmm0);

    // while (bs <= a)
    __ bind(L_1173);
    __ ucomisd(xmm4, xmm1);
    __ jcc(Assembler::aboveEqual, L_117f);
    __ movapd(xmm0, xmm4);
    __ jmp(L_11af);
    // {
    //   q = DP_DIV(a, bs);
    __ bind(L_117f);
    __ vdivsd(xmm0, xmm4, xmm1);
    //   q = DP_TRUNC(q);
    __ vroundsd(xmm0, xmm0, xmm0, 3);
    //   a = DP_FNMA(bs, q, a);
    __ vfnmadd213sd(xmm0, xmm1, xmm4);

    // while (bs <= a)
    __ ucomisd(xmm0, xmm1);
    __ movapd(xmm4, xmm0);
    __ jcc(Assembler::aboveEqual, L_117f);
    __ jmp(L_11af);
    __ align32();
    // {
    //   q = DP_DIV(a, b);
    __ bind(L_11a0);
    __ vdivsd(xmm1, xmm0, xmm3);
    //   q = DP_TRUNC(q);
    __ vroundsd(xmm1, xmm1, xmm1, 3);
    //   a = DP_FNMA(b, q, a);
    __ vfnmadd231sd(xmm0, xmm3, xmm1);

    // FMOD_CONT:
    // while (b <= a)
    __ bind(L_11af);
    __ ucomisd(xmm0, xmm3);
    __ jcc(Assembler::aboveEqual, L_11a0);
    // }

    // __asm { ldmxcsr DWORD PTR[mxcsr] }
    __ ldmxcsr(Address(rsp, 0));
    __ bind(L_11b9);
    __ vpxor(xmm0, xmm2, xmm0, Assembler::AVX_128bit);
    // }

    // goto FMOD_CONT;

    // }
    __ bind(L_11bd);
    __ pop(rax);

  } else {                                       // SSE version
    assert(false, "SSE not implemented");
  }

  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
||||
|
||||
#undef __
|
@ -161,6 +161,7 @@ address StubRoutines::_vectorizedMismatch = nullptr;
|
||||
address StubRoutines::_dexp = nullptr;
|
||||
address StubRoutines::_dlog = nullptr;
|
||||
address StubRoutines::_dlog10 = nullptr;
|
||||
address StubRoutines::_fmod = nullptr;
|
||||
address StubRoutines::_dpow = nullptr;
|
||||
address StubRoutines::_dsin = nullptr;
|
||||
address StubRoutines::_dcos = nullptr;
|
||||
|
@ -249,6 +249,7 @@ class StubRoutines: AllStatic {
|
||||
static address _dlibm_reduce_pi04l;
|
||||
static address _dlibm_tan_cot_huge;
|
||||
static address _dtan;
|
||||
static address _fmod;
|
||||
|
||||
static address _f2hf;
|
||||
static address _hf2f;
|
||||
@ -425,6 +426,7 @@ class StubRoutines: AllStatic {
|
||||
static address dlog() { return _dlog; }
|
||||
static address dlog10() { return _dlog10; }
|
||||
static address dpow() { return _dpow; }
|
||||
static address fmod() { return _fmod; }
|
||||
static address dsin() { return _dsin; }
|
||||
static address dcos() { return _dcos; }
|
||||
static address dlibm_reduce_pi04l() { return _dlibm_reduce_pi04l; }
|
||||
|
@ -556,6 +556,7 @@
|
||||
static_field(StubRoutines, _dlog, address) \
|
||||
static_field(StubRoutines, _dlog10, address) \
|
||||
static_field(StubRoutines, _dpow, address) \
|
||||
static_field(StubRoutines, _fmod, address) \
|
||||
static_field(StubRoutines, _dsin, address) \
|
||||
static_field(StubRoutines, _dcos, address) \
|
||||
static_field(StubRoutines, _dtan, address) \
|
||||
|
131
test/hotspot/jtreg/compiler/floatingpoint/DmodTest.java
Normal file
131
test/hotspot/jtreg/compiler/floatingpoint/DmodTest.java
Normal file
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Intel Corporation. All rights reserved.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8308966
|
||||
* @summary Add intrinsic for float/double modulo for x86 AVX2 and AVX512
|
||||
* @run main compiler.floatingpoint.DmodTest
|
||||
*/
|
||||
|
||||
package compiler.floatingpoint;
|
||||
|
||||
import java.lang.Double;
|
||||
|
||||
public class DmodTest {
|
||||
static double [] op1 = { 1.2345d, 0.0d, -0.0d, 1.0d/0.0d, -1.0d/0.0d, 0.0d/0.0d };
|
||||
static double [] op2 = { 1.2345d, 0.0d, -0.0d, 1.0d/0.0d, -1.0d/0.0d, 0.0d/0.0d };
|
||||
static double [][] res = {
|
||||
{
|
||||
0.0d,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
1.2345d,
|
||||
1.2345d,
|
||||
Double.NaN,
|
||||
},
|
||||
{
|
||||
0.0d,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
0.0d,
|
||||
0.0d,
|
||||
Double.NaN,
|
||||
},
|
||||
{
|
||||
-0.0d,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
-0.0d,
|
||||
-0.0d,
|
||||
Double.NaN,
|
||||
},
|
||||
{
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
},
|
||||
{
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
},
|
||||
{
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
Double.NaN,
|
||||
},
|
||||
};
|
||||
public static void main(String[] args) throws Exception {
|
||||
double f1, f2, f3;
|
||||
boolean failure = false;
|
||||
boolean print_failure = false;
|
||||
for (int i = 0; i < 100_000; i++) {
|
||||
for (int j = 0; j < op1.length; j++) {
|
||||
for (int k = 0; k < op2.length; k++) {
|
||||
f1 = op1[j];
|
||||
f2 = op2[k];
|
||||
f3 = f1 % f2;
|
||||
|
||||
if (Double.isNaN(res[j][k])) {
|
||||
if (!Double.isNaN(f3)) {
|
||||
failure = true;
|
||||
print_failure = true;
|
||||
}
|
||||
} else if (Double.isNaN(f3)) {
|
||||
failure = true;
|
||||
print_failure = true;
|
||||
} else if (f3 != res[j][k]) {
|
||||
failure = true;
|
||||
print_failure = true;
|
||||
}
|
||||
|
||||
if (print_failure) {
|
||||
System.out.println( "Actual " + f1 + " % " + f2 + " = " + f3);
|
||||
System.out.println( "Expected " + f1 + " % " + f2 + " = " + res[j][k]);
|
||||
print_failure = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (failure) {
|
||||
throw new RuntimeException("Test Failed");
|
||||
} else {
|
||||
System.out.println("Test passed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
130
test/hotspot/jtreg/compiler/floatingpoint/FmodTest.java
Normal file
130
test/hotspot/jtreg/compiler/floatingpoint/FmodTest.java
Normal file
@ -0,0 +1,130 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Intel Corporation. All rights reserved.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8308966
|
||||
* @summary Add intrinsic for float/double modulo for x86 AVX2 and AVX512
|
||||
* @run main compiler.floatingpoint.FmodTest
|
||||
*/
|
||||
|
||||
package compiler.floatingpoint;
|
||||
|
||||
import java.lang.Float;
|
||||
|
||||
public class FmodTest {
|
||||
static float [] op1 = { 1.2345f, 0.0f, -0.0f, 1.0f/0.0f, -1.0f/0.0f, 0.0f/0.0f };
|
||||
static float [] op2 = { 1.2345f, 0.0f, -0.0f, 1.0f/0.0f, -1.0f/0.0f, 0.0f/0.0f };
|
||||
static float [][] res = {
|
||||
{
|
||||
0.0f,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
1.2345f,
|
||||
1.2345f,
|
||||
Float.NaN,
|
||||
},
|
||||
{
|
||||
0.0f,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
0.0f,
|
||||
0.0f,
|
||||
Float.NaN,
|
||||
},
|
||||
{
|
||||
-0.0f,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
-0.0f,
|
||||
-0.0f,
|
||||
Float.NaN,
|
||||
},
|
||||
{
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
},
|
||||
{
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
},
|
||||
{
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
Float.NaN,
|
||||
},
|
||||
};
|
||||
public static void main(String[] args) throws Exception {
|
||||
float f1, f2, f3;
|
||||
boolean failure = false;
|
||||
boolean print_failure = false;
|
||||
for (int i = 0; i < 100_000; i++) {
|
||||
for (int j = 0; j < op1.length; j++) {
|
||||
for (int k = 0; k < op2.length; k++) {
|
||||
f1 = op1[j];
|
||||
f2 = op2[k];
|
||||
f3 = f1 % f2;
|
||||
|
||||
if (Float.isNaN(res[j][k])) {
|
||||
if (!Float.isNaN(f3)) {
|
||||
failure = true;
|
||||
print_failure = true;
|
||||
}
|
||||
} else if (Float.isNaN(f3)) {
|
||||
failure = true;
|
||||
print_failure = true;
|
||||
} else if (f3 != res[j][k]) {
|
||||
failure = true;
|
||||
print_failure = true;
|
||||
}
|
||||
|
||||
if (print_failure) {
|
||||
System.out.println( "Actual " + f1 + " % " + f2 + " = " + f3);
|
||||
System.out.println( "Expected " + f1 + " % " + f2 + " = " + res[j][k]);
|
||||
print_failure = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (failure) {
|
||||
throw new RuntimeException("Test Failed");
|
||||
} else {
|
||||
System.out.println("Test passed.");
|
||||
}
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user