8152907: Update for x86 tan and log10 in the math lib
Optimize Math.tan() and Math.log10() for the 64-bit and 32-bit x86 architectures using the Intel LIBM implementation. Reviewed-by: kvn, twisti
This commit is contained in:
parent
429b1c80a1
commit
ad79a5ae65
@ -1827,6 +1827,15 @@ void Assembler::cvttss2sil(Register dst, XMMRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the register-to-register form of CVTTPD2DQ (66 0F E6 /r): convert
// packed double-precision values in src to packed doubleword integers in dst,
// with truncation.
//   dst - destination XMM register
//   src - source XMM register
// The instruction has no second source operand, so xnoreg is passed in the
// middle (nds) position of simd_prefix_and_encode().
void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
|
||||
// SSE2 is only asserted on non-LP64 builds.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
// Use the 512-bit vector length when supports_avx512novl() is true, otherwise 128-bit.
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
// 0x66 SIMD prefix, 0x0F opcode map; the returned value carries the register bits for the ModRM byte.
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xE6); // opcode byte
|
||||
emit_int8((unsigned char)(0xC0 | encode)); // ModRM with mod=11 (register-direct operands)
|
||||
}
|
||||
|
||||
void Assembler::decl(Address dst) {
|
||||
// Don't use it directly. Use MacroAssembler::decrement() instead.
|
||||
InstructionMark im(this);
|
||||
@ -4993,7 +5002,7 @@ void Assembler::paddq(XMMRegister dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
|
||||
assert(VM_Version::supports_sse3(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x01);
|
||||
@ -5001,7 +5010,7 @@ void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
|
||||
assert(VM_Version::supports_sse3(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x02);
|
||||
|
@ -1048,6 +1048,8 @@ private:
|
||||
void cvttss2sil(Register dst, XMMRegister src);
|
||||
void cvttss2siq(Register dst, XMMRegister src);
|
||||
|
||||
// Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers
void cvttpd2dq(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// Divide Scalar Double-Precision Floating-Point Values
|
||||
void divsd(XMMRegister dst, Address src);
|
||||
void divsd(XMMRegister dst, XMMRegister src);
|
||||
|
@ -2365,13 +2365,8 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
|
||||
} else if (value->is_double_fpu()) {
|
||||
assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
|
||||
switch(code) {
|
||||
case lir_log10 : __ flog10() ; break;
|
||||
case lir_abs : __ fabs() ; break;
|
||||
case lir_sqrt : __ fsqrt(); break;
|
||||
case lir_tan :
|
||||
// Should consider not saving rbx, if not necessary
|
||||
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
|
||||
break;
|
||||
default : ShouldNotReachHere();
|
||||
}
|
||||
} else {
|
||||
|
@ -812,7 +812,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
||||
|
||||
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
|
||||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
|
||||
x->id() == vmIntrinsics::_dsin) {
|
||||
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
|
||||
x->id() == vmIntrinsics::_dlog10) {
|
||||
do_LibmIntrinsic(x);
|
||||
return;
|
||||
}
|
||||
@ -820,58 +821,17 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
||||
LIRItem value(x->argument_at(0), this);
|
||||
|
||||
bool use_fpu = false;
|
||||
if (UseSSE >= 2) {
|
||||
switch(x->id()) {
|
||||
case vmIntrinsics::_dtan:
|
||||
case vmIntrinsics::_dlog10:
|
||||
use_fpu = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (UseSSE < 2) {
|
||||
value.set_destroys_register();
|
||||
}
|
||||
|
||||
value.load_item();
|
||||
|
||||
LIR_Opr calc_input = value.result();
|
||||
LIR_Opr calc_input2 = NULL;
|
||||
if (x->id() == vmIntrinsics::_dpow) {
|
||||
LIRItem extra_arg(x->argument_at(1), this);
|
||||
if (UseSSE < 2) {
|
||||
extra_arg.set_destroys_register();
|
||||
}
|
||||
extra_arg.load_item();
|
||||
calc_input2 = extra_arg.result();
|
||||
}
|
||||
LIR_Opr calc_result = rlock_result(x);
|
||||
|
||||
// sin, cos, pow and exp need two free fpu stack slots, so register
|
||||
// two temporary operands
|
||||
LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0);
|
||||
LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1);
|
||||
|
||||
if (use_fpu) {
|
||||
LIR_Opr tmp = FrameMap::fpu0_double_opr;
|
||||
int tmp_start = 1;
|
||||
if (calc_input2 != NULL) {
|
||||
__ move(calc_input2, tmp);
|
||||
tmp_start = 2;
|
||||
calc_input2 = tmp;
|
||||
}
|
||||
__ move(calc_input, tmp);
|
||||
|
||||
calc_input = tmp;
|
||||
calc_result = tmp;
|
||||
|
||||
tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start);
|
||||
tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1);
|
||||
}
|
||||
|
||||
switch(x->id()) {
|
||||
case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
|
||||
case vmIntrinsics::_dsqrt: __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
|
||||
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
|
||||
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
|
||||
@ -912,21 +872,28 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
||||
result_reg = tmp;
|
||||
switch(x->id()) {
|
||||
case vmIntrinsics::_dexp:
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog:
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog10:
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dpow:
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
|
||||
@ -946,18 +913,44 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dtan:
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
#else
|
||||
switch (x->id()) {
|
||||
case vmIntrinsics::_dexp:
|
||||
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog:
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog10:
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dpow:
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dsin:
|
||||
if (StubRoutines::dsin() != NULL) {
|
||||
@ -973,6 +966,13 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dtan:
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
#endif // _LP64
|
||||
|
@ -786,58 +786,6 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
|
||||
break;
|
||||
}
|
||||
|
||||
case lir_log10: {
|
||||
// log and log10 need one temporary fpu stack slot, so
|
||||
// there is one temporary registers stored in temp of the
|
||||
// operation. the stack allocator must guarantee that the stack
|
||||
// slots are really free, otherwise there might be a stack
|
||||
// overflow.
|
||||
assert(right->is_illegal(), "must be");
|
||||
assert(left->is_fpu_register(), "must be");
|
||||
assert(res->is_fpu_register(), "must be");
|
||||
assert(op2->tmp1_opr()->is_fpu_register(), "must be");
|
||||
|
||||
insert_free_if_dead(op2->tmp1_opr());
|
||||
insert_free_if_dead(res, left);
|
||||
insert_exchange(left);
|
||||
do_rename(left, res);
|
||||
|
||||
new_left = to_fpu_stack_top(res);
|
||||
new_res = new_left;
|
||||
|
||||
op2->set_fpu_stack_size(sim()->stack_size());
|
||||
assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case lir_tan: {
|
||||
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary
|
||||
// registers (stored in right and temp of the operation).
|
||||
// the stack allocator must guarantee that the stack slots are really free,
|
||||
// otherwise there might be a stack overflow.
|
||||
assert(left->is_fpu_register(), "must be");
|
||||
assert(res->is_fpu_register(), "must be");
|
||||
// assert(left->is_last_use(), "old value gets destroyed");
|
||||
assert(right->is_fpu_register(), "right is used as the first temporary register");
|
||||
assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register");
|
||||
assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
|
||||
|
||||
insert_free_if_dead(right);
|
||||
insert_free_if_dead(op2->tmp1_opr());
|
||||
|
||||
insert_free_if_dead(res, left);
|
||||
insert_exchange(left);
|
||||
do_rename(left, res);
|
||||
|
||||
new_left = to_fpu_stack_top(res);
|
||||
new_res = new_left;
|
||||
|
||||
op2->set_fpu_stack_size(sim()->stack_size());
|
||||
assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
assert(false, "missed a fpu-operation");
|
||||
}
|
||||
|
@ -194,9 +194,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||
product(bool, UseBMI2Instructions, false, \
|
||||
"Use BMI2 instructions") \
|
||||
\
|
||||
diagnostic(bool, UseLibmSinIntrinsic, true, \
|
||||
"Use Libm Sin Intrinsic") \
|
||||
\
|
||||
diagnostic(bool, UseLibmCosIntrinsic, true, \
|
||||
"Use Libm Cos Intrinsic")
|
||||
diagnostic(bool, UseLibmIntrinsic, true, \
|
||||
"Use Libm Intrinsics")
|
||||
#endif // CPU_X86_VM_GLOBALS_X86_HPP
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -928,6 +928,10 @@ class MacroAssembler: public Assembler {
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
|
||||
|
||||
// log10 member of the fast_* math routines declared in this class; per the
// change description it is the Intel LIBM-based Math.log10() implementation.
// Takes eight XMM registers and four general-purpose registers (the last one
// is named r11 here).
// NOTE(review): declared before the `#else` of this header section, so this is
// presumably the _LP64 variant - confirm against the enclosing #ifdef.
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register r11);
|
||||
|
||||
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||
Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
|
||||
@ -941,11 +945,19 @@ class MacroAssembler: public Assembler {
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1,
|
||||
Register tmp2, Register tmp3, Register tmp4);
|
||||
// tan member of the fast_* math routines declared in this class; per the
// change description it is the Intel LIBM-based Math.tan() implementation.
// Takes eight XMM registers, rax/rcx/rdx and four additional temporary
// general-purpose registers (tmp1..tmp4).
// NOTE(review): declared before the `#else` of this header section, so this is
// presumably the _LP64 variant - confirm against the enclosing #ifdef.
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1,
|
||||
Register tmp2, Register tmp3, Register tmp4);
|
||||
#else
|
||||
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1);
|
||||
|
||||
// log10 member of the fast_* math routines in the `#else` branch of this
// header section; per the change description it is the Intel LIBM-based
// Math.log10() implementation. Takes eight XMM registers, rax/rcx/rdx and a
// single temporary general-purpose register.
// NOTE(review): presumably the 32-bit (non-_LP64) variant - confirm against
// the enclosing #ifdef.
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp);
|
||||
|
||||
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||
Register rdx, Register tmp);
|
||||
@ -964,6 +976,14 @@ class MacroAssembler: public Assembler {
|
||||
|
||||
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
|
||||
Register esi, Register edi, Register ebp, Register esp);
|
||||
|
||||
// Helper declared next to libm_reduce_pi04l() in the `#else` branch of this
// header section. Takes two XMM registers and eight general-purpose registers
// named after the 32-bit x86 register set.
// NOTE(review): by its name this looks like the tan/cot path for huge
// arguments used by the 32-bit fast_tan() - confirm against the definition.
void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
|
||||
Register edx, Register ebx, Register esi, Register edi,
|
||||
Register ebp, Register esp);
|
||||
|
||||
// tan member of the fast_* math routines in the `#else` branch of this header
// section; per the change description it is the Intel LIBM-based Math.tan()
// implementation. Takes eight XMM registers, rax/rcx/rdx and a single
// temporary general-purpose register.
// NOTE(review): presumably the 32-bit (non-_LP64) variant - confirm against
// the enclosing #ifdef.
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp);
|
||||
#endif
|
||||
|
||||
void increase_precision();
|
||||
|
889
hotspot/src/cpu/x86/vm/macroAssembler_x86_cos.cpp
Normal file
889
hotspot/src/cpu/x86/vm/macroAssembler_x86_cos.cpp
Normal file
@ -0,0 +1,889 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - COS()
|
||||
// ---------------------
|
||||
//
|
||||
// 1. RANGE REDUCTION
|
||||
//
|
||||
// We perform an initial range reduction from X to r with
|
||||
//
|
||||
// X =~= N * pi/32 + r
|
||||
//
|
||||
// so that |r| <= pi/64 + epsilon. We restrict inputs to those
|
||||
// where |N| <= 932560. Beyond this, the range reduction is
|
||||
// insufficiently accurate. For extremely small inputs,
|
||||
// denormalization can occur internally, impacting performance.
|
||||
// This means that the main path is actually only taken for
|
||||
// 2^-252 <= |X| < 90112.
|
||||
//
|
||||
// To avoid branches, we perform the range reduction to full
|
||||
// accuracy each time.
|
||||
//
|
||||
// X - N * (P_1 + P_2 + P_3)
|
||||
//
|
||||
// where P_1 and P_2 are 32-bit numbers (so multiplication by N
|
||||
// is exact) and P_3 is a 53-bit number. Together, these
|
||||
// approximate pi/32 well enough for all cases in the restricted
|
||||
// range.
|
||||
//
|
||||
// The main reduction sequence is:
|
||||
//
|
||||
// y = 32/pi * x
|
||||
// N = integer(y)
|
||||
// (computed by adding and subtracting off SHIFTER)
|
||||
//
|
||||
// m_1 = N * P_1
|
||||
// m_2 = N * P_2
|
||||
// r_1 = x - m_1
|
||||
// r = r_1 - m_2
|
||||
// (this r can be used for most of the calculation)
|
||||
//
|
||||
// c_1 = r_1 - r
|
||||
// m_3 = N * P_3
|
||||
// c_2 = c_1 - m_2
|
||||
// c = c_2 - m_3
|
||||
//
|
||||
// 2. MAIN ALGORITHM
|
||||
//
|
||||
// The algorithm uses a table lookup based on B = M * pi / 32
|
||||
// where M = N mod 64. The stored values are:
|
||||
// sigma closest power of 2 to cos(B)
|
||||
// C_hl 53-bit cos(B) - sigma
|
||||
// S_hi + S_lo 2 * 53-bit sin(B)
|
||||
//
|
||||
// The computation is organized as follows:
|
||||
//
|
||||
// sin(B + r + c) = [sin(B) + sigma * r] +
|
||||
// r * (cos(B) - sigma) +
|
||||
// sin(B) * [cos(r + c) - 1] +
|
||||
// cos(B) * [sin(r + c) - r]
|
||||
//
|
||||
// which is approximately:
|
||||
//
|
||||
// [S_hi + sigma * r] +
|
||||
// C_hl * r +
|
||||
// S_lo + S_hi * [(cos(r) - 1) - r * c] +
|
||||
// (C_hl + sigma) * [(sin(r) - r) + c]
|
||||
//
|
||||
// and this is what is actually computed. We separate this sum
|
||||
// into four parts:
|
||||
//
|
||||
// hi + med + pols + corr
|
||||
//
|
||||
// where
|
||||
//
|
||||
// hi = S_hi + sigma r
|
||||
// med = C_hl * r
|
||||
// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
|
||||
// corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
|
||||
//
|
||||
// 3. POLYNOMIAL
|
||||
//
|
||||
// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
|
||||
// (sin(r) - r) can be rearranged freely, since it is quite
|
||||
// small, so we exploit parallelism to the fullest.
|
||||
//
|
||||
// psc4 = SC_4 * r_1
|
||||
// msc4 = psc4 * r
|
||||
// r2 = r * r
|
||||
// msc2 = SC_2 * r2
|
||||
// r4 = r2 * r2
|
||||
// psc3 = SC_3 + msc4
|
||||
// psc1 = SC_1 + msc2
|
||||
// msc3 = r4 * psc3
|
||||
// sincospols = psc1 + msc3
|
||||
// pols = sincospols *
|
||||
// <S_hi * r^2 | (C_hl + sigma) * r^3>
|
||||
//
|
||||
// 4. CORRECTION TERM
|
||||
//
|
||||
// This is where the "c" component of the range reduction is
|
||||
// taken into account; recall that just "r" is used for most of
|
||||
// the calculation.
|
||||
//
|
||||
// -c = m_3 - c_2
|
||||
// -d = S_hi * r - (C_hl + sigma)
|
||||
// corr = -c * -d + S_lo
|
||||
//
|
||||
// 5. COMPENSATED SUMMATIONS
|
||||
//
|
||||
// The two successive compensated summations add up the high
|
||||
// and medium parts, leaving just the low parts to add up at
|
||||
// the end.
|
||||
//
|
||||
// rs = sigma * r
|
||||
// res_int = S_hi + rs
|
||||
// k_0 = S_hi - res_int
|
||||
// k_2 = k_0 + rs
|
||||
// med = C_hl * r
|
||||
// res_hi = res_int + med
|
||||
// k_1 = res_int - res_hi
|
||||
// k_3 = k_1 + med
|
||||
//
|
||||
// 6. FINAL SUMMATION
|
||||
//
|
||||
// We now add up all the small parts:
|
||||
//
|
||||
// res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3
|
||||
//
|
||||
// Now the overall result is just:
|
||||
//
|
||||
// res_hi + res_lo
|
||||
//
|
||||
// 7. SMALL ARGUMENTS
|
||||
//
|
||||
// Inputs with |X| < 2^-252 are treated specially as
|
||||
// 1 - |x|.
|
||||
//
|
||||
// Special cases:
|
||||
// cos(NaN) = quiet NaN, and raise invalid exception
|
||||
// cos(INF) = NaN and raise invalid exception
|
||||
// cos(0) = 1
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Bit pattern of the double constant 1.0 (0x3ff0000000000000), stored as two
// words with the low word first and declared with 8-byte alignment.
// fast_cos() takes its address as ONE and loads it with movq on the
// small-argument path before subtracting |x| from it.
ALIGNED_(8) juint _ONE[] =
|
||||
{
|
||||
0x00000000UL, 0x3ff00000UL
|
||||
};
|
||||
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) {
|
||||
|
||||
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
|
||||
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
|
||||
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
|
||||
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start;
|
||||
|
||||
assert_different_registers(r8, r9, r10, r11, eax, ecx, edx);
|
||||
|
||||
address ONEHALF = StubRoutines::x86::_ONEHALF_addr();
|
||||
address P_2 = StubRoutines::x86::_P_2_addr();
|
||||
address SC_4 = StubRoutines::x86::_SC_4_addr();
|
||||
address Ctable = StubRoutines::x86::_Ctable_addr();
|
||||
address SC_2 = StubRoutines::x86::_SC_2_addr();
|
||||
address SC_3 = StubRoutines::x86::_SC_3_addr();
|
||||
address SC_1 = StubRoutines::x86::_SC_1_addr();
|
||||
address PI_INV_TABLE = StubRoutines::x86::_PI_INV_TABLE_addr();
|
||||
address PI_4 = (address)StubRoutines::x86::_PI_4_addr();
|
||||
address PI32INV = (address)StubRoutines::x86::_PI32INV_addr();
|
||||
address SIGN_MASK = (address)StubRoutines::x86::_SIGN_MASK_addr();
|
||||
address P_1 = (address)StubRoutines::x86::_P_1_addr();
|
||||
address P_3 = (address)StubRoutines::x86::_P_3_addr();
|
||||
address ONE = (address)_ONE;
|
||||
address NEG_ZERO = (address)StubRoutines::x86::_NEG_ZERO_addr();
|
||||
|
||||
bind(start);
|
||||
push(rbx);
|
||||
subq(rsp, 16);
|
||||
movsd(Address(rsp, 8), xmm0);
|
||||
|
||||
bind(B1_2);
|
||||
movl(eax, Address(rsp, 12));
|
||||
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
|
||||
andl(eax, 2147418112);
|
||||
subl(eax, 808452096);
|
||||
cmpl(eax, 281346048);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
|
||||
mulsd(xmm1, xmm0);
|
||||
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
|
||||
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
|
||||
pand(xmm4, xmm0);
|
||||
por(xmm5, xmm4);
|
||||
addpd(xmm1, xmm5);
|
||||
cvttsd2sil(edx, xmm1);
|
||||
cvtsi2sdl(xmm1, edx);
|
||||
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
|
||||
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
|
||||
mulsd(xmm3, xmm1);
|
||||
unpcklpd(xmm1, xmm1);
|
||||
addq(rdx, 1865232);
|
||||
movdqu(xmm4, xmm0);
|
||||
andq(rdx, 63);
|
||||
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
|
||||
lea(rax, ExternalAddress(Ctable));
|
||||
shlq(rdx, 5);
|
||||
addq(rax, rdx);
|
||||
mulpd(xmm2, xmm1);
|
||||
subsd(xmm0, xmm3);
|
||||
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
|
||||
subsd(xmm4, xmm3);
|
||||
movq(xmm7, Address(rax, 8));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movdqu(xmm3, xmm4);
|
||||
subsd(xmm4, xmm2);
|
||||
mulpd(xmm5, xmm0);
|
||||
subpd(xmm0, xmm2);
|
||||
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
|
||||
mulsd(xmm7, xmm4);
|
||||
subsd(xmm3, xmm4);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
subsd(xmm3, xmm2);
|
||||
movdqu(xmm2, Address(rax, 0));
|
||||
subsd(xmm1, xmm3);
|
||||
movq(xmm3, Address(rax, 24));
|
||||
addsd(xmm2, xmm3);
|
||||
subsd(xmm7, xmm2);
|
||||
mulsd(xmm2, xmm4);
|
||||
mulpd(xmm6, xmm0);
|
||||
mulsd(xmm3, xmm4);
|
||||
mulpd(xmm2, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
|
||||
mulsd(xmm4, Address(rax, 0));
|
||||
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulpd(xmm5, xmm0);
|
||||
movdqu(xmm0, xmm3);
|
||||
addsd(xmm3, Address(rax, 8));
|
||||
mulpd(xmm1, xmm7);
|
||||
movdqu(xmm7, xmm4);
|
||||
addsd(xmm4, xmm3);
|
||||
addpd(xmm6, xmm5);
|
||||
movq(xmm5, Address(rax, 8));
|
||||
subsd(xmm5, xmm3);
|
||||
subsd(xmm3, xmm4);
|
||||
addsd(xmm1, Address(rax, 16));
|
||||
mulpd(xmm6, xmm2);
|
||||
addsd(xmm0, xmm5);
|
||||
addsd(xmm3, xmm7);
|
||||
addsd(xmm0, xmm1);
|
||||
addsd(xmm0, xmm3);
|
||||
addsd(xmm0, xmm6);
|
||||
unpckhpd(xmm6, xmm6);
|
||||
addsd(xmm0, xmm6);
|
||||
addsd(xmm0, xmm4);
|
||||
jmp(B1_4);
|
||||
|
||||
bind(L_2TAG_PACKET_0_0_1);
|
||||
jcc(Assembler::greater, L_2TAG_PACKET_1_0_1);
|
||||
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
|
||||
subsd(xmm1, xmm0);
|
||||
movdqu(xmm0, xmm1);
|
||||
jmp(B1_4);
|
||||
|
||||
bind(L_2TAG_PACKET_1_0_1);
|
||||
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32752);
|
||||
cmpl(eax, 32752);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_2_0_1);
|
||||
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32752);
|
||||
subl(ecx, 16224);
|
||||
shrl(ecx, 7);
|
||||
andl(ecx, 65532);
|
||||
lea(r11, ExternalAddress(PI_INV_TABLE));
|
||||
addq(rcx, r11);
|
||||
movdq(rax, xmm0);
|
||||
movl(r10, Address(rcx, 20));
|
||||
movl(r8, Address(rcx, 24));
|
||||
movl(edx, eax);
|
||||
shrq(rax, 21);
|
||||
orl(eax, INT_MIN);
|
||||
shrl(eax, 11);
|
||||
movl(r9, r10);
|
||||
imulq(r10, rdx);
|
||||
imulq(r9, rax);
|
||||
imulq(r8, rax);
|
||||
movl(rsi, Address(rcx, 16));
|
||||
movl(rdi, Address(rcx, 12));
|
||||
movl(r11, r10);
|
||||
shrq(r10, 32);
|
||||
addq(r9, r10);
|
||||
addq(r11, r8);
|
||||
movl(r8, r11);
|
||||
shrq(r11, 32);
|
||||
addq(r9, r11);
|
||||
movl(r10, rsi);
|
||||
imulq(rsi, rdx);
|
||||
imulq(r10, rax);
|
||||
movl(r11, rdi);
|
||||
imulq(rdi, rdx);
|
||||
movl(rbx, rsi);
|
||||
shrq(rsi, 32);
|
||||
addq(r9, rbx);
|
||||
movl(rbx, r9);
|
||||
shrq(r9, 32);
|
||||
addq(r10, rsi);
|
||||
addq(r10, r9);
|
||||
shlq(rbx, 32);
|
||||
orq(r8, rbx);
|
||||
imulq(r11, rax);
|
||||
movl(r9, Address(rcx, 8));
|
||||
movl(rsi, Address(rcx, 4));
|
||||
movl(rbx, rdi);
|
||||
shrq(rdi, 32);
|
||||
addq(r10, rbx);
|
||||
movl(rbx, r10);
|
||||
shrq(r10, 32);
|
||||
addq(r11, rdi);
|
||||
addq(r11, r10);
|
||||
movq(rdi, r9);
|
||||
imulq(r9, rdx);
|
||||
imulq(rdi, rax);
|
||||
movl(r10, r9);
|
||||
shrq(r9, 32);
|
||||
addq(r11, r10);
|
||||
movl(r10, r11);
|
||||
shrq(r11, 32);
|
||||
addq(rdi, r9);
|
||||
addq(rdi, r11);
|
||||
movq(r9, rsi);
|
||||
imulq(rsi, rdx);
|
||||
imulq(r9, rax);
|
||||
shlq(r10, 32);
|
||||
orq(r10, rbx);
|
||||
movl(eax, Address(rcx, 0));
|
||||
movl(r11, rsi);
|
||||
shrq(rsi, 32);
|
||||
addq(rdi, r11);
|
||||
movl(r11, rdi);
|
||||
shrq(rdi, 32);
|
||||
addq(r9, rsi);
|
||||
addq(r9, rdi);
|
||||
imulq(rdx, rax);
|
||||
pextrw(rbx, xmm0, 3);
|
||||
lea(rdi, ExternalAddress(PI_INV_TABLE));
|
||||
subq(rcx, rdi);
|
||||
addl(ecx, ecx);
|
||||
addl(ecx, ecx);
|
||||
addl(ecx, ecx);
|
||||
addl(ecx, 19);
|
||||
movl(rsi, 32768);
|
||||
andl(rsi, rbx);
|
||||
shrl(rbx, 4);
|
||||
andl(rbx, 2047);
|
||||
subl(rbx, 1023);
|
||||
subl(ecx, rbx);
|
||||
addq(r9, rdx);
|
||||
movl(edx, ecx);
|
||||
addl(edx, 32);
|
||||
cmpl(ecx, 1);
|
||||
jcc(Assembler::less, L_2TAG_PACKET_3_0_1);
|
||||
negl(ecx);
|
||||
addl(ecx, 29);
|
||||
shll(r9);
|
||||
movl(rdi, r9);
|
||||
andl(r9, 536870911);
|
||||
testl(r9, 268435456);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1);
|
||||
shrl(r9);
|
||||
movl(rbx, 0);
|
||||
shlq(r9, 32);
|
||||
orq(r9, r11);
|
||||
|
||||
bind(L_2TAG_PACKET_5_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_6_0_1);
|
||||
cmpq(r9, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_7_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_8_0_1);
|
||||
bsrq(r11, r9);
|
||||
movl(ecx, 29);
|
||||
subl(ecx, r11);
|
||||
jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1);
|
||||
shlq(r9);
|
||||
movq(rax, r10);
|
||||
shlq(r10);
|
||||
addl(edx, ecx);
|
||||
negl(ecx);
|
||||
addl(ecx, 64);
|
||||
shrq(rax);
|
||||
shrq(r8);
|
||||
orq(r9, rax);
|
||||
orq(r10, r8);
|
||||
|
||||
bind(L_2TAG_PACKET_10_0_1);
|
||||
cvtsi2sdq(xmm0, r9);
|
||||
shrq(r10, 1);
|
||||
cvtsi2sdq(xmm3, r10);
|
||||
xorpd(xmm4, xmm4);
|
||||
shll(edx, 4);
|
||||
negl(edx);
|
||||
addl(edx, 16368);
|
||||
orl(edx, rsi);
|
||||
xorl(edx, rbx);
|
||||
pinsrw(xmm4, edx, 3);
|
||||
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
|
||||
movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
|
||||
xorpd(xmm5, xmm5);
|
||||
subl(edx, 1008);
|
||||
pinsrw(xmm5, edx, 3);
|
||||
mulsd(xmm0, xmm4);
|
||||
shll(rsi, 16);
|
||||
sarl(rsi, 31);
|
||||
mulsd(xmm3, xmm5);
|
||||
movdqu(xmm1, xmm0);
|
||||
mulsd(xmm0, xmm2);
|
||||
shrl(rdi, 29);
|
||||
addsd(xmm1, xmm3);
|
||||
mulsd(xmm3, xmm2);
|
||||
addl(rdi, rsi);
|
||||
xorl(rdi, rsi);
|
||||
mulsd(xmm6, xmm1);
|
||||
movl(eax, rdi);
|
||||
addsd(xmm6, xmm3);
|
||||
movdqu(xmm2, xmm0);
|
||||
addsd(xmm0, xmm6);
|
||||
subsd(xmm2, xmm0);
|
||||
addsd(xmm6, xmm2);
|
||||
|
||||
bind(L_2TAG_PACKET_11_0_1);
|
||||
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
|
||||
mulsd(xmm1, xmm0);
|
||||
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
|
||||
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
|
||||
pand(xmm4, xmm0);
|
||||
por(xmm5, xmm4);
|
||||
addpd(xmm1, xmm5);
|
||||
cvttsd2siq(rdx, xmm1);
|
||||
cvtsi2sdq(xmm1, rdx);
|
||||
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
|
||||
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
|
||||
mulsd(xmm3, xmm1);
|
||||
unpcklpd(xmm1, xmm1);
|
||||
shll(eax, 3);
|
||||
addl(edx, 1865232);
|
||||
movdqu(xmm4, xmm0);
|
||||
addl(edx, eax);
|
||||
andl(edx, 63);
|
||||
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
|
||||
lea(rax, ExternalAddress(Ctable));
|
||||
shll(edx, 5);
|
||||
addq(rax, rdx);
|
||||
mulpd(xmm2, xmm1);
|
||||
subsd(xmm0, xmm3);
|
||||
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
|
||||
subsd(xmm4, xmm3);
|
||||
movq(xmm7, Address(rax, 8));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movdqu(xmm3, xmm4);
|
||||
subsd(xmm4, xmm2);
|
||||
mulpd(xmm5, xmm0);
|
||||
subpd(xmm0, xmm2);
|
||||
mulsd(xmm7, xmm4);
|
||||
subsd(xmm3, xmm4);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
subsd(xmm3, xmm2);
|
||||
movdqu(xmm2, Address(rax, 0));
|
||||
subsd(xmm1, xmm3);
|
||||
movq(xmm3, Address(rax, 24));
|
||||
addsd(xmm2, xmm3);
|
||||
subsd(xmm7, xmm2);
|
||||
subsd(xmm1, xmm6);
|
||||
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
|
||||
mulsd(xmm2, xmm4);
|
||||
mulpd(xmm6, xmm0);
|
||||
mulsd(xmm3, xmm4);
|
||||
mulpd(xmm2, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
|
||||
mulsd(xmm4, Address(rax, 0));
|
||||
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulpd(xmm5, xmm0);
|
||||
movdqu(xmm0, xmm3);
|
||||
addsd(xmm3, Address(rax, 8));
|
||||
mulpd(xmm1, xmm7);
|
||||
movdqu(xmm7, xmm4);
|
||||
addsd(xmm4, xmm3);
|
||||
addpd(xmm6, xmm5);
|
||||
movq(xmm5, Address(rax, 8));
|
||||
subsd(xmm5, xmm3);
|
||||
subsd(xmm3, xmm4);
|
||||
addsd(xmm1, Address(rax, 16));
|
||||
mulpd(xmm6, xmm2);
|
||||
addsd(xmm5, xmm0);
|
||||
addsd(xmm3, xmm7);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm1, xmm3);
|
||||
addsd(xmm1, xmm6);
|
||||
unpckhpd(xmm6, xmm6);
|
||||
movdqu(xmm0, xmm4);
|
||||
addsd(xmm1, xmm6);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(B1_4);
|
||||
|
||||
bind(L_2TAG_PACKET_7_0_1);
|
||||
addl(edx, 64);
|
||||
movq(r9, r10);
|
||||
movq(r10, r8);
|
||||
movl(r8, 0);
|
||||
cmpq(r9, 0);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
|
||||
addl(edx, 64);
|
||||
movq(r9, r10);
|
||||
movq(r10, r8);
|
||||
cmpq(r9, 0);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
|
||||
xorpd(xmm0, xmm0);
|
||||
xorpd(xmm6, xmm6);
|
||||
jmp(L_2TAG_PACKET_11_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_9_0_1);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_10_0_1);
|
||||
negl(ecx);
|
||||
shrq(r10);
|
||||
movq(rax, r9);
|
||||
shrq(r9);
|
||||
subl(edx, ecx);
|
||||
negl(ecx);
|
||||
addl(ecx, 64);
|
||||
shlq(rax);
|
||||
orq(r10, rax);
|
||||
jmp(L_2TAG_PACKET_10_0_1);
|
||||
bind(L_2TAG_PACKET_3_0_1);
|
||||
negl(ecx);
|
||||
shlq(r9, 32);
|
||||
orq(r9, r11);
|
||||
shlq(r9);
|
||||
movq(rdi, r9);
|
||||
testl(r9, INT_MIN);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1);
|
||||
shrl(r9);
|
||||
movl(rbx, 0);
|
||||
shrq(rdi, 3);
|
||||
jmp(L_2TAG_PACKET_6_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_4_0_1);
|
||||
shrl(r9);
|
||||
movl(rbx, 536870912);
|
||||
shrl(rbx);
|
||||
shlq(r9, 32);
|
||||
orq(r9, r11);
|
||||
shlq(rbx, 32);
|
||||
addl(rdi, 536870912);
|
||||
movl(rcx, 0);
|
||||
movl(r11, 0);
|
||||
subq(rcx, r8);
|
||||
sbbq(r11, r10);
|
||||
sbbq(rbx, r9);
|
||||
movq(r8, rcx);
|
||||
movq(r10, r11);
|
||||
movq(r9, rbx);
|
||||
movl(rbx, 32768);
|
||||
jmp(L_2TAG_PACKET_5_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_12_0_1);
|
||||
shrl(r9);
|
||||
mov64(rbx, 0x100000000);
|
||||
shrq(rbx);
|
||||
movl(rcx, 0);
|
||||
movl(r11, 0);
|
||||
subq(rcx, r8);
|
||||
sbbq(r11, r10);
|
||||
sbbq(rbx, r9);
|
||||
movq(r8, rcx);
|
||||
movq(r10, r11);
|
||||
movq(r9, rbx);
|
||||
movl(rbx, 32768);
|
||||
shrq(rdi, 3);
|
||||
addl(rdi, 536870912);
|
||||
jmp(L_2TAG_PACKET_6_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_2_0_1);
|
||||
movsd(xmm0, Address(rsp, 8));
|
||||
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL
|
||||
movq(Address(rsp, 0), xmm0);
|
||||
|
||||
bind(L_2TAG_PACKET_13_0_1);
|
||||
|
||||
bind(B1_4);
|
||||
addq(rsp, 16);
|
||||
pop(rbx);
|
||||
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
|
||||
// Constant pool for the 32-bit fast_cos() below.  fast_cos() loads the address of
// this array into its tmp register and addresses everything as byte offsets from
// it.  Each pair of juints is the low word followed by the high word of one
// IEEE-754 double.
//
// Layout, as read by fast_cos():
//   [0    .. 2047]  64 entries of 32 bytes (four doubles each), selected with
//                   ((k + 1865232) & 63) << 5; the doubles at entry offsets
//                   0, 8, 16 and 24 are all read.
//   [2048], [2064], [2080], [2096]
//                   four 16-byte pairs of doubles fed into the packed polynomial
//                   evaluation (e.g. 2048 holds -1/6 and -1/2).
//   [2112]          0x3d90b4611a600000, stored twice (packed operand).
//   [2128]          0x3fb921fb54400000, multiplied by the rounded quotient.
//   [2144]          0x3b63198a2e037073, multiplied by the rounded quotient.
//                   NOTE(review): 2128/2112/2144 look like a three-part split of
//                   pi/32 -- confirm against the LIBM sources.
//   [2160]          0x40245f306dc9c883, multiplied by x to form the quotient
//                   (the bit pattern is 32/pi).
//   [2176]          0x4338000000000000 (1.5 * 2^52); not referenced by fast_cos().
//   [2192]          1.0, used for the tiny-argument result 1.0 - |x|.
//   [2208]          -0.0, multiplied by the argument on the Inf/NaN path.
//   [2224]          0x8000000000000000, sign-bit mask.
//   [2240]          0.5, stored twice.
// The array must stay 16-byte aligned and its element order must not change:
// fast_cos() hard-codes every offset listed above.
ALIGNED_(16) juint _static_const_table_cos[] =
{
    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
    0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
    0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
    0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
    0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
    0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
    0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
    0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
    0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
    0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
    0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
    0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
    0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
    0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
    0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
    0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
    0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
    0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
    0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
    0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
    0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
    0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
    0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
    0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
    0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
    0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
    0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
    0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
    0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
    0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
    0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
    0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
    0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
    0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
    0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
    0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
    0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
    0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
    0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
    0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
    0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
    0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
    0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
    0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
    0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
    0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
    0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
    0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
    0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
    0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
    0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
    0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
    0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
    0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
    0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
    0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
    0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
    0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
    0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
    0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
    0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
    0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
    0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
    0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
    0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
    0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
    0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
    0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
    0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
    0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
    0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
    0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
    0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
    0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
    0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
    0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
    0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
    0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
    0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
    0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
    0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
    0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
    0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
    0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
    0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
    0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
    0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
    0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
    0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
    0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
    0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
    0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
    0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
    0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
    0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
    0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
    0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
    0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL,
    0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,
    0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL,
    0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL,
    0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL,
    0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,
    0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
    0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL,
    0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
};
|
||||
//registers,
|
||||
// input: (rbp + 8)
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 32-bit (SSE2 + x87) instruction sequence that computes cos(x).
//
// Contract, as visible in the emitted code:
//   input   : the double at [rsp + 8] on entry (read as [rsp + 128] after the
//             120-byte frame is reserved).
//   output  : left on top of the x87 stack (every path ends in fld_d / fmul_d).
//   scratch : xmm0-xmm7, eax and edx are overwritten; tmp is spilled to
//             [rsp + 56] and reloaded before the sequence ends; ecx is only
//             checked for aliasing here.
//   stack   : rsp is lowered by 120 and is NOT raised again by this sequence.
//             NOTE(review): the caller presumably restores rsp (e.g. with
//             leave()) -- confirm in the stub generator before reusing this
//             function elsewhere.
//
// Paths:
//   main path        -- bits 48..62 of x (top 16 bits with the sign cleared) lie
//                       in [12336, 12336 + 4293]: table-driven evaluation using
//                       _static_const_table_cos.
//   L_2TAG_PACKET_0  -- below that window (tiny |x|): result is 1.0 - |x|.
//   L_2TAG_PACKET_2  -- above that window, finite: delegates to
//                       StubRoutines::dlibm_sin_cos_huge().
//   L_2TAG_PACKET_3  -- exponent field all ones (Inf/NaN): result is x * -0.0.
//
// The instruction order is the Intel LIBM schedule and must not be changed.
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;

  assert_different_registers(tmp, eax, ecx, edx);

  address static_const_table_cos = (address)_static_const_table_cos;

  // Prologue: reserve the frame, spill tmp, point tmp at the constant table.
  subl(rsp, 120);
  movl(Address(rsp, 56), tmp);
  lea(tmp, ExternalAddress(static_const_table_cos));
  movsd(xmm0, Address(rsp, 128));

  // Range check on the top 16 bits of x with the sign bit masked off.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  subl(eax, 12336);
  cmpl(eax, 4293);
  jcc(Assembler::above, L_2TAG_PACKET_0_0_2);

  // Main path: k = trunc(x * [2160] + copysign(0.5, x)), i.e. round to nearest.
  movsd(xmm1, Address(tmp, 2160));
  mulsd(xmm1, xmm0);
  movdqu(xmm5, Address(tmp, 2240));
  movsd(xmm4, Address(tmp, 2224));
  pand(xmm4, xmm0);
  por(xmm5, xmm4);
  movsd(xmm3, Address(tmp, 2128));
  movdqu(xmm2, Address(tmp, 2112));
  addpd(xmm1, xmm5);
  cvttsd2sil(edx, xmm1);
  cvtsi2sdl(xmm1, edx);
  mulsd(xmm3, xmm1);
  unpcklpd(xmm1, xmm1);
  // 1865232 is congruent to 16 modulo 64, so after the mask below the table
  // index is (k + 16) & 63.
  addl(edx, 1865232);
  movdqu(xmm4, xmm0);
  andl(edx, 63);
  movdqu(xmm5, Address(tmp, 2096));
  lea(eax, Address(tmp, 0));
  // eax = address of the selected 32-byte table entry.
  shll(edx, 5);
  addl(eax, edx);
  mulpd(xmm2, xmm1);
  subsd(xmm0, xmm3);
  mulsd(xmm1, Address(tmp, 2144));
  subsd(xmm4, xmm3);
  movsd(xmm7, Address(eax, 8));
  unpcklpd(xmm0, xmm0);
  movapd(xmm3, xmm4);
  subsd(xmm4, xmm2);
  mulpd(xmm5, xmm0);
  subpd(xmm0, xmm2);
  movdqu(xmm6, Address(tmp, 2064));
  mulsd(xmm7, xmm4);
  subsd(xmm3, xmm4);
  mulpd(xmm5, xmm0);
  mulpd(xmm0, xmm0);
  subsd(xmm3, xmm2);
  movdqu(xmm2, Address(eax, 0));
  subsd(xmm1, xmm3);
  movsd(xmm3, Address(eax, 24));
  addsd(xmm2, xmm3);
  subsd(xmm7, xmm2);
  mulsd(xmm2, xmm4);
  mulpd(xmm6, xmm0);
  mulsd(xmm3, xmm4);
  mulpd(xmm2, xmm0);
  mulpd(xmm0, xmm0);
  addpd(xmm5, Address(tmp, 2080));
  mulsd(xmm4, Address(eax, 0));
  addpd(xmm6, Address(tmp, 2048));
  mulpd(xmm5, xmm0);
  movapd(xmm0, xmm3);
  addsd(xmm3, Address(eax, 8));
  mulpd(xmm1, xmm7);
  movapd(xmm7, xmm4);
  addsd(xmm4, xmm3);
  addpd(xmm6, xmm5);
  movsd(xmm5, Address(eax, 8));
  subsd(xmm5, xmm3);
  subsd(xmm3, xmm4);
  addsd(xmm1, Address(eax, 16));
  mulpd(xmm6, xmm2);
  addsd(xmm5, xmm0);
  addsd(xmm3, xmm7);
  addsd(xmm1, xmm5);
  addsd(xmm1, xmm3);
  addsd(xmm1, xmm6);
  unpckhpd(xmm6, xmm6);
  addsd(xmm1, xmm6);
  addsd(xmm4, xmm1);
  // Move the SSE result onto the x87 stack through memory.
  movsd(Address(rsp, 0), xmm4);
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_1_0_2);

  bind(L_2TAG_PACKET_0_0_2);
  // The flags still come from cmpl(eax, 4293) above: signed-greater means the
  // argument is above the window, otherwise it is below it (tiny |x|).
  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
  // Tiny |x|: clear the sign bit and return 1.0 - |x|.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  pinsrw(xmm0, eax, 3);
  movsd(xmm1, Address(tmp, 2192));
  subsd(xmm1, xmm0);
  movsd(Address(rsp, 0), xmm1);
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_1_0_2);

  bind(L_2TAG_PACKET_2_0_2);
  // Large |x|: test the exponent field of the high word for all ones (Inf/NaN).
  movl(eax, Address(rsp, 132));
  andl(eax, 2146435072);
  cmpl(eax, 2146435072);
  jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
  // Finite huge argument: build the outgoing arguments in a 32-byte area --
  //   [rsp + 0]  x
  //   [rsp + 8]  address of the caller frame slot at (old rsp + 8)
  //   [rsp + 12] the constant 1
  // NOTE(review): the meaning of the third argument is defined by
  // dlibm_sin_cos_huge; presumably it selects cos -- confirm.
  subl(rsp, 32);
  movsd(Address(rsp, 0), xmm0);
  lea(eax, Address(rsp, 40));
  movl(Address(rsp, 8), eax);
  movl(eax, 1);
  movl(Address(rsp, 12), eax);
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
  addl(rsp, 32);
  // Load the double from the slot whose address was passed to the callee.
  fld_d(Address(rsp, 8));
  jmp(L_2TAG_PACKET_1_0_2);

  bind(L_2TAG_PACKET_3_0_2);
  // Inf/NaN: x * -0.0 on the x87 unit.
  fld_d(Address(rsp, 128));
  fmul_d(Address(tmp, 2208));

  bind(L_2TAG_PACKET_1_0_2);
  // Common exit: restore tmp.  rsp is intentionally left as is (see header).
  movl(tmp, Address(rsp, 56));
}
|
||||
#endif
|
674
hotspot/src/cpu/x86/vm/macroAssembler_x86_exp.cpp
Normal file
674
hotspot/src/cpu/x86/vm/macroAssembler_x86_exp.cpp
Normal file
@ -0,0 +1,674 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - EXP()
|
||||
// ---------------------
|
||||
//
|
||||
// Description:
|
||||
// Let K = 64 (table size).
|
||||
//   e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
|
||||
// where
|
||||
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
|
||||
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
|
||||
// values of 2^(j/K) are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
|
||||
//
|
||||
// P(y) is a minimax polynomial approximation of exp(y)-1
// on small interval [-log(2)/K..log(2)/K] (the coefficients were calculated by Maple V).
|
||||
//
|
||||
// To avoid problems with arithmetic overflow and underflow,
|
||||
// value of 2^n is safely computed as 2^n1 * 2^n2 where n1 in [-BIAS/2..BIAS/2]
|
||||
// where BIAS is a value of exponent bias.
|
||||
//
|
||||
// Special cases:
|
||||
// exp(NaN) = NaN
|
||||
// exp(+INF) = +INF
|
||||
// exp(-INF) = 0
|
||||
// exp(x) = 1 for subnormals
|
||||
// for finite argument, only exp(0)=1 is exact
|
||||
// For IEEE double
|
||||
// if x > 709.782712893383973096 then exp(x) overflow
|
||||
// if x < -745.133219101941108420 then exp(x) underflow
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Packed double constants for the 64-bit fast_exp(), one 16-byte constant per
// row (low word first).  fast_exp() addresses them as cv + <byte offset>.
ALIGNED_(16) juint _cv[] =
{
    0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,  // +0 : multiplied by x (bit pattern of 64/log(2))
    0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL,  // +16: multiplied by the rounded m, subtracted from x first
    0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL,  // +32: multiplied by the rounded m, subtracted from x second
    0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL,  // +48: multiplied by y*y
    0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,  // +64: polynomial coefficients (pair)
    0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL   // +80: polynomial coefficients (pair)
};
|
||||
|
||||
// 0x4338000000000000 (1.5 * 2^52) in both lanes.  fast_exp() adds it to
// x * cv[0] and subtracts it again, which leaves the product rounded to an
// integer while the low bits of the intermediate sum hold that integer.
ALIGNED_(16) juint _shifter[] =
{
    0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
};
|
||||
|
||||
// Per-lane 64-bit mask 0x00000000ffffffc0: fast_exp() ANDs it with the shifted
// sum to drop the low 6 bits (the table index j) and the upper 32 bits.
ALIGNED_(16) juint _mmask[] =
{
    0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
};
|
||||
|
||||
// Per-lane 64-bit value 0xffc0 (= 1023 * 64).  fast_exp() adds it to the masked
// integer and shifts the sum left by 46 to form the exponent field of the
// scale factor.
ALIGNED_(16) juint _bias[] =
{
    0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
};
|
||||
|
||||
// Lookup table for the 64-bit fast_exp(): 64 entries of 16 bytes, selected with
// (m & 63) << 4 where m is the rounded value of x * cv[0].
// fast_exp() loads a whole entry with movdqu:
//   low  qword -- added to the reduced argument as a double;
//   high qword -- OR-ed with the exponent bits built from _mmask/_bias to form
//                 the scale factor (its exponent field is zero in every entry).
// NOTE(review): per the algorithm description these should be T_lo[j] and the
// fraction bits of T_hi[j] = 2^(j/64) -- confirm against the LIBM sources.
// Element order and 16-byte alignment are relied upon by fast_exp().
ALIGNED_(16) juint _Tbl_addr[] =
{
    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
    0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
    0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
    0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
    0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
    0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
    0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
    0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
    0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
    0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
    0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
    0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
    0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
    0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
    0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
    0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
    0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
    0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
    0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
    0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
    0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
    0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
    0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
    0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
    0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
    0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
    0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
    0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
    0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
    0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
    0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
    0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
    0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
    0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
    0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
    0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
    0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
    0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
    0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
    0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
    0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
    0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
    0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
    0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
    0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
    0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
    0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
    0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
    0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
    0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
    0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
    0x000fa7c1UL
};
|
||||
|
||||
// 128 bits of ones.  fast_exp() shifts it left per 64-bit lane to build a mask
// on the near-overflow/underflow path.
ALIGNED_(16) juint _ALLONES[] =
{
    0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
};
|
||||
|
||||
// 0x3ff0000000000000 (the bit pattern of 1.0) in both lanes.  fast_exp() adds it
// with paddd to the partial exponent it built, turning that into a scale factor.
ALIGNED_(16) juint _ebias[] =
{
    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
};
|
||||
|
||||
// 0x7fefffffffffffff, the largest finite double.  fast_exp() squares it to
// produce an overflowed result for large positive arguments.
ALIGNED_(4) juint _XMAX[] =
{
    0xffffffffUL, 0x7fefffffUL
};
|
||||
|
||||
// 0x0010000000000000, the smallest positive normal double.  fast_exp() squares
// it to produce an underflowed result for large negative arguments.
ALIGNED_(4) juint _XMIN[] =
{
    0x00000000UL, 0x00100000UL
};
|
||||
|
||||
// 0x7ff0000000000000, +Infinity: the value fast_exp() returns for x = +Inf.
ALIGNED_(4) juint _INF[] =
{
    0x00000000UL, 0x7ff00000UL
};
|
||||
|
||||
// +0.0: the value fast_exp() returns for x = -Inf.
ALIGNED_(4) juint _ZERO[] =
{
    0x00000000UL, 0x00000000UL
};
|
||||
|
||||
// 0x3ff0000000000000, 1.0: fast_exp() returns x + 1.0 for arguments below its
// lower magnitude threshold.
ALIGNED_(4) juint _ONE_val[] =
{
    0x00000000UL, 0x3ff00000UL
};
|
||||
|
||||
|
||||
// Registers:
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, tmp - r11
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 64-bit SSE2 instruction sequence that computes exp(x).
//
// Contract, as visible in the emitted code:
//   input   : xmm0 (double x).
//   output  : xmm0 (every exit path leaves the result there).
//   scratch : xmm1-xmm7, eax, ecx, edx and tmp are overwritten.
//   stack   : 24 bytes are reserved and released again:
//               [rsp + 0]  receives 14 or 15 on the overflow / underflow paths;
//                          it is never read back in this sequence.
//                          NOTE(review): presumably leftover LIBM error codes.
//               [rsp + 8]  copy of x, re-read on the special-case paths
//               [rsp + 16] spill slot for the result on the paths through
//                          L_2TAG_PACKET_6
//
// Paths:
//   main path        -- bits 48..62 of x lie in [15504, 16527]: table-driven
//                       evaluation; the final scaling is done inline when
//                       -894 <= n <= 1022, otherwise in L_2TAG_PACKET_1.
//   L_2TAG_PACKET_0  -- outside that window.  If the high word with the sign
//                       cleared is below 0x40900000 (|x| < 1024) the argument is
//                       tiny and the result is x + 1.0; otherwise control goes
//                       to L_2TAG_PACKET_8.
//   L_2TAG_PACKET_8  -- finite |x| >= 1024: XMAX*XMAX for positive x,
//                       XMIN*XMIN for negative x.
//   L_2TAG_PACKET_9  -- exponent field all ones: +Inf -> +Inf, -Inf -> +0.0,
//                       NaN -> x + x.
//
// Unlike the original, no jmp is emitted in front of the prologue: it targeted
// the instruction that immediately followed it.  The remaining instruction
// order is the Intel LIBM schedule and must not be changed.
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label L_2TAG_PACKET_12_0_2, B1_5;

  assert_different_registers(tmp, eax, ecx, edx);

  // Addresses of the constant pools (no code is emitted for these).
  address cv = (address)_cv;
  address Shifter = (address)_shifter;
  address mmask = (address)_mmask;
  address bias = (address)_bias;
  address Tbl_addr = (address)_Tbl_addr;
  address ALLONES = (address)_ALLONES;
  address ebias = (address)_ebias;
  address XMAX = (address)_XMAX;
  address XMIN = (address)_XMIN;
  address INF = (address)_INF;
  address ZERO = (address)_ZERO;
  address ONE_val = (address)_ONE_val;

  // Prologue: reserve the frame, keep a copy of x, duplicate x into both lanes.
  subq(rsp, 24);
  movsd(Address(rsp, 8), xmm0);
  unpcklpd(xmm0, xmm0);
  movdqu(xmm1, ExternalAddress(cv));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, ExternalAddress(Shifter));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, ExternalAddress(16 + cv));  // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, ExternalAddress(32 + cv));  // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL

  // Range check on the top 16 bits of x (sign cleared): (16527 - e) | (e - 15504)
  // has its sign bit set iff e is outside [15504, 16527].
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  // Main path: m = round(x * cv[0]) via the shifter trick; xmm7 keeps the
  // shifted sum whose low bits hold m, xmm1 the rounded value as a double.
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);
  movdqu(xmm4, ExternalAddress(64 + cv));  // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, ExternalAddress(80 + cv));  // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  movdl(eax, xmm7);
  // ecx = (m & 63) * 16 is the byte offset into _Tbl_addr; eax = edx = m >> 6.
  movl(ecx, eax);
  andl(ecx, 63);
  shll(ecx, 4);
  sarl(eax, 6);
  movl(edx, eax);
  movdqu(xmm6, ExternalAddress(mmask));    // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, ExternalAddress(bias));     // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  // xmm7 = ((m & ~63) + 1023 * 64) << 46: the biased exponent moved into place.
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);
  lea(tmp, ExternalAddress(Tbl_addr));
  movdqu(xmm2, Address(ecx, tmp));
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, ExternalAddress(48 + cv));   // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  // Combine the table entry's high qword with the exponent bits.
  por(xmm2, xmm7);
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  // Unsigned compare: taken unless -894 <= n <= 1022.
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
  // Result = p * scale + scale.
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp(B1_5);

  bind(L_2TAG_PACKET_1_0_2);
  // Scale exponent outside the inline range: apply the scaling in two steps.
  xorpd(xmm3, xmm3);
  movdqu(xmm4, ExternalAddress(ALLONES));  // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
  movl(edx, -1022);
  subl(edx, eax);
  movdl(xmm5, edx);
  psllq(xmm4, xmm5);
  movl(ecx, eax);
  sarl(eax, 1);
  pinsrw(xmm3, eax, 3);
  movdqu(xmm6, ExternalAddress(ebias));    // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
  psllq(xmm3, 4);
  psubd(xmm2, xmm3);
  mulsd(xmm0, xmm2);
  cmpl(edx, 52);
  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
  pand(xmm4, xmm2);
  paddd(xmm3, xmm6);
  subsd(xmm2, xmm4);
  addsd(xmm0, xmm2);
  cmpl(ecx, 1023);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32768);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  movapd(xmm6, xmm0);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  // Exponent field of the product is zero -> take the denormal-result path.
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
  jmp(B1_5);

  bind(L_2TAG_PACKET_5_0_2);
  mulsd(xmm6, xmm3);
  mulsd(xmm4, xmm3);
  movdqu(xmm0, xmm6);
  pxor(xmm6, xmm4);
  psrad(xmm6, 31);
  pshufd(xmm6, xmm6, 85);
  psllq(xmm0, 1);
  psrlq(xmm0, 1);
  pxor(xmm0, xmm6);
  psrlq(xmm6, 63);
  paddq(xmm0, xmm6);
  paddq(xmm0, xmm4);
  movl(Address(rsp, 0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  jmp(B1_5);

  bind(L_2TAG_PACKET_3_0_2);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  // Exponent field all ones -> the result overflowed.
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  jmp(B1_5);

  bind(L_2TAG_PACKET_2_0_2);
  paddd(xmm3, xmm6);
  addpd(xmm0, xmm2);
  mulsd(xmm0, xmm3);
  movl(Address(rsp, 0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_8_0_2);
  // eax = high word of x with the sign cleared; 2146435072 = 0x7ff00000.
  cmpl(eax, 2146435072);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
  // Finite and large: the sign bit of the saved high word picks the direction.
  movl(eax, Address(rsp, 12));
  cmpl(eax, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
  movsd(xmm0, ExternalAddress(XMAX));      // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);

  bind(L_2TAG_PACKET_7_0_2);
  movl(Address(rsp, 0), 14);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, ExternalAddress(XMIN));      // 0x00000000UL, 0x00100000UL
  mulsd(xmm0, xmm0);
  movl(Address(rsp, 0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_9_0_2);
  // Exponent field all ones: any non-zero mantissa bit means NaN.
  movl(edx, Address(rsp, 8));
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
  // Infinity: the unmasked high word equals 0x7ff00000 only for +Inf.
  movl(eax, Address(rsp, 12));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
  movsd(xmm0, ExternalAddress(INF));       // 0x00000000UL, 0x7ff00000UL
  jmp(B1_5);

  bind(L_2TAG_PACKET_12_0_2);
  movsd(xmm0, ExternalAddress(ZERO));      // 0x00000000UL, 0x00000000UL
  jmp(B1_5);

  bind(L_2TAG_PACKET_11_0_2);
  // NaN: return x + x.
  movsd(xmm0, Address(rsp, 8));
  addsd(xmm0, xmm0);
  jmp(B1_5);

  bind(L_2TAG_PACKET_0_0_2);
  // 1083179008 = 0x40900000, the high word of 1024.0.
  movl(eax, Address(rsp, 12));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
  // Tiny |x|: the result is x + 1.0.
  movsd(Address(rsp, 8), xmm0);
  addsd(xmm0, ExternalAddress(ONE_val));   // 0x00000000UL, 0x3ff00000UL
  jmp(B1_5);

  bind(L_2TAG_PACKET_6_0_2);
  // Spill and reload the low 64 bits of the result.
  movq(Address(rsp, 16), xmm0);
  movq(xmm0, Address(rsp, 16));

  bind(B1_5);
  addq(rsp, 24);
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
// Constant pool for the 32-bit MacroAssembler::fast_exp() defined below, which
// reads it by byte offset from the table base (one juint = 4 bytes; each double
// is stored low word first):
//      0 ..  159  ten 16-byte constants, loaded at offsets 0, 16, 32, 48, 64,
//                 80, 96, 112, 128 and 144
//    160 .. 1183  64 entries of 16 bytes, selected with (n & 63) << 4
//   1184          0x3ff00000:00000000  (1.0)
//   1192          0x7ff00000:00000000  (+Inf)
//   1200          0x00000000:00000000  (+0.0)
//   1208          0x7fefffff:ffffffff  (largest finite double)
//   1216          0x00100000:00000000  (smallest normal double)
// The array is 16-byte aligned; the values must not be reordered because the
// generator hard-codes the offsets listed above.
ALIGNED_(16) juint _static_const_table[] =
|
||||
{
|
||||
0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
|
||||
0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
|
||||
0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
|
||||
0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
|
||||
0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
|
||||
0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
|
||||
0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
|
||||
0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
|
||||
// Byte offset 160: start of the 64-entry, 16-bytes-per-entry lookup section.
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
|
||||
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
|
||||
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
|
||||
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
|
||||
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
|
||||
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
|
||||
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
|
||||
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
|
||||
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
|
||||
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
|
||||
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
|
||||
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
|
||||
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
|
||||
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
|
||||
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
|
||||
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
|
||||
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
|
||||
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
|
||||
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
|
||||
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
|
||||
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
|
||||
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
|
||||
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
|
||||
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
|
||||
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
|
||||
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
|
||||
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
|
||||
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
|
||||
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
|
||||
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
|
||||
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
|
||||
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
|
||||
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
|
||||
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
|
||||
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
|
||||
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
|
||||
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
|
||||
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
|
||||
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
|
||||
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
|
||||
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
|
||||
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
|
||||
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
|
||||
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
|
||||
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
|
||||
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
|
||||
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
|
||||
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
|
||||
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
|
||||
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
|
||||
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
|
||||
0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
|
||||
0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
|
||||
0x00100000UL
|
||||
};
|
||||
|
||||
//registers,
|
||||
// input: (rbp + 8)
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 32-bit (SSE2 + x87) code sequence that computes exp(x).
//
// Input:   the double is read from the stack at Address(rsp, 128) after this
//          routine has lowered rsp by 120 bytes (i.e. rsp + 8 on entry).
// Output:  every exit path finishes with fld_d, so the result is left on top
//          of the x87 register stack.
// Scratch: xmm0-xmm7, eax, ecx, edx. 'tmp' is used as the base of
//          _static_const_table; its previous value is spilled to
//          Address(rsp, 64) and reloaded before the routine ends.
//
// NOTE(review): rsp is lowered by 120 here and never raised again inside this
// routine; presumably the caller's frame teardown restores it -- confirm
// against the stub generator that invokes fast_exp.
//
// The instruction order is part of the algorithm (code generated by the Intel
// C compiler for LIBM); do not reorder.
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp, eax, ecx, edx);
  jmp(start);
  address static_const_table = (address)_static_const_table;

  bind(start);
  subl(rsp, 120);
  movl(Address(rsp, 64), tmp);                  // spill caller's tmp
  lea(tmp, ExternalAddress(static_const_table));
  movdqu(xmm0, Address(rsp, 128));              // load the argument
  unpcklpd(xmm0, xmm0);
  movdqu(xmm1, Address(tmp, 64));               // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, Address(tmp, 48));               // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, Address(tmp, 80));               // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, Address(tmp, 96));               // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
  // Range check on the sign-stripped top 16 bits of x: if either
  // (16527 - e) or (e - 15504) is negative, leave the main path.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
  // Main path.
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);
  movdqu(xmm4, Address(tmp, 128));              // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, Address(tmp, 144));              // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  movdl(eax, xmm7);
  movl(ecx, eax);
  andl(ecx, 63);                                // low 6 bits select the table entry
  shll(ecx, 4);                                 // 16 bytes per entry
  sarl(eax, 6);
  movl(edx, eax);
  movdqu(xmm6, Address(tmp, 16));               // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, Address(tmp, 32));               // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);
  movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, Address(tmp, 112));               // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  por(xmm2, xmm7);
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc(Assembler::above, L_2TAG_PACKET_1_0_2);   // scale exponent out of range: use the x87 path
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp(L_2TAG_PACKET_2_0_2);

  // Result scaling done on the x87 unit with both precision-control bits
  // (0x300) set in the FPU control word; the saved control word is restored
  // afterwards.
  bind(L_2TAG_PACKET_1_0_2);
  fnstcw(Address(rsp, 24));
  movzwl(edx, Address(rsp, 24));
  orl(edx, 768);
  movw(Address(rsp, 28), edx);
  fldcw(Address(rsp, 28));
  movl(edx, eax);
  sarl(eax, 1);
  subl(edx, eax);
  movdqu(xmm6, Address(tmp, 0));                // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
  pandn(xmm6, xmm2);
  addl(eax, 1023);
  movdl(xmm3, eax);
  psllq(xmm3, 52);
  por(xmm6, xmm3);
  addl(edx, 1023);
  movdl(xmm4, edx);
  psllq(xmm4, 52);
  movsd(Address(rsp, 8), xmm0);
  fld_d(Address(rsp, 8));
  movsd(Address(rsp, 16), xmm6);
  fld_d(Address(rsp, 16));
  fmula(1);
  faddp(1);
  movsd(Address(rsp, 8), xmm4);
  fld_d(Address(rsp, 8));
  fmulp(1);
  fstp_d(Address(rsp, 8));
  movsd(xmm0, Address(rsp, 8));
  fldcw(Address(rsp, 24));                      // restore the saved control word
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);  // exponent field all ones
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);         // exponent field zero
  jmp(L_2TAG_PACKET_2_0_2);
  // NOTE: the instructions from here to the next bind() follow an
  // unconditional jmp and carry no label, so they are never executed. They
  // are kept so the emitted stub layout is unchanged.
  cmpl(ecx, INT_MIN);
  jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, -1064950997);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(edx, -17155601);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jmp(L_2TAG_PACKET_4_0_2);

  // edx receives a status code (14 or 15) on the exceptional paths; nothing
  // in this routine reads it back.
  bind(L_2TAG_PACKET_3_0_2);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  movl(edx, 15);

  // Exceptional exit: push the computed value on the x87 stack and reload the
  // original argument into xmm0.
  bind(L_2TAG_PACKET_5_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 128));
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_6_0_2);

  // |x| >= 1024 (eax holds the high word of x with the sign bit cleared).
  bind(L_2TAG_PACKET_7_0_2);
  cmpl(eax, 2146435072);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);  // Inf or NaN
  movl(eax, Address(rsp, 132));                       // high word including the sign
  cmpl(eax, INT_MIN);
  // Unsigned test of the sign bit: negative x underflows, positive x
  // overflows. This must be aboveEqual (as in the 64-bit version); a signed
  // greaterEqual against INT_MIN is always true and would send every large
  // positive argument down the underflow path.
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1208));              // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);                            // overflow
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1216));              // 0x00000000UL, 0x00100000UL
  mulsd(xmm0, xmm0);                            // underflow
  movl(edx, 15);
  jmp(L_2TAG_PACKET_5_0_2);

  // Exponent field of x is all ones: tell NaN, +Inf and -Inf apart.
  bind(L_2TAG_PACKET_8_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_10_0_2);      // NaN (high mantissa bits set)
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);   // NaN (low mantissa bits set)
  movl(eax, Address(rsp, 132));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);   // -Inf
  movsd(xmm0, Address(tmp, 1192));              // 0x00000000UL, 0x7ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1200));              // 0x00000000UL, 0x00000000UL
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, xmm0);                            // x + x for a NaN argument
  jmp(L_2TAG_PACKET_2_0_2);

  // Out-of-range exponent: either |x| >= 1024 (handled above) or |x| is so
  // small that the result is 1.0 + x.
  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 132));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, Address(tmp, 1184));              // 0x00000000UL, 0x3ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // Normal exit: move the SSE result onto the x87 stack.
  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 48), xmm0);
  fld_d(Address(rsp, 48));

  bind(L_2TAG_PACKET_6_0_2);
  movl(tmp, Address(rsp, 64));                  // reload caller's tmp
}
|
||||
#endif
|
655
hotspot/src/cpu/x86/vm/macroAssembler_x86_log.cpp
Normal file
655
hotspot/src/cpu/x86/vm/macroAssembler_x86_log.cpp
Normal file
@ -0,0 +1,655 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - LOG()
|
||||
// ---------------------
|
||||
//
|
||||
// x=2^k * mx, mx in [1,2)
|
||||
//
|
||||
// Get B~1/mx based on the output of rcpss instruction (B0)
|
||||
// B = int((B0*2^7+0.5))/2^7
|
||||
//
|
||||
// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
|
||||
//
|
||||
// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and
|
||||
// p(r) is a degree 7 polynomial
|
||||
// -log(B) read from data table (high, low parts)
|
||||
// Result is formed from high and low parts
|
||||
//
|
||||
// Special cases:
|
||||
// log(NaN) = quiet NaN, and raise invalid exception
|
||||
// log(+INF) = that INF
|
||||
// log(0) = -INF with divide-by-zero exception raised
|
||||
// log(1) = +0
|
||||
// log(x) = NaN with invalid exception raised if x < -0, including -INF
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Lookup table for the 64-bit MacroAssembler::fast_log() defined below.
// It holds 129 entries of two doubles each (16 bytes per entry, each double
// stored low juint first). fast_log() fetches one whole entry with a single
// 16-byte load from L_tbl + edx, where edx is a multiple of 16.
// Per the algorithm description above, each entry presumably holds -log(B)
// split into (high, low) parts -- verify against the LIBM sources.
ALIGNED_(16) juint _L_tbl[] =
|
||||
{
|
||||
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
|
||||
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
|
||||
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
|
||||
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
|
||||
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
|
||||
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
|
||||
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
|
||||
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
|
||||
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
|
||||
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
|
||||
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
|
||||
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
|
||||
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
|
||||
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
|
||||
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
|
||||
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
|
||||
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
|
||||
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
|
||||
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
|
||||
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
|
||||
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
|
||||
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
|
||||
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
|
||||
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
|
||||
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
|
||||
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
|
||||
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
|
||||
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
|
||||
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
|
||||
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
|
||||
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
|
||||
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
|
||||
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
|
||||
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
|
||||
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
|
||||
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
|
||||
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
|
||||
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
|
||||
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
|
||||
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
|
||||
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
|
||||
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
|
||||
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
|
||||
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
|
||||
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
|
||||
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
|
||||
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
|
||||
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
|
||||
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
|
||||
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
|
||||
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
|
||||
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
|
||||
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
|
||||
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
|
||||
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
|
||||
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
|
||||
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
|
||||
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
|
||||
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
|
||||
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
|
||||
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
|
||||
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
|
||||
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
|
||||
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
|
||||
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
|
||||
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
|
||||
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
|
||||
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
|
||||
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
|
||||
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
|
||||
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
|
||||
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
|
||||
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
|
||||
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
|
||||
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
|
||||
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
|
||||
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
|
||||
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
|
||||
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
|
||||
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
|
||||
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
|
||||
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
|
||||
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
|
||||
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
|
||||
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
|
||||
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
|
||||
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
|
||||
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
|
||||
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
|
||||
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
|
||||
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
|
||||
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
|
||||
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
|
||||
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
|
||||
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
|
||||
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
|
||||
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
|
||||
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
|
||||
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
|
||||
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
|
||||
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
|
||||
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
|
||||
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x80000000UL
|
||||
};
|
||||
|
||||
// Two doubles (each stored low juint first) used by the 64-bit fast_log():
//   offset 0: 0x3fa62e42:fefa3800, multiplied by the exponent term
//   offset 8: 0x3ceef357:93c76730, a much smaller correction multiplied by
//             the same exponent term
// Presumably the high and low parts of log(2)/16, because fast_log() keeps the
// exponent field shifted left by 4 -- verify against the LIBM sources.
ALIGNED_(16) juint _log2[] =
|
||||
{
|
||||
0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL
|
||||
};
|
||||
|
||||
// Six doubles (each stored low juint first) that the 64-bit fast_log() loads
// as three 16-byte pairs at byte offsets 0, 16 and 32. Per the algorithm
// description above they are presumably the coefficients of the polynomial
// p(r) -- verify against the LIBM sources.
ALIGNED_(16) juint _coeff[] =
|
||||
{
|
||||
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
|
||||
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
|
||||
0x00000000UL, 0xbfe00000UL
|
||||
};
|
||||
|
||||
//registers,
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, r8, r11
|
||||
|
||||
// Emits the 64-bit code sequence that computes log(x).
//
// Input:   x in xmm0.
// Output:  every exit goes through B1_5 with the result in xmm0.
// Scratch: xmm1-xmm7, eax, ecx, edx, tmp1, tmp2, plus rax, rdx and rcx, which
//          are written directly. 24 bytes of stack are reserved on entry and
//          released at B1_5.
//
// The instruction order is part of the algorithm (see the description at the
// top of this file); do not reorder.
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2;
|
||||
// L_2TAG_PACKET_12_0_2 and L_2TAG_PACKET_13_0_2 are declared but not used in this function.
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
|
||||
|
||||
assert_different_registers(tmp1, tmp2, eax, ecx, edx);
|
||||
jmp(start);
|
||||
address L_tbl = (address)_L_tbl;
|
||||
address log2 = (address)_log2;
|
||||
address coeff = (address)_coeff;
|
||||
|
||||
bind(start);
|
||||
subq(rsp, 24);
|
||||
// Keep a copy of the input for the special-case paths.
movsd(Address(rsp, 0), xmm0);
|
||||
mov64(rax, 0x3ff0000000000000);
|
||||
movdq(xmm2, rax);
|
||||
mov64(rdx, 0x77f0000000000000);
|
||||
movdq(xmm3, rdx);
|
||||
movl(ecx, 32768);
|
||||
movdl(xmm4, rcx);
|
||||
mov64(tmp1, 0xffffe00000000000);
|
||||
movdq(xmm5, tmp1);
|
||||
movdqu(xmm1, xmm0);
|
||||
// eax = top 16 bits of x (sign + exponent + 4 mantissa bits).
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
// ecx is the exponent offset subtracted on the main path (replaced by 18416 on the denormal path).
movl(ecx, 16352);
|
||||
psrlq(xmm0, 27);
|
||||
lea(tmp2, ExternalAddress(L_tbl));
|
||||
psrld(xmm0, 2);
|
||||
rcpps(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 228);
|
||||
psrlq(xmm1, 12);
|
||||
// Unsigned range check: taken when the top word is below 16 (zero or
// denormal) or at least 32752 (Inf, NaN, or any value with the sign bit set).
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
// Main path; also re-entered from L_2TAG_PACKET_3_0_2 after a denormal input has been rescaled.
bind(L_2TAG_PACKET_1_0_2);
|
||||
paddd(xmm0, xmm4);
|
||||
por(xmm1, xmm3);
|
||||
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
pand(xmm5, xmm1);
|
||||
pand(xmm0, xmm6);
|
||||
subsd(xmm1, xmm5);
|
||||
mulpd(xmm5, xmm0);
|
||||
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulsd(xmm1, xmm0);
|
||||
movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL
|
||||
movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
|
||||
subsd(xmm5, xmm2);
|
||||
// edx becomes a byte offset into L_tbl: 8 selected bits shifted so the result is a multiple of 16.
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(tmp2, edx));
|
||||
movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulsd(xmm6, xmm7);
|
||||
// Duplicate the low double of xmm1 into both halves of xmm5; movddup needs SSE3, otherwise copy + movlhps.
if (VM_Version::supports_sse3()) {
|
||||
movddup(xmm5, xmm1);
|
||||
}
|
||||
else {
|
||||
movdqu(xmm5, xmm1);
|
||||
movlhps(xmm5, xmm5);
|
||||
}
|
||||
mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
mulpd(xmm5, xmm5);
|
||||
// Same low-double duplication, this time from xmm0 into xmm6.
if (VM_Version::supports_sse3()) {
|
||||
movddup(xmm6, xmm0);
|
||||
}
|
||||
else {
|
||||
movdqu(xmm6, xmm0);
|
||||
movlhps(xmm6, xmm6);
|
||||
}
|
||||
addsd(xmm0, xmm1);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
subsd(xmm6, xmm0);
|
||||
mulsd(xmm4, xmm1);
|
||||
pshufd(xmm2, xmm0, 238);
|
||||
addsd(xmm1, xmm6);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(B1_5);
|
||||
|
||||
// Special-case dispatch: reload x, undo the earlier "subl(eax, 16)" and classify by the top word.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movq(xmm0, Address(rsp, 0));
|
||||
movq(xmm1, Address(rsp, 0));
|
||||
addl(eax, 16);
|
||||
// Sign bit set: negative input (including -0, -Inf and negative NaN).
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
|
||||
// Exponent field zero: +0 or a positive denormal.
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
|
||||
|
||||
// +Inf or NaN: return x + x.
bind(L_2TAG_PACKET_4_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(B1_5);
|
||||
|
||||
// Negative input with an all-ones exponent. The first jcc still sees the
// flags of the cmpl in L_2TAG_PACKET_2_0_2: any mantissa bit set means NaN
// (return x + x), otherwise the input is -Inf and the invalid path is taken.
bind(L_2TAG_PACKET_5_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// +0 or positive denormal.
bind(L_2TAG_PACKET_3_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
addsd(xmm1, xmm0);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
// All 64 bits zero: log(0).
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
|
||||
// Denormal: multiply x by the double whose top word is 18416 (0x47f0, i.e.
// 2^128), redo the set-up done at 'start', and use 18416 as the exponent
// offset in ecx to compensate.
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
movdqu(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
psrlq(xmm0, 27);
|
||||
movl(ecx, 18416);
|
||||
psrld(xmm0, 2);
|
||||
rcpps(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 228);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Negative input: edx = low 32 bits of x, ecx = high 32 bits shifted left by one (sign dropped).
bind(L_2TAG_PACKET_2_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
// Exponent field all ones (-Inf or NaN).
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
// Remaining bits all zero: x is -0, handled like log(0).
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
|
||||
|
||||
// Invalid input (x < 0): produce NaN as 0 * Inf (top word 32752 = 0x7ff0).
// The value 3 stored at Address(rsp, 16) is not read back in this function.
bind(L_2TAG_PACKET_6_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
movl(Address(rsp, 16), 3);
|
||||
jmp(L_2TAG_PACKET_8_0_2);
|
||||
// log(+/-0): produce -Inf as -1.0 / 0 (top word 49136 = 0xbff0).
// The value 2 stored at Address(rsp, 16) is not read back in this function.
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
movl(Address(rsp, 16), 2);
|
||||
|
||||
// Spill the special-case result and reload it (round trip through the stack).
bind(L_2TAG_PACKET_8_0_2);
|
||||
movq(Address(rsp, 8), xmm0);
|
||||
|
||||
// B1_3 is bound here but no jump in this function targets it.
bind(B1_3);
|
||||
movq(xmm0, Address(rsp, 8));
|
||||
|
||||
// Common exit: release the 24 bytes reserved at 'start'.
bind(B1_5);
|
||||
addq(rsp, 24);
|
||||
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
// Constant pool for the 32-bit fast_log() below, which addresses it by byte offset
// from the table base held in tmp:
//   bytes 0..2063    : 16-byte entries fetched with movdqu(xmm0, Address(tmp, edx))
//   bytes 2064..2079 : two doubles, read at offsets 2064 (movsd) and 2072 (mulsd)
//   bytes 2080..2127 : three 16-byte pairs loaded by movdqu at offsets 2080, 2096, 2112
//   bytes 2128..2143 : 0x00000000, 0xffffe000 twice; the low 8 bytes are loaded by
//                      movsd at offset 2128
// NOTE(review): the 16-byte entries look like (high, low) double pairs of a log
// lookup table - confirm against the LIBM source.
ALIGNED_(16) juint _static_const_table_log[] =
|
||||
{
|
||||
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
|
||||
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
|
||||
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
|
||||
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
|
||||
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
|
||||
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
|
||||
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
|
||||
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
|
||||
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
|
||||
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
|
||||
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
|
||||
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
|
||||
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
|
||||
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
|
||||
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
|
||||
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
|
||||
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
|
||||
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
|
||||
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
|
||||
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
|
||||
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
|
||||
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
|
||||
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
|
||||
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
|
||||
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
|
||||
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
|
||||
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
|
||||
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
|
||||
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
|
||||
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
|
||||
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
|
||||
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
|
||||
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
|
||||
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
|
||||
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
|
||||
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
|
||||
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
|
||||
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
|
||||
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
|
||||
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
|
||||
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
|
||||
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
|
||||
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
|
||||
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
|
||||
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
|
||||
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
|
||||
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
|
||||
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
|
||||
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
|
||||
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
|
||||
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
|
||||
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
|
||||
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
|
||||
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
|
||||
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
|
||||
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
|
||||
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
|
||||
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
|
||||
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
|
||||
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
|
||||
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
|
||||
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
|
||||
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
|
||||
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
|
||||
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
|
||||
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
|
||||
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
|
||||
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
|
||||
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
|
||||
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
|
||||
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
|
||||
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
|
||||
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
|
||||
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
|
||||
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
|
||||
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
|
||||
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
|
||||
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
|
||||
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
|
||||
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
|
||||
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
|
||||
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
|
||||
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
|
||||
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
|
||||
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
|
||||
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
|
||||
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
|
||||
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
|
||||
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
|
||||
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
|
||||
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
|
||||
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
|
||||
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
|
||||
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
|
||||
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
|
||||
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
|
||||
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
|
||||
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
|
||||
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
|
||||
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
|
||||
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
|
||||
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
|
||||
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
|
||||
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
|
||||
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
|
||||
0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
|
||||
0xffffe000UL
|
||||
};
|
||||
// MacroAssembler::fast_log (32-bit variant): emits the SSE2 instruction sequence for log().
// Registers:
|
||||
// input: read from the stack at Address(rsp, 112) after rsp has been lowered by 104;
// whatever the caller left in xmm0 is overwritten by that load.
// output: pushed onto the x87 stack with fld_d. xmm0 holds the result on the normal
// path and a reload of the original argument on the special-case path.
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp); tmp is saved at rsp+40 on entry and reloaded at the end.
|
||||
|
||||
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
|
||||
Label L_2TAG_PACKET_10_0_2, start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
// This jump skips no emitted code: only a C++ local sits between it and bind(start).
jmp(start);
|
||||
address static_const_table = (address)_static_const_table_log;
|
||||
|
||||
bind(start);
|
||||
// Reserve 104 bytes of stack. No matching addl(rsp, 104) is emitted in this function.
// NOTE(review): presumably the caller's frame teardown restores rsp - confirm.
subl(rsp, 104);
|
||||
movl(Address(rsp, 40), tmp);
|
||||
// tmp = base of _static_const_table_log for every table access below.
lea(tmp, ExternalAddress(static_const_table));
|
||||
// xmm2 = double whose top 16 bits are 0x3FF0 (bit pattern of 1.0).
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
// xmm3 = double whose top 16 bits are 0x77F0.
xorpd(xmm3, xmm3);
|
||||
movl(edx, 30704);
|
||||
pinsrw(xmm3, edx, 3);
|
||||
// Load the argument: 104 bytes reserved above plus 8 bytes that were already on the
// stack (presumably return address and saved frame pointer - confirm against the stub).
movsd(xmm0, Address(rsp, 112));
|
||||
movapd(xmm1, xmm0);
|
||||
// xmm4 = 0x8000 in the low dword, added to the rcpss result by paddd below.
movl(ecx, 32768);
|
||||
movdl(xmm4, ecx);
|
||||
movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL
|
||||
// eax = top 16 bits of the argument: sign, 11 exponent bits, 4 mantissa bits.
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
psllq(xmm0, 5);
|
||||
// ecx = 0x3FE0, the exponent bias subtracted on the main path.
movl(ecx, 16352);
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
// xmm1 = mantissa bits of the argument only (sign and exponent shifted out).
psllq(xmm1, 12);
|
||||
// Shuffle immediate 228 is the identity selection: xmm6 = copy of xmm5.
pshufd(xmm6, xmm5, 228);
|
||||
psrlq(xmm1, 12);
|
||||
// Unsigned range check on the top bits. The branch is taken for zero/denormal inputs
// (exponent field 0, so the subtraction wraps), for Inf/NaN (exponent field all ones)
// and for every input with the sign bit set.
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
// Main path; also re-entered from the denormal path with rescaled operands.
bind(L_2TAG_PACKET_1_0_2);
|
||||
paddd(xmm0, xmm4);
|
||||
por(xmm1, xmm3);
|
||||
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
pand(xmm5, xmm1);
|
||||
pand(xmm0, xmm6);
|
||||
subsd(xmm1, xmm5);
|
||||
mulpd(xmm5, xmm0);
|
||||
// eax = exponent field (still shifted left by 4) minus the bias in ecx.
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulsd(xmm1, xmm0);
|
||||
movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL
|
||||
movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
|
||||
subsd(xmm5, xmm2);
|
||||
// edx = byte offset of a 16-byte table entry: bits 16..23 (mask 0xFF0000) of the
// rounded rcpss value, shifted so that the offset is a multiple of 16.
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(tmp, edx));
|
||||
movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulsd(xmm6, xmm7);
|
||||
pshufd(xmm5, xmm1, 68);
|
||||
mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL (mulsd reads 8 bytes only)
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
mulpd(xmm5, xmm5);
|
||||
pshufd(xmm6, xmm0, 228);
|
||||
addsd(xmm0, xmm1);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
subsd(xmm6, xmm0);
|
||||
mulsd(xmm4, xmm1);
|
||||
pshufd(xmm2, xmm0, 238);
|
||||
addsd(xmm1, xmm6);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Special-case dispatch: reload the argument and undo the -16 applied to eax.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movsd(xmm0, Address(rsp, 112));
|
||||
movdqu(xmm1, xmm0);
|
||||
addl(eax, 16);
|
||||
// eax >= 0x8000: sign bit set.
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
|
||||
// eax < 16: exponent field is zero (+0 or a positive denormal).
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
|
||||
|
||||
// Remaining inputs (+Inf, NaN): the result is x + x.
bind(L_2TAG_PACKET_5_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Reached only from the jcc in L_2TAG_PACKET_3_0_2 (sign set, exponent field all ones).
// The first jcc here still tests the flags of that cmpl: 'above' means some of the upper
// mantissa bits are set (NaN). A non-zero low dword in edx also means NaN.
// What remains is -Inf, handled at L_2TAG_PACKET_7_0_2.
bind(L_2TAG_PACKET_6_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
jmp(L_2TAG_PACKET_7_0_2);
|
||||
|
||||
// Sign bit set: edx = low dword, ecx = high dword with the sign shifted out.
bind(L_2TAG_PACKET_3_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
// -2097152 == 0xFFE00000: exponent field all ones once the sign bit is shifted out.
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
|
||||
// All remaining bits zero: the input is -0.
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
|
||||
// Negative non-zero input: compute 0 * Inf (xmm1 top word 0x7FF0), which yields NaN.
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
// edx = 3 is set here but not read again in this function.
movl(edx, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
|
||||
// Common tail of the special-case paths: spill the result, reload the original
// argument into xmm0 and push the result onto the x87 stack.
bind(L_2TAG_PACKET_9_0_2);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
movsd(xmm0, Address(rsp, 112));
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_10_0_2);
|
||||
|
||||
// Zero input: compute -1.0 / 0 (xmm0 top word 0xBFF0), which yields -Inf.
bind(L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
// edx = 2 is set here but not read again in this function.
movl(edx, 2);
|
||||
jmp(L_2TAG_PACKET_9_0_2);
|
||||
|
||||
// Exponent field zero, sign clear: +0 goes to the zero case; a denormal is rescaled.
bind(L_2TAG_PACKET_4_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
// Multiply by the double whose top word is 0x47F0, then repeat the entry setup with
// ecx = 0x47F0 as the bias before rejoining the main path.
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
movapd(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
psllq(xmm0, 5);
|
||||
movl(ecx, 18416);
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 228);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Normal exit: spill xmm0 and push it onto the x87 stack.
bind(L_2TAG_PACKET_2_0_2);
|
||||
movsd(Address(rsp, 24), xmm0);
|
||||
fld_d(Address(rsp, 24));
|
||||
|
||||
// Restore tmp from the slot written at entry.
bind(L_2TAG_PACKET_10_0_2);
|
||||
movl(tmp, Address(rsp, 40));
|
||||
}
|
||||
#endif
|
687
hotspot/src/cpu/x86/vm/macroAssembler_x86_log10.cpp
Normal file
687
hotspot/src/cpu/x86/vm/macroAssembler_x86_log10.cpp
Normal file
@ -0,0 +1,687 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
// ALIGNED_(x): compiler-specific alignment specifier (__declspec under _MSC_VER,
// __attribute__ otherwise). Used below as ALIGNED_(16) on the constant tables.
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - LOG10()
|
||||
// ---------------------
|
||||
//
|
||||
// Let x=2^k * mx, mx in [1,2)
|
||||
//
|
||||
// Get B~1/mx based on the output of rcpss instruction (B0)
|
||||
// B = int((B0*LH*2^7+0.5))/2^7
|
||||
// LH is a short approximation for log10(e)
|
||||
//
|
||||
// Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
|
||||
//
|
||||
// Result: k*log10(2) - log(B) + p(r)
|
||||
// p(r) is a degree 7 polynomial
|
||||
// -log(B) read from data table (high, low parts)
|
||||
// Result is formed from high and low parts
|
||||
//
|
||||
// Special cases:
|
||||
// log10(0) = -INF with divide-by-zero exception raised
|
||||
// log10(1) = +0
|
||||
// log10(x) = NaN with invalid exception raised if x < -0, including -INF
|
||||
// log10(+INF) = +INF
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Two 64-bit AND masks loaded together by movdqu in fast_log10():
// low qword 0xfffffffff8000000, high qword 0xffffe00000000000.
// The code also builds a half-swapped copy of this pair with pshufd(xmm6, xmm5, 78).
ALIGNED_(16) juint _HIGHSIGMASK_log10[] =
|
||||
{
|
||||
0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
|
||||
};
|
||||
|
||||
// Two doubles used by fast_log10(). The first, 0x3fdbc00000000000 (0.43359375), is
// loaded with the whole pair into xmm2; it matches "LH", the short approximation of
// log10(e) in the algorithm description. The second is loaded on its own with
// movq(xmm6, ExternalAddress(8 + LOG10_E)) and multiplied by the reduced argument.
// NOTE(review): the second value is presumably the low-order correction to LH - confirm.
ALIGNED_(16) juint _LOG10_E[] =
|
||||
{
|
||||
0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
};
|
||||
|
||||
// Lookup table for the 64-bit fast_log10(): 129 entries of 16 bytes (two doubles each),
// fetched with movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504)).
// The algorithm description above refers to these as the high and low parts of -log(B).
// The final entry is all zeros.
ALIGNED_(16) juint _L_tbl_log10[] =
|
||||
{
|
||||
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
|
||||
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
|
||||
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
|
||||
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
|
||||
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
|
||||
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
|
||||
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
|
||||
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
|
||||
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
|
||||
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
|
||||
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
|
||||
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
|
||||
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
|
||||
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
|
||||
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
|
||||
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
|
||||
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
|
||||
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
|
||||
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
|
||||
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
|
||||
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
|
||||
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
|
||||
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
|
||||
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
|
||||
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
|
||||
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
|
||||
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
|
||||
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
|
||||
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
|
||||
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
|
||||
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
|
||||
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
|
||||
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
|
||||
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
|
||||
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
|
||||
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
|
||||
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
|
||||
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
|
||||
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
|
||||
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
|
||||
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
|
||||
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
|
||||
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
|
||||
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
|
||||
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
|
||||
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
|
||||
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
|
||||
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
|
||||
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
|
||||
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
|
||||
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
|
||||
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
|
||||
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
|
||||
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
|
||||
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
|
||||
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
|
||||
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
|
||||
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
|
||||
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
|
||||
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
|
||||
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
|
||||
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
|
||||
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
|
||||
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
|
||||
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
|
||||
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
|
||||
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
|
||||
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
|
||||
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
|
||||
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
|
||||
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
|
||||
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
|
||||
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
|
||||
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
|
||||
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
|
||||
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
|
||||
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
|
||||
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
|
||||
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
|
||||
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
|
||||
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
|
||||
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
|
||||
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
|
||||
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
|
||||
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
|
||||
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
|
||||
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
|
||||
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
|
||||
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
|
||||
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
|
||||
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
|
||||
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
|
||||
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
|
||||
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
|
||||
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
|
||||
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
|
||||
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
|
||||
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
|
||||
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
|
||||
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
|
||||
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
|
||||
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
|
||||
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL
|
||||
};
|
||||
|
||||
// Two doubles multiplied by the integer exponent term in fast_log10(): the first is
// loaded with movq(xmm6, ExternalAddress(log2)), the second is read directly by
// mulsd(xmm7, ExternalAddress(8 + log2)).
// NOTE(review): these look like the high and low parts of log10(2) pre-divided by 16,
// since the exponent in eax is still shifted left by 4 when it is converted - confirm.
ALIGNED_(16) juint _log2_log10[] =
|
||||
{
|
||||
0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
|
||||
};
|
||||
|
||||
// Six doubles loaded by fast_log10() as three 16-byte pairs with movdqu at byte
// offsets 0, 16 and 32.
// NOTE(review): presumably the coefficients of the polynomial p(r) named in the
// algorithm description - confirm.
ALIGNED_(16) juint _coeff_log10[] =
|
||||
{
|
||||
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
|
||||
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
|
||||
0xdc77b115UL, 0xbff27af2UL
|
||||
};
|
||||
|
||||
// Registers:
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, tmp - r11
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 64-bit SSE2 instruction sequence for log10(), following the algorithm
// description at the top of this file.
// The argument is taken from xmm0 and the result is left in xmm0. The code lowers rsp
// by 24 for scratch slots and raises it again at B1_5. r11 is loaded with the address
// of _L_tbl_log10; eax, ecx, edx and xmm1..xmm7 are overwritten.
// Labels start, B1_2, B1_3 and L_2TAG_PACKET_9_0_2 are bound but never jumped to, and
// B1_4 is declared but never bound.
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_4, B1_5, start;
|
||||
|
||||
assert_different_registers(r11, eax, ecx, edx);
|
||||
|
||||
address HIGHSIGMASK = (address)_HIGHSIGMASK_log10;
|
||||
address LOG10_E = (address)_LOG10_E;
|
||||
address L_tbl = (address)_L_tbl_log10;
|
||||
address log2 = (address)_log2_log10;
|
||||
address coeff = (address)_coeff_log10;
|
||||
|
||||
bind(start);
|
||||
// Reserve 24 bytes and keep a copy of the argument at rsp+0 for the special-case path.
subq(rsp, 24);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
|
||||
bind(B1_2);
|
||||
// xmm2 = double whose top 16 bits are 0x3FF0 (bit pattern of 1.0).
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
// xmm7 = float 0x3EDE0000 (0.43359375), applied to the rcpps result by mulss below.
movl(ecx, 1054736384);
|
||||
movdl(xmm7, ecx);
|
||||
// xmm3 = double whose top 16 bits are 0x77F0.
xorpd(xmm3, xmm3);
|
||||
movl(edx, 30704);
|
||||
pinsrw(xmm3, edx, 3);
|
||||
movdqu(xmm1, xmm0);
|
||||
// xmm4 = 0x8000 in the low dword, added to the scaled reciprocal by paddd below.
movl(edx, 32768);
|
||||
movdl(xmm4, edx);
|
||||
movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
|
||||
// eax = top 16 bits of the argument: sign, 11 exponent bits, 4 mantissa bits.
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
// ecx = 0x3FE0, the exponent bias subtracted on the main path.
movl(ecx, 16352);
|
||||
psrlq(xmm0, 27);
|
||||
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
psrld(xmm0, 2);
|
||||
rcpps(xmm0, xmm0);
|
||||
// xmm1 = mantissa bits of the argument only (sign and exponent shifted out).
psllq(xmm1, 12);
|
||||
// Shuffle immediate 78 swaps the two 64-bit halves: xmm6 = half-swapped copy of xmm5.
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
// Unsigned range check on the top bits. The branch is taken for zero/denormal inputs
// (exponent field 0, so the subtraction wraps), for Inf/NaN (exponent field all ones)
// and for every input with the sign bit set.
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
// Main path; also re-entered from the denormal path with rescaled operands.
bind(L_2TAG_PACKET_1_0_2);
|
||||
mulss(xmm0, xmm7);
|
||||
por(xmm1, xmm3);
|
||||
lea(r11, ExternalAddress(L_tbl));
|
||||
andpd(xmm5, xmm1);
|
||||
paddd(xmm0, xmm4);
|
||||
subsd(xmm1, xmm5);
|
||||
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
andpd(xmm0, xmm6);
|
||||
// eax = exponent field (still shifted left by 4) minus the bias in ecx.
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulsd(xmm1, xmm0);
|
||||
movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
|
||||
movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
|
||||
subsd(xmm5, xmm2);
|
||||
// edx = bits 16..23 (mask 0xFF0000) of the rounded reciprocal, shifted so that it is
// a multiple of 16; the table access below applies a further displacement of -1504.
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504));
|
||||
movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
|
||||
mulsd(xmm6, xmm7);
|
||||
pshufd(xmm5, xmm1, 68);
|
||||
mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
mulpd(xmm5, xmm5);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
pshufd(xmm2, xmm0, 228);
|
||||
addsd(xmm0, xmm1);
|
||||
mulsd(xmm4, xmm1);
|
||||
subsd(xmm2, xmm0);
|
||||
mulsd(xmm6, xmm1);
|
||||
addsd(xmm1, xmm2);
|
||||
pshufd(xmm2, xmm0, 238);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addsd(xmm1, xmm6);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(B1_5);
|
||||
|
||||
// Special-case dispatch: reload the argument and undo the -16 applied to eax.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movq(xmm0, Address(rsp, 0));
|
||||
movq(xmm1, Address(rsp, 0));
|
||||
addl(eax, 16);
|
||||
// eax >= 0x8000: sign bit set.
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
|
||||
// eax < 16: exponent field is zero (+0 or a positive denormal).
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
|
||||
|
||||
// Remaining inputs (+Inf, NaN): the result is x + x.
bind(L_2TAG_PACKET_4_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(B1_5);
|
||||
|
||||
// Reached only from the jcc in L_2TAG_PACKET_2_0_2 (sign set, exponent field all ones).
// The first jcc here still tests the flags of that cmpl: 'above' means some of the upper
// mantissa bits are set (NaN). A non-zero low dword in edx also means NaN.
// What remains is -Inf, handled at L_2TAG_PACKET_6_0_2.
bind(L_2TAG_PACKET_5_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// Exponent field zero, sign clear: test whether 0 + x has any bit set.
bind(L_2TAG_PACKET_3_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
addsd(xmm1, xmm0);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
|
||||
// Denormal: multiply by the double whose top word is 0x47F0, rebuild xmm2 (it was
// overwritten with LOG10_E during entry setup) and repeat that setup with
// ecx = 0x47F0 as the bias before rejoining the main path.
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
movdqu(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
movl(ecx, 18416);
|
||||
psrlq(xmm0, 27);
|
||||
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
psrld(xmm0, 2);
|
||||
rcpps(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Sign bit set: edx = low dword, ecx = high dword with the sign shifted out.
bind(L_2TAG_PACKET_2_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
// -2097152 == 0xFFE00000: exponent field all ones once the sign bit is shifted out.
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
|
||||
// All remaining bits zero: the input is -0.
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
|
||||
|
||||
// Negative non-zero input: compute 0 * Inf (xmm1 top word 0x7FF0), which yields NaN.
bind(L_2TAG_PACKET_6_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
// The value 9 stored at rsp+16 is not read again in this function.
movl(Address(rsp, 16), 9);
|
||||
jmp(L_2TAG_PACKET_8_0_2);
|
||||
|
||||
// Zero input: compute -1.0 / 0 (xmm0 top word 0xBFF0), which yields -Inf.
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
// The value 8 stored at rsp+16 is not read again in this function.
movl(Address(rsp, 16), 8);
|
||||
|
||||
// Special-case result is spilled to rsp+8 and immediately reloaded at B1_3.
bind(L_2TAG_PACKET_8_0_2);
|
||||
movq(Address(rsp, 8), xmm0);
|
||||
|
||||
bind(B1_3);
|
||||
movq(xmm0, Address(rsp, 8));
|
||||
|
||||
bind(L_2TAG_PACKET_9_0_2);
|
||||
|
||||
// Common exit: release the 24 bytes reserved at entry; the result is in xmm0.
bind(B1_5);
|
||||
addq(rsp, 24);
|
||||
|
||||
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
// Constant data for the 32-bit fast_log10 defined after this table:
// 108 rows of 5 juints (540 words, 2160 bytes), declared ALIGNED_(16).
// fast_log10 addresses it by byte offset from a base register:
//   - bytes 0..2063 (129 16-byte entries): presumably the range reached by the
//     indexed load Address(tmp, edx, times_1, -1504) - confirm the index range
//   - 2064 / 2072: 8-byte values 0x3f934413509f7800 / 0x3cdfef311f12b358
//     NOTE(review): these look like the high/low parts of log10(2)/16 - confirm
//   - 2080, 2096, 2112: 16-byte vectors loaded into xmm3, xmm4 and xmm2
//   - 2128: 16-byte AND mask {0xfffffffff8000000, 0xffffe00000000000}
//   - 2144 / 2152: 8-byte values 0x3fdbc00000000000 / 0x3f5a7a6cbf2e4108
// 64-bit values are stored low word first.
ALIGNED_(16) juint _static_const_table_log10[] =
|
||||
{
|
||||
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
|
||||
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
|
||||
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
|
||||
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
|
||||
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
|
||||
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
|
||||
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
|
||||
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
|
||||
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
|
||||
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
|
||||
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
|
||||
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
|
||||
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
|
||||
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
|
||||
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
|
||||
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
|
||||
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
|
||||
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
|
||||
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
|
||||
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
|
||||
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
|
||||
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
|
||||
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
|
||||
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
|
||||
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
|
||||
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
|
||||
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
|
||||
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
|
||||
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
|
||||
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
|
||||
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
|
||||
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
|
||||
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
|
||||
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
|
||||
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
|
||||
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
|
||||
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
|
||||
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
|
||||
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
|
||||
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
|
||||
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
|
||||
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
|
||||
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
|
||||
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
|
||||
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
|
||||
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
|
||||
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
|
||||
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
|
||||
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
|
||||
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
|
||||
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
|
||||
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
|
||||
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
|
||||
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
|
||||
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
|
||||
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
|
||||
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
|
||||
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
|
||||
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
|
||||
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
|
||||
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
|
||||
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
|
||||
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5eff​UL, 0x05fc0565UL,
|
||||
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
|
||||
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
|
||||
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
|
||||
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
|
||||
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
|
||||
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
|
||||
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
|
||||
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
|
||||
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
|
||||
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
|
||||
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
|
||||
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
|
||||
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
|
||||
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
|
||||
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
|
||||
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
|
||||
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
|
||||
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
|
||||
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
|
||||
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
|
||||
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
|
||||
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
|
||||
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
|
||||
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
|
||||
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
|
||||
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
|
||||
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
|
||||
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
|
||||
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
|
||||
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
|
||||
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
|
||||
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
|
||||
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
|
||||
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
|
||||
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
|
||||
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
|
||||
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
|
||||
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
|
||||
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
|
||||
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL,
|
||||
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
|
||||
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
|
||||
0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL,
|
||||
0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
};
|
||||
//registers,
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Emits the 32-bit x86 instruction sequence that computes log10 of a double.
//
// Input:  read from the stack at rsp+112 after a 104-byte scratch area has been
//         reserved; the xmm0 argument register is overwritten by that load.
// Output: every exit path stores the result to the stack and loads it with
//         fld_d (x87), then falls into the common tail that restores tmp.
// tmp:    saved at rsp+40 on entry, used as the base of
//         _static_const_table_log10, and reloaded from rsp+40 at the end.
//
// The 104 bytes taken by subl(rsp, 104) are not released inside this function.
// The 32-bit generate_libmLog10 in this file brackets the call with
// enter()/leave(), which presumably restores rsp - confirm for any new caller.
//
// The trailing //0x... comments on table loads list the table words found at
// that byte offset, in array order.
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
|
||||
address static_const_table_log10 = (address)_static_const_table_log10;
|
||||
|
||||
bind(start);
|
||||
// Reserve scratch space and preserve tmp before it becomes the table base.
subl(rsp, 104);
|
||||
movl(Address(rsp, 40), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table_log10));
|
||||
xorpd(xmm2, xmm2);
|
||||
// 16368 == 0x3FF0: written to the top word of the zeroed xmm2.
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
// 1054736384 == 0x3EDE0000
movl(ecx, 1054736384);
|
||||
movdl(xmm7, ecx);
|
||||
xorpd(xmm3, xmm3);
|
||||
// 30704 == 0x77F0
movl(edx, 30704);
|
||||
pinsrw(xmm3, edx, 3);
|
||||
// Load the input argument from the stack.
movsd(xmm0, Address(rsp, 112));
|
||||
movdqu(xmm1, xmm0);
|
||||
movl(edx, 32768);
|
||||
movdl(xmm4, edx);
|
||||
movdqu(xmm5, Address(tmp, 2128)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
|
||||
// eax = top 16 bits of the input (sign, exponent, high mantissa bits).
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
// 16352 == 0x3FE0; subtracted from the masked exponent word on the main path.
movl(ecx, 16352);
|
||||
psllq(xmm0, 5);
|
||||
movsd(xmm2, Address(tmp, 2144)); //0x00000000UL, 0x3fdbc000UL
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
// Unsigned range check on the top word: anything outside [0x0010, 0x7FEF]
// (zero exponent field, all-ones exponent field, or sign bit set) leaves the
// main path.
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
// Main path; also re-entered from L_2TAG_PACKET_4_0_2 after rescaling.
// NOTE(review): reads as a table lookup plus polynomial evaluation - confirm
// against the algorithm description before relying on this summary.
bind(L_2TAG_PACKET_1_0_2);
|
||||
mulss(xmm0, xmm7);
|
||||
por(xmm1, xmm3);
|
||||
andpd(xmm5, xmm1);
|
||||
paddd(xmm0, xmm4);
|
||||
subsd(xmm1, xmm5);
|
||||
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
andpd(xmm0, xmm6);
|
||||
// 32752 == 0x7FF0: keep only the exponent field of the top word.
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulsd(xmm1, xmm0);
|
||||
movsd(xmm6, Address(tmp, 2064)); //0x509f7800UL, 0x3f934413UL
|
||||
movdqu(xmm3, Address(tmp, 2080)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
|
||||
subsd(xmm5, xmm2);
|
||||
// 16711680 == 0xFF0000: isolate an 8-bit field, then shift it so that edx is
// a multiple of 16 (the size of the vector loaded below).
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504));
|
||||
movdqu(xmm4, Address(tmp, 2096)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, Address(tmp, 2112)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
|
||||
mulsd(xmm6, xmm7);
|
||||
pshufd(xmm5, xmm1, 68);
|
||||
mulsd(xmm7, Address(tmp, 2072)); //0x1f12b358UL, 0x3cdfef31UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
movsd(xmm6, Address(tmp, 2152)); //0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
mulpd(xmm5, xmm5);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
pshufd(xmm2, xmm0, 228);
|
||||
addsd(xmm0, xmm1);
|
||||
mulsd(xmm4, xmm1);
|
||||
subsd(xmm2, xmm0);
|
||||
mulsd(xmm6, xmm1);
|
||||
addsd(xmm1, xmm2);
|
||||
pshufd(xmm2, xmm0, 238);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addsd(xmm1, xmm6);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Special-input dispatch: reload the argument and classify it by its top word.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movsd(xmm0, Address(rsp, 112)); // reload the input (same stack slot as at entry)
|
||||
movdqu(xmm1, xmm0);
|
||||
// Undo the earlier subl(eax, 16).
addl(eax, 16);
|
||||
// Top word >= 0x8000: sign bit set.
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
|
||||
// Top word < 0x0010: exponent field is zero (zero or denormal input).
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
|
||||
|
||||
// Remaining case (sign clear, exponent field all ones): return x + x.
bind(L_2TAG_PACKET_5_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Reached only from the jcc(aboveEqual) in L_2TAG_PACKET_3_0_2, so the flags
// tested first still come from cmpl(ecx, -2097152) there. A non-zero mantissa
// (high or low word) goes to the x + x path; otherwise to L_2TAG_PACKET_7_0_2.
bind(L_2TAG_PACKET_6_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
jmp(L_2TAG_PACKET_7_0_2);
|
||||
|
||||
// Sign bit set: edx = low 32 bits, ecx = high 32 bits with the sign shifted out.
bind(L_2TAG_PACKET_3_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
// -2097152 == 0xFFE00000: exponent field all ones after the shift.
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
|
||||
// All remaining bits zero: the input is negative zero.
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
|
||||
// Builds the result as 0.0 * (double with top word 0x7FF0) and sets edx = 9.
// NOTE(review): edx is not read again in this function - presumably a leftover
// error code from the original library source; confirm.
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
movl(edx, 9);
|
||||
mulsd(xmm0, xmm1);
|
||||
|
||||
// Common exit for the two error-style results: spill the result, reload the
// original input into xmm0, and load the result with fld_d.
bind(L_2TAG_PACKET_9_0_2);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
movsd(xmm0, Address(rsp, 112)); // reload the input (same stack slot as at entry)
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_10_0_2);
|
||||
|
||||
// Zero input: builds the result as (double with top word 0xBFF0) / 0.0 and
// sets edx = 8 (see the note on edx above).
bind(L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
movl(edx, 8);
|
||||
jmp(L_2TAG_PACKET_9_0_2);
|
||||
|
||||
// Exponent field zero: a true zero goes to L_2TAG_PACKET_8_0_2. Otherwise the
// input is multiplied by the double whose top word is 18416 (0x47F0), the
// setup sequence is repeated with ecx = 18416 in place of 16352, and control
// rejoins the main path.
bind(L_2TAG_PACKET_4_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
movdqu(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
movl(ecx, 18416);
|
||||
psllq(xmm0, 5);
|
||||
movsd(xmm2, Address(tmp, 2144)); //0x00000000UL, 0x3fdbc000UL
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Normal exit: spill the result and load it with fld_d.
bind(L_2TAG_PACKET_2_0_2);
|
||||
movsd(Address(rsp, 24), xmm0);
|
||||
fld_d(Address(rsp, 24));
|
||||
|
||||
// Common tail: restore the caller's tmp register saved at entry.
bind(L_2TAG_PACKET_10_0_2);
|
||||
movl(tmp, Address(rsp, 40));
|
||||
|
||||
}
|
||||
#endif
|
3592
hotspot/src/cpu/x86/vm/macroAssembler_x86_pow.cpp
Normal file
3592
hotspot/src/cpu/x86/vm/macroAssembler_x86_pow.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2448
hotspot/src/cpu/x86/vm/macroAssembler_x86_sin.cpp
Normal file
2448
hotspot/src/cpu/x86/vm/macroAssembler_x86_sin.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2144
hotspot/src/cpu/x86/vm/macroAssembler_x86_tan.cpp
Normal file
2144
hotspot/src/cpu/x86/vm/macroAssembler_x86_tan.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -2093,25 +2093,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
entry_checkcast_arraycopy);
|
||||
}
|
||||
|
||||
// Emits two small x87-based math stubs and records their entry points in
// StubRoutines::_intrinsic_log10 and StubRoutines::_intrinsic_tan.
// Each stub loads the double found at rsp+4, applies the operation, and
// returns with ret(0).
void generate_math_stubs() {
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "log10");
|
||||
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
|
||||
|
||||
__ fld_d(Address(rsp, 4));
|
||||
__ flog10();
|
||||
__ ret(0);
|
||||
}
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "tan");
|
||||
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
|
||||
|
||||
__ fld_d(Address(rsp, 4));
|
||||
// 't' selects the tangent variant of trigfunc.
__ trigfunc('t');
|
||||
__ ret(0);
|
||||
}
|
||||
}
|
||||
|
||||
// AES intrinsic stubs
|
||||
enum {AESBlockSize = 16};
|
||||
|
||||
@ -3534,6 +3515,31 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
}
|
||||
|
||||
// Generates the log10 stub and returns the address of its first instruction.
// The emitted code is: enter(); the fast_log10 sequence; leave(); ret(0).
// xmm0..xmm7 and rax/rcx/rdx are handed to fast_log10, with rbx as its
// temporary register.
address generate_libmLog10() {
|
||||
address start = __ pc();
|
||||
|
||||
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
const XMMRegister x2 = xmm2;
|
||||
const XMMRegister x3 = xmm3;
|
||||
|
||||
const XMMRegister x4 = xmm4;
|
||||
const XMMRegister x5 = xmm5;
|
||||
const XMMRegister x6 = xmm6;
|
||||
const XMMRegister x7 = xmm7;
|
||||
|
||||
const Register tmp = rbx;
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
address generate_libmPow() {
|
||||
address start = __ pc();
|
||||
|
||||
@ -3628,6 +3634,44 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
// Generates the code emitted by libm_tancot_huge and returns the address of
// its first instruction.
// Unlike the neighbouring libm stub generators, this one emits no
// enter()/leave()/ret() of its own and passes rbp and rsp as explicit register
// arguments; the whole body, presumably including the frame and the return,
// comes from libm_tancot_huge - confirm there.
address generate_libm_tan_cot_huge() {
|
||||
address start = __ pc();
|
||||
|
||||
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ libm_tancot_huge(x0, x1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
// Generates the tan stub and returns the address of its first instruction.
// The emitted code is: enter(); the fast_tan sequence; leave(); ret(0).
// xmm0..xmm7 and rax/rcx/rdx are handed to fast_tan, with rbx as its
// temporary register.
address generate_libmTan() {
|
||||
address start = __ pc();
|
||||
|
||||
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
const XMMRegister x2 = xmm2;
|
||||
const XMMRegister x3 = xmm3;
|
||||
|
||||
const XMMRegister x4 = xmm4;
|
||||
const XMMRegister x5 = xmm5;
|
||||
const XMMRegister x6 = xmm6;
|
||||
const XMMRegister x7 = xmm7;
|
||||
|
||||
const Register tmp = rbx;
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
// Safefetch stubs.
|
||||
@ -3853,24 +3897,25 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||
}
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
|
||||
StubRoutines::x86::_L_2il0floatpacket_0_adr = (address)StubRoutines::x86::_L_2il0floatpacket_0;
|
||||
StubRoutines::x86::_Pi4Inv_adr = (address)StubRoutines::x86::_Pi4Inv;
|
||||
StubRoutines::x86::_Pi4x3_adr = (address)StubRoutines::x86::_Pi4x3;
|
||||
StubRoutines::x86::_Pi4x4_adr = (address)StubRoutines::x86::_Pi4x4;
|
||||
StubRoutines::x86::_ones_adr = (address)StubRoutines::x86::_ones;
|
||||
StubRoutines::_dexp = generate_libmExp();
|
||||
StubRoutines::_dlog = generate_libmLog();
|
||||
StubRoutines::_dlog10 = generate_libmLog10();
|
||||
StubRoutines::_dpow = generate_libmPow();
|
||||
if (UseLibmSinIntrinsic || UseLibmCosIntrinsic) {
|
||||
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
|
||||
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
|
||||
}
|
||||
if (UseLibmSinIntrinsic) {
|
||||
StubRoutines::_dsin = generate_libmSin();
|
||||
}
|
||||
if (UseLibmCosIntrinsic) {
|
||||
StubRoutines::_dcos = generate_libmCos();
|
||||
}
|
||||
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
|
||||
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
|
||||
StubRoutines::_dsin = generate_libmSin();
|
||||
StubRoutines::_dcos = generate_libmCos();
|
||||
StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge();
|
||||
StubRoutines::_dtan = generate_libmTan();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void generate_all() {
|
||||
// Generates all stubs and initializes the entry points
|
||||
|
||||
@ -3889,8 +3934,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// arraycopy stubs used by compilers
|
||||
generate_arraycopy_stubs();
|
||||
|
||||
generate_math_stubs();
|
||||
|
||||
// don't bother generating these AES intrinsic stubs unless global flag is set
|
||||
if (UseAESIntrinsics) {
|
||||
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
|
||||
|
@ -2972,35 +2972,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
|
||||
}
|
||||
|
||||
// Emits two x87-based math stubs and records their entry points in
// StubRoutines::_intrinsic_log10 and StubRoutines::_intrinsic_tan.
// Both stubs take the argument in xmm0 and leave the result in xmm0; an
// 8-byte stack slot is used to move the value into and out of the x87 unit.
void generate_math_stubs() {
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "log10");
|
||||
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
|
||||
|
||||
// Reserve an 8-byte slot and pass xmm0 through it to the x87 stack.
__ subq(rsp, 8);
|
||||
__ movdbl(Address(rsp, 0), xmm0);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ flog10();
|
||||
// Store the x87 result back through the slot into xmm0, then release it.
__ fstp_d(Address(rsp, 0));
|
||||
__ movdbl(xmm0, Address(rsp, 0));
|
||||
__ addq(rsp, 8);
|
||||
__ ret(0);
|
||||
}
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "tan");
|
||||
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
|
||||
|
||||
// Same xmm0 -> stack slot -> x87 round trip as the log10 stub above.
__ subq(rsp, 8);
|
||||
__ movdbl(Address(rsp, 0), xmm0);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
// 't' selects the tangent variant of trigfunc.
__ trigfunc('t');
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
__ movdbl(xmm0, Address(rsp, 0));
|
||||
__ addq(rsp, 8);
|
||||
__ ret(0);
|
||||
}
|
||||
}
|
||||
|
||||
// AES intrinsic stubs
|
||||
enum {AESBlockSize = 16};
|
||||
|
||||
@ -4731,6 +4702,46 @@ class StubGenerator: public StubCodeGenerator {
|
||||
#endif
|
||||
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
|
||||
|
||||
#ifdef _WIN64
|
||||
// restore xmm regs belonging to calling function
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
address generate_libmLog10() {
|
||||
address start = __ pc();
|
||||
|
||||
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
const XMMRegister x2 = xmm2;
|
||||
const XMMRegister x3 = xmm3;
|
||||
|
||||
const XMMRegister x4 = xmm4;
|
||||
const XMMRegister x5 = xmm5;
|
||||
const XMMRegister x6 = xmm6;
|
||||
const XMMRegister x7 = xmm7;
|
||||
|
||||
const Register tmp = r11;
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
|
||||
#endif
|
||||
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
|
||||
|
||||
#ifdef _WIN64
|
||||
// restore xmm regs belonging to calling function
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
@ -4810,6 +4821,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
__ push(rsi);
|
||||
__ push(rdi);
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
@ -4822,6 +4835,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
__ pop(rdi);
|
||||
__ pop(rsi);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
@ -4853,6 +4868,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
__ push(rsi);
|
||||
__ push(rdi);
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
@ -4865,6 +4882,55 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
__ pop(rdi);
|
||||
__ pop(rsi);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
address generate_libmTan() {
|
||||
address start = __ pc();
|
||||
|
||||
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
const XMMRegister x2 = xmm2;
|
||||
const XMMRegister x3 = xmm3;
|
||||
|
||||
const XMMRegister x4 = xmm4;
|
||||
const XMMRegister x5 = xmm5;
|
||||
const XMMRegister x6 = xmm6;
|
||||
const XMMRegister x7 = xmm7;
|
||||
|
||||
const Register tmp1 = r8;
|
||||
const Register tmp2 = r9;
|
||||
const Register tmp3 = r10;
|
||||
const Register tmp4 = r11;
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
__ push(rsi);
|
||||
__ push(rdi);
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
|
||||
#endif
|
||||
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
|
||||
|
||||
#ifdef _WIN64
|
||||
// restore xmm regs belonging to calling function
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
__ pop(rdi);
|
||||
__ pop(rsi);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
@ -5065,16 +5131,28 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||
}
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
|
||||
StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
|
||||
StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
|
||||
StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
|
||||
StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
|
||||
StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
|
||||
StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
|
||||
StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
|
||||
StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
|
||||
StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
|
||||
StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
|
||||
StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
|
||||
StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
|
||||
StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
|
||||
StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
|
||||
StubRoutines::_dexp = generate_libmExp();
|
||||
StubRoutines::_dlog = generate_libmLog();
|
||||
StubRoutines::_dlog10 = generate_libmLog10();
|
||||
StubRoutines::_dpow = generate_libmPow();
|
||||
if (UseLibmSinIntrinsic) {
|
||||
StubRoutines::_dsin = generate_libmSin();
|
||||
}
|
||||
if (UseLibmCosIntrinsic) {
|
||||
StubRoutines::_dcos = generate_libmCos();
|
||||
}
|
||||
StubRoutines::_dtan = generate_libmTan();
|
||||
StubRoutines::_dsin = generate_libmSin();
|
||||
StubRoutines::_dcos = generate_libmCos();
|
||||
}
|
||||
}
|
||||
|
||||
@ -5119,8 +5197,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// arraycopy stubs used by compilers
|
||||
generate_arraycopy_stubs();
|
||||
|
||||
generate_math_stubs();
|
||||
|
||||
// don't bother generating these AES intrinsic stubs unless global flag is set
|
||||
if (UseAESIntrinsics) {
|
||||
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
|
||||
|
@ -48,6 +48,29 @@ address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
|
||||
address StubRoutines::x86::_k256_adr = NULL;
|
||||
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
|
||||
|
||||
// Address slots for the LIBM constant tables shared by sin and cos.
// Each slot starts out NULL; the stub generator's initialization code in this
// change assigns it the address of the like-named table
// (e.g. _ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF).
|
||||
address StubRoutines::x86::_ONEHALF_adr = NULL;
|
||||
address StubRoutines::x86::_P_2_adr = NULL;
|
||||
address StubRoutines::x86::_SC_4_adr = NULL;
|
||||
address StubRoutines::x86::_Ctable_adr = NULL;
|
||||
address StubRoutines::x86::_SC_2_adr = NULL;
|
||||
address StubRoutines::x86::_SC_3_adr = NULL;
|
||||
address StubRoutines::x86::_SC_1_adr = NULL;
|
||||
address StubRoutines::x86::_PI_INV_TABLE_adr = NULL;
|
||||
address StubRoutines::x86::_PI_4_adr = NULL;
|
||||
address StubRoutines::x86::_PI32INV_adr = NULL;
|
||||
address StubRoutines::x86::_SIGN_MASK_adr = NULL;
|
||||
address StubRoutines::x86::_P_1_adr = NULL;
|
||||
address StubRoutines::x86::_P_3_adr = NULL;
|
||||
address StubRoutines::x86::_NEG_ZERO_adr = NULL;
|
||||
|
||||
// Address slots for the LIBM constant tables shared by sincos and tancot.
// They also start out NULL and are assigned by the stub generator's
// initialization code in the same way.
|
||||
address StubRoutines::x86::_L_2il0floatpacket_0_adr = NULL;
|
||||
address StubRoutines::x86::_Pi4Inv_adr = NULL;
|
||||
address StubRoutines::x86::_Pi4x3_adr = NULL;
|
||||
address StubRoutines::x86::_Pi4x4_adr = NULL;
|
||||
address StubRoutines::x86::_ones_adr = NULL;
|
||||
|
||||
uint64_t StubRoutines::x86::_crc_by128_masks[] =
|
||||
{
|
||||
/* The fields in this structure are arranged so that they can be
|
||||
|
@ -57,6 +57,48 @@
|
||||
// byte flip mask for sha256
|
||||
static address _pshuffle_byte_flip_mask_addr;
|
||||
|
||||
//tables common for LIBM sin and cos
|
||||
static juint _ONEHALF[];
|
||||
static address _ONEHALF_adr;
|
||||
static juint _P_2[];
|
||||
static address _P_2_adr;
|
||||
static juint _SC_4[];
|
||||
static address _SC_4_adr;
|
||||
static juint _Ctable[];
|
||||
static address _Ctable_adr;
|
||||
static juint _SC_2[];
|
||||
static address _SC_2_adr;
|
||||
static juint _SC_3[];
|
||||
static address _SC_3_adr;
|
||||
static juint _SC_1[];
|
||||
static address _SC_1_adr;
|
||||
static juint _PI_INV_TABLE[];
|
||||
static address _PI_INV_TABLE_adr;
|
||||
static juint _PI_4[];
|
||||
static address _PI_4_adr;
|
||||
static juint _PI32INV[];
|
||||
static address _PI32INV_adr;
|
||||
static juint _SIGN_MASK[];
|
||||
static address _SIGN_MASK_adr;
|
||||
static juint _P_1[];
|
||||
static address _P_1_adr;
|
||||
static juint _P_3[];
|
||||
static address _P_3_adr;
|
||||
static juint _NEG_ZERO[];
|
||||
static address _NEG_ZERO_adr;
|
||||
|
||||
//tables common for LIBM sincos and tancot
|
||||
static juint _L_2il0floatpacket_0[];
|
||||
static address _L_2il0floatpacket_0_adr;
|
||||
static juint _Pi4Inv[];
|
||||
static address _Pi4Inv_adr;
|
||||
static juint _Pi4x3[];
|
||||
static address _Pi4x3_adr;
|
||||
static juint _Pi4x4[];
|
||||
static address _Pi4x4_adr;
|
||||
static juint _ones[];
|
||||
static address _ones_adr;
|
||||
|
||||
public:
|
||||
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
|
||||
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
|
||||
@ -69,4 +111,24 @@
|
||||
static address k256_addr() { return _k256_adr; }
|
||||
static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; }
|
||||
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
|
||||
// Accessors for the LIBM constant-table address slots. Each one returns the
// matching static *_adr member unchanged.
// The first fourteen cover the slots declared as common to LIBM sin and cos.
static address _ONEHALF_addr() { return _ONEHALF_adr; }
|
||||
static address _P_2_addr() { return _P_2_adr; }
|
||||
static address _SC_4_addr() { return _SC_4_adr; }
|
||||
static address _Ctable_addr() { return _Ctable_adr; }
|
||||
static address _SC_2_addr() { return _SC_2_adr; }
|
||||
static address _SC_3_addr() { return _SC_3_adr; }
|
||||
static address _SC_1_addr() { return _SC_1_adr; }
|
||||
static address _PI_INV_TABLE_addr() { return _PI_INV_TABLE_adr; }
|
||||
static address _PI_4_addr() { return _PI_4_adr; }
|
||||
static address _PI32INV_addr() { return _PI32INV_adr; }
|
||||
static address _SIGN_MASK_addr() { return _SIGN_MASK_adr; }
|
||||
static address _P_1_addr() { return _P_1_adr; }
|
||||
static address _P_3_addr() { return _P_3_adr; }
|
||||
static address _NEG_ZERO_addr() { return _NEG_ZERO_adr; }
|
||||
// The remaining five cover the slots declared as common to LIBM sincos and tancot.
static address _L_2il0floatpacket_0_addr() { return _L_2il0floatpacket_0_adr; }
|
||||
static address _Pi4Inv_addr() { return _Pi4Inv_adr; }
|
||||
static address _Pi4x3_addr() { return _Pi4x3_adr; }
|
||||
static address _Pi4x4_addr() { return _Pi4x4_adr; }
|
||||
static address _ones_addr() { return _ones_adr; }
|
||||
|
||||
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
|
||||
|
@ -345,13 +345,34 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
||||
__ fld_d(Address(rsp, 1*wordSize));
|
||||
switch (kind) {
|
||||
case Interpreter::java_lang_math_sin :
|
||||
__ trigfunc('s');
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_cos :
|
||||
__ trigfunc('c');
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_tan :
|
||||
__ trigfunc('t');
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_sqrt:
|
||||
__ fsqrt();
|
||||
@ -362,26 +383,29 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
||||
case Interpreter::java_lang_math_log:
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_log10:
|
||||
__ flog10();
|
||||
// Store to stack to convert 80bit precision back to 64bits
|
||||
__ push_fTOS();
|
||||
__ pop_fTOS();
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_pow:
|
||||
__ fld_d(Address(rsp, 3*wordSize)); // second argument
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
__ fstp_d(Address(rsp, 2 * wordSize));
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
|
||||
@ -391,7 +415,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
||||
case Interpreter::java_lang_math_exp:
|
||||
__ subptr(rsp, 2*wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "interpreter/interpreterRuntime.hpp"
|
||||
#include "interpreter/templateInterpreterGenerator.hpp"
|
||||
#include "runtime/arguments.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
|
||||
#define __ _masm->
|
||||
|
||||
@ -373,32 +374,60 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
||||
__ sqrtsd(xmm0, Address(rsp, wordSize));
|
||||
} else if (kind == Interpreter::java_lang_math_exp) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_log) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_log10) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_sin) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dsin() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_cos) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dcos() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_pow) {
|
||||
__ movdbl(xmm1, Address(rsp, wordSize));
|
||||
__ movdbl(xmm0, Address(rsp, 3 * wordSize));
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_tan) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
|
||||
}
|
||||
} else {
|
||||
__ fld_d(Address(rsp, wordSize));
|
||||
switch (kind) {
|
||||
case Interpreter::java_lang_math_sin :
|
||||
__ trigfunc('s');
|
||||
break;
|
||||
case Interpreter::java_lang_math_cos :
|
||||
__ trigfunc('c');
|
||||
break;
|
||||
case Interpreter::java_lang_math_tan :
|
||||
__ trigfunc('t');
|
||||
break;
|
||||
case Interpreter::java_lang_math_abs:
|
||||
__ fabs();
|
||||
break;
|
||||
case Interpreter::java_lang_math_log10:
|
||||
__ flog10();
|
||||
break;
|
||||
default :
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
@ -9828,27 +9828,6 @@ instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// x87 tangent rule: selected for TanD when UseSSE<=1, with dst and src both
// in the regDPR1 operand class. Emits the raw bytes D9 F2 (fptan) followed
// by DD D8 (fstp st).
// NOTE(review): per the x86 ISA, FPTAN pushes 1.0 above the result, so the
// trailing FSTP presumably just discards that 1.0 -- confirm against the SDM.
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
|
||||
predicate (UseSSE<=1);
|
||||
match(Set dst(TanD src));
|
||||
format %{ "DTAN $dst" %}
|
||||
ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
|
||||
Opcode(0xDD), Opcode(0xD8)); // fstp st
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Tangent rule for UseSSE>=2: matches TanD in place on dst (regD) and wraps
// the same fptan / fstp st byte sequence between Push_SrcD and Push_ResultD.
// The flags register is declared killed because, per the effect comment,
// those encodings adjust ESP with SUB/ADD.
// NOTE(review): presumably Push_SrcD moves dst onto the x87 stack and
// Push_ResultD moves the x87 result back into dst -- both enc_classes are
// defined elsewhere; confirm there.
instruct tanD_reg(regD dst, eFlagsReg cr) %{
|
||||
predicate (UseSSE>=2);
|
||||
match(Set dst(TanD dst));
|
||||
effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
|
||||
format %{ "DTAN $dst" %}
|
||||
ins_encode( Push_SrcD(dst),
|
||||
Opcode(0xD9), Opcode(0xF2), // fptan
|
||||
Opcode(0xDD), Opcode(0xD8), // fstp st
|
||||
Push_ResultD(dst) );
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct atanDPR_reg(regDPR dst, regDPR src) %{
|
||||
predicate (UseSSE<=1);
|
||||
match(Set dst(AtanD dst src));
|
||||
@ -9880,41 +9859,6 @@ instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// x87 log10 rule: selected for Log10D when UseSSE<=1, with dst and src both
// in the regDPR1 operand class. Computes log10(x) as log_10(2) * log_2(x)
// using the three-instruction sequence fldlg2 / fxch / fyl2x described below.
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
|
||||
predicate (UseSSE<=1);
|
||||
// The source Double operand on FPU stack
|
||||
match(Set dst (Log10D src));
|
||||
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
|
||||
// fxch ; swap ST(0) with ST(1)
|
||||
// fyl2x ; compute log_10(2) * log_2(x)
|
||||
format %{ "FLDLG2 \t\t\t#Log10\n\t"
|
||||
"FXCH \n\t"
|
||||
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
|
||||
%}
|
||||
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
|
||||
Opcode(0xD9), Opcode(0xC9), // fxch
|
||||
Opcode(0xD9), Opcode(0xF1)); // fyl2x
|
||||
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Log10 rule for UseSSE>=2: matches Log10D from src into dst (both regD) and
// declares the flags register killed. The encoding emits fldlg2 first, then
// Push_SrcD(src), then fyl2x, then Push_ResultD(dst).
// NOTE(review): no fxch is emitted here, presumably because Push_SrcD places
// src on top of the already-pushed log_10(2) -- confirm against the
// Push_SrcD enc_class, which is defined elsewhere.
instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
|
||||
predicate (UseSSE>=2);
|
||||
effect(KILL cr);
|
||||
match(Set dst (Log10D src));
|
||||
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
|
||||
// fyl2x ; compute log_10(2) * log_2(x)
|
||||
format %{ "FLDLG2 \t\t\t#Log10\n\t"
|
||||
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
|
||||
%}
|
||||
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
|
||||
Push_SrcD(src),
|
||||
Opcode(0xD9), Opcode(0xF1), // fyl2x
|
||||
Push_ResultD(dst));
|
||||
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
//-------------Float Instructions-------------------------------
|
||||
// Float Math
|
||||
|
||||
|
@ -9897,34 +9897,6 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// -----------Trig and Transcendental Instructions------------------------------
|
||||
// Tangent rule with no predicate: matches TanD in place on dst (regD) and
// wraps the fptan / fstp st byte sequence between Push_SrcXD and
// Push_ResultXD.
// NOTE(review): no effect() is declared here, whereas the tanD_reg variant
// built on Push_SrcD/Push_ResultD declares KILL on the flags register because
// of stack-pointer arithmetic. Confirm whether Push_SrcXD/Push_ResultXD
// (defined elsewhere) also modify flags.
instruct tanD_reg(regD dst) %{
|
||||
match(Set dst (TanD dst));
|
||||
|
||||
format %{ "dtan $dst\n\t" %}
|
||||
ins_encode( Push_SrcXD(dst),
|
||||
Opcode(0xD9), Opcode(0xF2), //fptan
|
||||
Opcode(0xDD), Opcode(0xD8), //fstp st
|
||||
Push_ResultXD(dst) );
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Log10 rule with no predicate: matches Log10D in place on dst (regD).
// The encoding emits fldlg2, then Push_SrcXD(dst), then fyl2x, then
// Push_ResultXD(dst), i.e. log_10(2) * log_2(x) as described below.
// NOTE(review): no effect() is declared; confirm whether Push_SrcXD /
// Push_ResultXD (defined elsewhere) modify flags.
instruct log10D_reg(regD dst) %{
|
||||
// The source and result Double operands in XMM registers
|
||||
match(Set dst (Log10D dst));
|
||||
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
|
||||
// fyl2x ; compute log_10(2) * log_2(x)
|
||||
format %{ "fldlg2\t\t\t#Log10\n\t"
|
||||
"fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
|
||||
%}
|
||||
ins_encode(Opcode(0xD9), Opcode(0xEC), // fldlg2
|
||||
Push_SrcXD(dst),
|
||||
Opcode(0xD9), Opcode(0xF1), // fyl2x
|
||||
Push_ResultXD(dst));
|
||||
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
//----------Arithmetic Conversion Instructions---------------------------------
|
||||
|
||||
instruct roundFloat_nop(regF dst)
|
||||
|
@ -728,31 +728,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case lir_tan:
|
||||
case lir_log10: {
|
||||
assert(op->as_Op2() != NULL, "must be");
|
||||
LIR_Op2* op2 = (LIR_Op2*)op;
|
||||
|
||||
// On x86 tan/sin/cos need two temporary fpu stack slots and
|
||||
// log/log10 need one so handle opr2 and tmp as temp inputs.
|
||||
// Register input operand as temp to guarantee that it doesn't
|
||||
// overlap with the input.
|
||||
assert(op2->_info == NULL, "not used");
|
||||
assert(op2->_tmp5->is_illegal(), "not used");
|
||||
assert(op2->_opr1->is_valid(), "used");
|
||||
do_input(op2->_opr1); do_temp(op2->_opr1);
|
||||
|
||||
if (op2->_opr2->is_valid()) do_temp(op2->_opr2);
|
||||
if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1);
|
||||
if (op2->_tmp2->is_valid()) do_temp(op2->_tmp2);
|
||||
if (op2->_tmp3->is_valid()) do_temp(op2->_tmp3);
|
||||
if (op2->_tmp4->is_valid()) do_temp(op2->_tmp4);
|
||||
if (op2->_result->is_valid()) do_output(op2->_result);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// LIR_Op3
|
||||
case lir_idiv:
|
||||
case lir_irem: {
|
||||
@ -1740,8 +1715,6 @@ const char * LIR_Op::name() const {
|
||||
case lir_rem: s = "rem"; break;
|
||||
case lir_abs: s = "abs"; break;
|
||||
case lir_sqrt: s = "sqrt"; break;
|
||||
case lir_tan: s = "tan"; break;
|
||||
case lir_log10: s = "log10"; break;
|
||||
case lir_logic_and: s = "logic_and"; break;
|
||||
case lir_logic_or: s = "logic_or"; break;
|
||||
case lir_logic_xor: s = "logic_xor"; break;
|
||||
|
@ -320,9 +320,11 @@ const char* Runtime1::name_for_address(address entry) {
|
||||
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
|
||||
FUNCTION_CASE(entry, StubRoutines::dexp());
|
||||
FUNCTION_CASE(entry, StubRoutines::dlog());
|
||||
FUNCTION_CASE(entry, StubRoutines::dlog10());
|
||||
FUNCTION_CASE(entry, StubRoutines::dpow());
|
||||
FUNCTION_CASE(entry, StubRoutines::dsin());
|
||||
FUNCTION_CASE(entry, StubRoutines::dcos());
|
||||
FUNCTION_CASE(entry, StubRoutines::dtan());
|
||||
|
||||
#undef FUNCTION_CASE
|
||||
|
||||
|
@ -171,7 +171,6 @@ macro(LoadN)
|
||||
macro(LoadRange)
|
||||
macro(LoadS)
|
||||
macro(Lock)
|
||||
macro(Log10D)
|
||||
macro(Loop)
|
||||
macro(LoopLimit)
|
||||
macro(Mach)
|
||||
@ -265,7 +264,6 @@ macro(SubI)
|
||||
macro(SubL)
|
||||
macro(TailCall)
|
||||
macro(TailJump)
|
||||
macro(TanD)
|
||||
macro(ThreadLocal)
|
||||
macro(Unlock)
|
||||
macro(URShiftI)
|
||||
|
@ -1679,7 +1679,6 @@ bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
|
||||
switch (id) {
|
||||
case vmIntrinsics::_dabs: n = new AbsDNode( arg); break;
|
||||
case vmIntrinsics::_dsqrt: n = new SqrtDNode(C, control(), arg); break;
|
||||
case vmIntrinsics::_dlog10: n = new Log10DNode(C, control(), arg); break;
|
||||
default: fatal_unexpected_iid(id); break;
|
||||
}
|
||||
set_result(_gvn.transform(n));
|
||||
@ -1693,10 +1692,6 @@ bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
|
||||
Node* arg = round_double_node(argument(0));
|
||||
Node* n = NULL;
|
||||
|
||||
switch (id) {
|
||||
case vmIntrinsics::_dtan: n = new TanDNode(C, control(), arg); break;
|
||||
default: fatal_unexpected_iid(id); break;
|
||||
}
|
||||
n = _gvn.transform(n);
|
||||
|
||||
// Rounding required? Check for argument reduction!
|
||||
@ -1814,15 +1809,18 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
|
||||
return StubRoutines::dcos() != NULL ?
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dcos(), "dcos") :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos), "COS");
|
||||
case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN");
|
||||
|
||||
case vmIntrinsics::_dtan:
|
||||
return StubRoutines::dtan() != NULL ?
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dtan(), "dtan") :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN");
|
||||
case vmIntrinsics::_dlog:
|
||||
return StubRoutines::dlog() != NULL ?
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog(), "dlog") :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog), "LOG");
|
||||
case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_math(id) :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
|
||||
case vmIntrinsics::_dlog10:
|
||||
return StubRoutines::dlog10() != NULL ?
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog10(), "dlog10") :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
|
||||
|
||||
// These intrinsics are supported on all hardware
|
||||
case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false;
|
||||
|
@ -1533,25 +1533,3 @@ const Type* SqrtDNode::Value(PhaseGVN* phase) const {
|
||||
if( d < 0.0 ) return Type::DOUBLE;
|
||||
return TypeD::make( sqrt( d ) );
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------Value------------------------------------------
|
||||
// Compute tan
|
||||
const Type* TanDNode::Value(PhaseGVN* phase) const {
  const Type* arg_type = phase->type(in(1));

  // A dead (TOP) input makes the result TOP as well.
  if (arg_type == Type::TOP) {
    return Type::TOP;
  }

  // Only a double constant can be folded; use the intrinsic-equivalent tan
  // so the folded value matches what the generated code would produce.
  if (arg_type->base() == Type::DoubleCon) {
    const double arg = arg_type->getd();
    return TypeD::make(StubRoutines::intrinsic_tan(arg));
  }

  // Anything else is just "some double".
  return Type::DOUBLE;
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------Value------------------------------------------
|
||||
// Compute log10
|
||||
const Type* Log10DNode::Value(PhaseGVN* phase) const {
  const Type* arg_type = phase->type(in(1));

  // A dead (TOP) input makes the result TOP as well.
  if (arg_type == Type::TOP) {
    return Type::TOP;
  }

  // Only a double constant can be folded; use the intrinsic-equivalent log10
  // so the folded value matches what the generated code would produce.
  if (arg_type->base() == Type::DoubleCon) {
    const double arg = arg_type->getd();
    return TypeD::make(StubRoutines::intrinsic_log10(arg));
  }

  // Anything else is just "some double".
  return Type::DOUBLE;
}
|
||||
|
@ -408,21 +408,6 @@ public:
|
||||
virtual uint ideal_reg() const { return Op_RegD; }
|
||||
};
|
||||
|
||||
//------------------------------TanDNode---------------------------------------
|
||||
// tangens of a double
|
||||
class TanDNode : public Node {
|
||||
public:
|
||||
TanDNode(Compile* C, Node *c,Node *in1) : Node(c, in1) {
|
||||
init_flags(Flag_is_expensive);
|
||||
C->add_expensive_node(this);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
const Type *bottom_type() const { return Type::DOUBLE; }
|
||||
virtual uint ideal_reg() const { return Op_RegD; }
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
};
|
||||
|
||||
|
||||
//------------------------------AtanDNode--------------------------------------
|
||||
// arc tangent of a double
|
||||
class AtanDNode : public Node {
|
||||
@ -448,20 +433,6 @@ public:
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
};
|
||||
|
||||
//------------------------------Log10DNode---------------------------------------
|
||||
// Log_10 of a double
|
||||
class Log10DNode : public Node {
|
||||
public:
|
||||
Log10DNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
|
||||
init_flags(Flag_is_expensive);
|
||||
C->add_expensive_node(this);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
const Type *bottom_type() const { return Type::DOUBLE; }
|
||||
virtual uint ideal_reg() const { return Op_RegD; }
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
};
|
||||
|
||||
//-------------------------------ReverseBytesINode--------------------------------
|
||||
// reverse bytes of an integer
|
||||
class ReverseBytesINode : public Node {
|
||||
|
@ -154,11 +154,14 @@ address StubRoutines::_vectorizedMismatch = NULL;
|
||||
|
||||
// Definitions of the math-related stub entry addresses. All start out NULL.
// NOTE(review): presumably a platform stub generator assigns them later;
// that code is not visible here -- confirm.
address StubRoutines::_dexp = NULL;
|
||||
address StubRoutines::_dlog = NULL;
|
||||
address StubRoutines::_dlog10 = NULL;
|
||||
address StubRoutines::_dpow = NULL;
|
||||
address StubRoutines::_dsin = NULL;
|
||||
address StubRoutines::_dcos = NULL;
|
||||
address StubRoutines::_dlibm_sin_cos_huge = NULL;
|
||||
address StubRoutines::_dlibm_reduce_pi04l = NULL;
|
||||
address StubRoutines::_dlibm_tan_cot_huge = NULL;
|
||||
address StubRoutines::_dtan = NULL;
|
||||
|
||||
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
|
||||
double (* StubRoutines::_intrinsic_sin )(double) = NULL;
|
||||
|
@ -213,11 +213,14 @@ class StubRoutines: AllStatic {
|
||||
|
||||
static address _dexp;
|
||||
static address _dlog;
|
||||
static address _dlog10;
|
||||
static address _dpow;
|
||||
static address _dsin;
|
||||
static address _dcos;
|
||||
static address _dlibm_sin_cos_huge;
|
||||
static address _dlibm_reduce_pi04l;
|
||||
static address _dlibm_tan_cot_huge;
|
||||
static address _dtan;
|
||||
|
||||
// These are versions of the java.lang.Math methods which perform
|
||||
// the same operations as the intrinsic version. They are used for
|
||||
@ -391,11 +394,14 @@ class StubRoutines: AllStatic {
|
||||
|
||||
// Accessors for the math stub entry addresses; each returns the matching
// static field unchanged. The value may be NULL: users of dexp/dlog/dlog10/
// dpow/dsin/dcos/dtan visible elsewhere in this text compare the result
// against NULL and fall back to the SharedRuntime routine.
static address dexp() { return _dexp; }
|
||||
static address dlog() { return _dlog; }
|
||||
static address dlog10() { return _dlog10; }
|
||||
static address dpow() { return _dpow; }
|
||||
static address dsin() { return _dsin; }
|
||||
static address dcos() { return _dcos; }
|
||||
static address dlibm_reduce_pi04l() { return _dlibm_reduce_pi04l; }
|
||||
static address dlibm_sin_cos_huge() { return _dlibm_sin_cos_huge; }
|
||||
static address dlibm_tan_cot_huge() { return _dlibm_tan_cot_huge; }
|
||||
static address dtan() { return _dtan; }
|
||||
|
||||
static address select_fill_function(BasicType t, bool aligned, const char* &name);
|
||||
|
||||
|
@ -859,9 +859,11 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
||||
static_field(StubRoutines, _mulAdd, address) \
|
||||
static_field(StubRoutines, _dexp, address) \
|
||||
static_field(StubRoutines, _dlog, address) \
|
||||
static_field(StubRoutines, _dlog10, address) \
|
||||
static_field(StubRoutines, _dpow, address) \
|
||||
static_field(StubRoutines, _dsin, address) \
|
||||
static_field(StubRoutines, _dcos, address) \
|
||||
static_field(StubRoutines, _dtan, address) \
|
||||
static_field(StubRoutines, _vectorizedMismatch, address) \
|
||||
static_field(StubRoutines, _jbyte_arraycopy, address) \
|
||||
static_field(StubRoutines, _jshort_arraycopy, address) \
|
||||
@ -2066,10 +2068,8 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
||||
declare_c2_type(NegNode, Node) \
|
||||
declare_c2_type(NegFNode, NegNode) \
|
||||
declare_c2_type(NegDNode, NegNode) \
|
||||
declare_c2_type(TanDNode, Node) \
|
||||
declare_c2_type(AtanDNode, Node) \
|
||||
declare_c2_type(SqrtDNode, Node) \
|
||||
declare_c2_type(Log10DNode, Node) \
|
||||
declare_c2_type(ReverseBytesINode, Node) \
|
||||
declare_c2_type(ReverseBytesLNode, Node) \
|
||||
declare_c2_type(ReductionNode, Node) \
|
||||
|
Loading…
x
Reference in New Issue
Block a user