8152907: Update for x86 tan and log10 in the math lib

Optimize Math.tan() and log10() for 64 and 32 bit X86 architecture using Intel LIBM  implementation.

Reviewed-by: kvn, twisti
This commit is contained in:
Shravya Rukmannagari 2016-04-06 10:29:26 -07:00 committed by Vivek Deshpande
parent 429b1c80a1
commit ad79a5ae65
33 changed files with 11547 additions and 8901 deletions

View File

@ -1827,6 +1827,15 @@ void Assembler::cvttss2sil(Register dst, XMMRegister src) {
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xE6);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::decl(Address dst) {
// Don't use it directly. Use MacroAssembler::decrement() instead.
InstructionMark im(this);
@ -4993,7 +5002,7 @@ void Assembler::paddq(XMMRegister dst, XMMRegister src) {
}
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
assert(VM_Version::supports_sse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x01);
@ -5001,7 +5010,7 @@ void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
}
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
assert(VM_Version::supports_sse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x02);

View File

@ -1048,6 +1048,8 @@ private:
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
void cvttpd2dq(XMMRegister dst, XMMRegister src);
// Divide Scalar Double-Precision Floating-Point Values
void divsd(XMMRegister dst, Address src);
void divsd(XMMRegister dst, XMMRegister src);

View File

@ -2365,13 +2365,8 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
} else if (value->is_double_fpu()) {
assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
switch(code) {
case lir_log10 : __ flog10() ; break;
case lir_abs : __ fabs() ; break;
case lir_sqrt : __ fsqrt(); break;
case lir_tan :
// Should consider not saving rbx, if not necessary
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
break;
default : ShouldNotReachHere();
}
} else {

View File

@ -812,7 +812,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
x->id() == vmIntrinsics::_dsin) {
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
x->id() == vmIntrinsics::_dlog10) {
do_LibmIntrinsic(x);
return;
}
@ -820,58 +821,17 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
LIRItem value(x->argument_at(0), this);
bool use_fpu = false;
if (UseSSE >= 2) {
switch(x->id()) {
case vmIntrinsics::_dtan:
case vmIntrinsics::_dlog10:
use_fpu = true;
break;
}
} else {
if (UseSSE < 2) {
value.set_destroys_register();
}
value.load_item();
LIR_Opr calc_input = value.result();
LIR_Opr calc_input2 = NULL;
if (x->id() == vmIntrinsics::_dpow) {
LIRItem extra_arg(x->argument_at(1), this);
if (UseSSE < 2) {
extra_arg.set_destroys_register();
}
extra_arg.load_item();
calc_input2 = extra_arg.result();
}
LIR_Opr calc_result = rlock_result(x);
// sin, cos, pow and exp need two free fpu stack slots, so register
// two temporary operands
LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0);
LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1);
if (use_fpu) {
LIR_Opr tmp = FrameMap::fpu0_double_opr;
int tmp_start = 1;
if (calc_input2 != NULL) {
__ move(calc_input2, tmp);
tmp_start = 2;
calc_input2 = tmp;
}
__ move(calc_input, tmp);
calc_input = tmp;
calc_result = tmp;
tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start);
tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1);
}
switch(x->id()) {
case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
case vmIntrinsics::_dsqrt: __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
default: ShouldNotReachHere();
}
@ -912,21 +872,28 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
result_reg = tmp;
switch(x->id()) {
case vmIntrinsics::_dexp:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dexp() != NULL) {
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dlog() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog10:
if (StubRoutines::dlog10() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dpow:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dpow() != NULL) {
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
@ -946,18 +913,44 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != NULL) {
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}
#else
switch (x->id()) {
case vmIntrinsics::_dexp:
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
if (StubRoutines::dexp() != NULL) {
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog:
if (StubRoutines::dlog() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog10:
if (StubRoutines::dlog10() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dpow:
if (StubRoutines::dpow() != NULL) {
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dsin:
if (StubRoutines::dsin() != NULL) {
@ -973,6 +966,13 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != NULL) {
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}
#endif // _LP64

View File

@ -786,58 +786,6 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
break;
}
case lir_log10: {
// log and log10 need one temporary fpu stack slot, so
// there is one temporary registers stored in temp of the
// operation. the stack allocator must guarantee that the stack
// slots are really free, otherwise there might be a stack
// overflow.
assert(right->is_illegal(), "must be");
assert(left->is_fpu_register(), "must be");
assert(res->is_fpu_register(), "must be");
assert(op2->tmp1_opr()->is_fpu_register(), "must be");
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(res, left);
insert_exchange(left);
do_rename(left, res);
new_left = to_fpu_stack_top(res);
new_res = new_left;
op2->set_fpu_stack_size(sim()->stack_size());
assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
break;
}
case lir_tan: {
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary
// registers (stored in right and temp of the operation).
// the stack allocator must guarantee that the stack slots are really free,
// otherwise there might be a stack overflow.
assert(left->is_fpu_register(), "must be");
assert(res->is_fpu_register(), "must be");
// assert(left->is_last_use(), "old value gets destroyed");
assert(right->is_fpu_register(), "right is used as the first temporary register");
assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register");
assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
insert_free_if_dead(right);
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(res, left);
insert_exchange(left);
do_rename(left, res);
new_left = to_fpu_stack_top(res);
new_res = new_left;
op2->set_fpu_stack_size(sim()->stack_size());
assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
break;
}
default: {
assert(false, "missed a fpu-operation");
}

View File

@ -194,9 +194,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
product(bool, UseBMI2Instructions, false, \
"Use BMI2 instructions") \
\
diagnostic(bool, UseLibmSinIntrinsic, true, \
"Use Libm Sin Intrinsic") \
\
diagnostic(bool, UseLibmCosIntrinsic, true, \
"Use Libm Cos Intrinsic")
diagnostic(bool, UseLibmIntrinsic, true, \
"Use Libm Intrinsics")
#endif // CPU_X86_VM_GLOBALS_X86_HPP

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -928,6 +928,10 @@ class MacroAssembler: public Assembler {
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register r11);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
@ -941,11 +945,19 @@ class MacroAssembler: public Assembler {
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1,
Register tmp2, Register tmp3, Register tmp4);
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1,
Register tmp2, Register tmp3, Register tmp4);
#else
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1);
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
Register rdx, Register tmp);
@ -964,6 +976,14 @@ class MacroAssembler: public Assembler {
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
Register esi, Register edi, Register ebp, Register esp);
void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
Register edx, Register ebx, Register esi, Register edi,
Register ebp, Register esp);
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp);
#endif
void increase_precision();

View File

@ -0,0 +1,889 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - COS()
// ---------------------
//
// 1. RANGE REDUCTION
//
// We perform an initial range reduction from X to r with
//
// X =~= N * pi/32 + r
//
// so that |r| <= pi/64 + epsilon. We restrict inputs to those
// where |N| <= 932560. Beyond this, the range reduction is
// insufficiently accurate. For extremely small inputs,
// denormalization can occur internally, impacting performance.
// This means that the main path is actually only taken for
// 2^-252 <= |X| < 90112.
//
// To avoid branches, we perform the range reduction to full
// accuracy each time.
//
// X - N * (P_1 + P_2 + P_3)
//
// where P_1 and P_2 are 32-bit numbers (so multiplication by N
// is exact) and P_3 is a 53-bit number. Together, these
// approximate pi well enough for all cases in the restricted
// range.
//
// The main reduction sequence is:
//
// y = 32/pi * x
// N = integer(y)
// (computed by adding and subtracting off SHIFTER)
//
// m_1 = N * P_1
// m_2 = N * P_2
// r_1 = x - m_1
// r = r_1 - m_2
// (this r can be used for most of the calculation)
//
// c_1 = r_1 - r
// m_3 = N * P_3
// c_2 = c_1 - m_2
// c = c_2 - m_3
//
// 2. MAIN ALGORITHM
//
// The algorithm uses a table lookup based on B = M * pi / 32
// where M = N mod 64. The stored values are:
// sigma closest power of 2 to cos(B)
// C_hl 53-bit cos(B) - sigma
// S_hi + S_lo 2 * 53-bit sin(B)
//
// The computation is organized as follows:
//
// sin(B + r + c) = [sin(B) + sigma * r] +
// r * (cos(B) - sigma) +
// sin(B) * [cos(r + c) - 1] +
// cos(B) * [sin(r + c) - r]
//
// which is approximately:
//
// [S_hi + sigma * r] +
// C_hl * r +
// S_lo + S_hi * [(cos(r) - 1) - r * c] +
// (C_hl + sigma) * [(sin(r) - r) + c]
//
// and this is what is actually computed. We separate this sum
// into four parts:
//
// hi + med + pols + corr
//
// where
//
// hi = S_hi + sigma r
// med = C_hl * r
// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
// corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
//
// 3. POLYNOMIAL
//
// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
// (sin(r) - r) can be rearranged freely, since it is quite
// small, so we exploit parallelism to the fullest.
//
// psc4 = SC_4 * r_1
// msc4 = psc4 * r
// r2 = r * r
// msc2 = SC_2 * r2
// r4 = r2 * r2
// psc3 = SC_3 + msc4
// psc1 = SC_1 + msc2
// msc3 = r4 * psc3
// sincospols = psc1 + msc3
// pols = sincospols *
// <S_hi * r^2 | (C_hl + sigma) * r^3>
//
// 4. CORRECTION TERM
//
// This is where the "c" component of the range reduction is
// taken into account; recall that just "r" is used for most of
// the calculation.
//
// -c = m_3 - c_2
// -d = S_hi * r - (C_hl + sigma)
// corr = -c * -d + S_lo
//
// 5. COMPENSATED SUMMATIONS
//
// The two successive compensated summations add up the high
// and medium parts, leaving just the low parts to add up at
// the end.
//
// rs = sigma * r
// res_int = S_hi + rs
// k_0 = S_hi - res_int
// k_2 = k_0 + rs
// med = C_hl * r
// res_hi = res_int + med
// k_1 = res_int - res_hi
// k_3 = k_1 + med
//
// 6. FINAL SUMMATION
//
// We now add up all the small parts:
//
// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
//
// Now the overall result is just:
//
// res_hi + res_lo
//
// 7. SMALL ARGUMENTS
//
// Inputs with |X| < 2^-252 are treated specially as
// 1 - |x|.
//
// Special cases:
// cos(NaN) = quiet NaN, and raise invalid exception
// cos(INF) = NaN and raise invalid exception
// cos(0) = 1
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(8) juint _ONE[] =
{
0x00000000UL, 0x3ff00000UL
};
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) {
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start;
assert_different_registers(r8, r9, r10, r11, eax, ecx, edx);
address ONEHALF = StubRoutines::x86::_ONEHALF_addr();
address P_2 = StubRoutines::x86::_P_2_addr();
address SC_4 = StubRoutines::x86::_SC_4_addr();
address Ctable = StubRoutines::x86::_Ctable_addr();
address SC_2 = StubRoutines::x86::_SC_2_addr();
address SC_3 = StubRoutines::x86::_SC_3_addr();
address SC_1 = StubRoutines::x86::_SC_1_addr();
address PI_INV_TABLE = StubRoutines::x86::_PI_INV_TABLE_addr();
address PI_4 = (address)StubRoutines::x86::_PI_4_addr();
address PI32INV = (address)StubRoutines::x86::_PI32INV_addr();
address SIGN_MASK = (address)StubRoutines::x86::_SIGN_MASK_addr();
address P_1 = (address)StubRoutines::x86::_P_1_addr();
address P_3 = (address)StubRoutines::x86::_P_3_addr();
address ONE = (address)_ONE;
address NEG_ZERO = (address)StubRoutines::x86::_NEG_ZERO_addr();
bind(start);
push(rbx);
subq(rsp, 16);
movsd(Address(rsp, 8), xmm0);
bind(B1_2);
movl(eax, Address(rsp, 12));
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
andl(eax, 2147418112);
subl(eax, 808452096);
cmpl(eax, 281346048);
jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
mulsd(xmm1, xmm0);
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
addq(rdx, 1865232);
movdqu(xmm4, xmm0);
andq(rdx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable));
shlq(rdx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
movdqu(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(rax, 0));
subsd(xmm1, xmm3);
movq(xmm3, Address(rax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
mulpd(xmm1, xmm7);
movdqu(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movq(xmm5, Address(rax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(rax, 16));
mulpd(xmm6, xmm2);
addsd(xmm0, xmm5);
addsd(xmm3, xmm7);
addsd(xmm0, xmm1);
addsd(xmm0, xmm3);
addsd(xmm0, xmm6);
unpckhpd(xmm6, xmm6);
addsd(xmm0, xmm6);
addsd(xmm0, xmm4);
jmp(B1_4);
bind(L_2TAG_PACKET_0_0_1);
jcc(Assembler::greater, L_2TAG_PACKET_1_0_1);
pextrw(eax, xmm0, 3);
andl(eax, 32767);
pinsrw(xmm0, eax, 3);
movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
subsd(xmm1, xmm0);
movdqu(xmm0, xmm1);
jmp(B1_4);
bind(L_2TAG_PACKET_1_0_1);
pextrw(eax, xmm0, 3);
andl(eax, 32752);
cmpl(eax, 32752);
jcc(Assembler::equal, L_2TAG_PACKET_2_0_1);
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
subl(ecx, 16224);
shrl(ecx, 7);
andl(ecx, 65532);
lea(r11, ExternalAddress(PI_INV_TABLE));
addq(rcx, r11);
movdq(rax, xmm0);
movl(r10, Address(rcx, 20));
movl(r8, Address(rcx, 24));
movl(edx, eax);
shrq(rax, 21);
orl(eax, INT_MIN);
shrl(eax, 11);
movl(r9, r10);
imulq(r10, rdx);
imulq(r9, rax);
imulq(r8, rax);
movl(rsi, Address(rcx, 16));
movl(rdi, Address(rcx, 12));
movl(r11, r10);
shrq(r10, 32);
addq(r9, r10);
addq(r11, r8);
movl(r8, r11);
shrq(r11, 32);
addq(r9, r11);
movl(r10, rsi);
imulq(rsi, rdx);
imulq(r10, rax);
movl(r11, rdi);
imulq(rdi, rdx);
movl(rbx, rsi);
shrq(rsi, 32);
addq(r9, rbx);
movl(rbx, r9);
shrq(r9, 32);
addq(r10, rsi);
addq(r10, r9);
shlq(rbx, 32);
orq(r8, rbx);
imulq(r11, rax);
movl(r9, Address(rcx, 8));
movl(rsi, Address(rcx, 4));
movl(rbx, rdi);
shrq(rdi, 32);
addq(r10, rbx);
movl(rbx, r10);
shrq(r10, 32);
addq(r11, rdi);
addq(r11, r10);
movq(rdi, r9);
imulq(r9, rdx);
imulq(rdi, rax);
movl(r10, r9);
shrq(r9, 32);
addq(r11, r10);
movl(r10, r11);
shrq(r11, 32);
addq(rdi, r9);
addq(rdi, r11);
movq(r9, rsi);
imulq(rsi, rdx);
imulq(r9, rax);
shlq(r10, 32);
orq(r10, rbx);
movl(eax, Address(rcx, 0));
movl(r11, rsi);
shrq(rsi, 32);
addq(rdi, r11);
movl(r11, rdi);
shrq(rdi, 32);
addq(r9, rsi);
addq(r9, rdi);
imulq(rdx, rax);
pextrw(rbx, xmm0, 3);
lea(rdi, ExternalAddress(PI_INV_TABLE));
subq(rcx, rdi);
addl(ecx, ecx);
addl(ecx, ecx);
addl(ecx, ecx);
addl(ecx, 19);
movl(rsi, 32768);
andl(rsi, rbx);
shrl(rbx, 4);
andl(rbx, 2047);
subl(rbx, 1023);
subl(ecx, rbx);
addq(r9, rdx);
movl(edx, ecx);
addl(edx, 32);
cmpl(ecx, 1);
jcc(Assembler::less, L_2TAG_PACKET_3_0_1);
negl(ecx);
addl(ecx, 29);
shll(r9);
movl(rdi, r9);
andl(r9, 536870911);
testl(r9, 268435456);
jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1);
shrl(r9);
movl(rbx, 0);
shlq(r9, 32);
orq(r9, r11);
bind(L_2TAG_PACKET_5_0_1);
bind(L_2TAG_PACKET_6_0_1);
cmpq(r9, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_1);
bind(L_2TAG_PACKET_8_0_1);
bsrq(r11, r9);
movl(ecx, 29);
subl(ecx, r11);
jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1);
shlq(r9);
movq(rax, r10);
shlq(r10);
addl(edx, ecx);
negl(ecx);
addl(ecx, 64);
shrq(rax);
shrq(r8);
orq(r9, rax);
orq(r10, r8);
bind(L_2TAG_PACKET_10_0_1);
cvtsi2sdq(xmm0, r9);
shrq(r10, 1);
cvtsi2sdq(xmm3, r10);
xorpd(xmm4, xmm4);
shll(edx, 4);
negl(edx);
addl(edx, 16368);
orl(edx, rsi);
xorl(edx, rbx);
pinsrw(xmm4, edx, 3);
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
xorpd(xmm5, xmm5);
subl(edx, 1008);
pinsrw(xmm5, edx, 3);
mulsd(xmm0, xmm4);
shll(rsi, 16);
sarl(rsi, 31);
mulsd(xmm3, xmm5);
movdqu(xmm1, xmm0);
mulsd(xmm0, xmm2);
shrl(rdi, 29);
addsd(xmm1, xmm3);
mulsd(xmm3, xmm2);
addl(rdi, rsi);
xorl(rdi, rsi);
mulsd(xmm6, xmm1);
movl(eax, rdi);
addsd(xmm6, xmm3);
movdqu(xmm2, xmm0);
addsd(xmm0, xmm6);
subsd(xmm2, xmm0);
addsd(xmm6, xmm2);
bind(L_2TAG_PACKET_11_0_1);
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
mulsd(xmm1, xmm0);
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2siq(rdx, xmm1);
cvtsi2sdq(xmm1, rdx);
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
shll(eax, 3);
addl(edx, 1865232);
movdqu(xmm4, xmm0);
addl(edx, eax);
andl(edx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable));
shll(edx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
movdqu(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(rax, 0));
subsd(xmm1, xmm3);
movq(xmm3, Address(rax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
subsd(xmm1, xmm6);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
mulpd(xmm1, xmm7);
movdqu(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movq(xmm5, Address(rax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(rax, 16));
mulpd(xmm6, xmm2);
addsd(xmm5, xmm0);
addsd(xmm3, xmm7);
addsd(xmm1, xmm5);
addsd(xmm1, xmm3);
addsd(xmm1, xmm6);
unpckhpd(xmm6, xmm6);
movdqu(xmm0, xmm4);
addsd(xmm1, xmm6);
addsd(xmm0, xmm1);
jmp(B1_4);
bind(L_2TAG_PACKET_7_0_1);
addl(edx, 64);
movq(r9, r10);
movq(r10, r8);
movl(r8, 0);
cmpq(r9, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
addl(edx, 64);
movq(r9, r10);
movq(r10, r8);
cmpq(r9, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
xorpd(xmm0, xmm0);
xorpd(xmm6, xmm6);
jmp(L_2TAG_PACKET_11_0_1);
bind(L_2TAG_PACKET_9_0_1);
jcc(Assembler::equal, L_2TAG_PACKET_10_0_1);
negl(ecx);
shrq(r10);
movq(rax, r9);
shrq(r9);
subl(edx, ecx);
negl(ecx);
addl(ecx, 64);
shlq(rax);
orq(r10, rax);
jmp(L_2TAG_PACKET_10_0_1);
bind(L_2TAG_PACKET_3_0_1);
negl(ecx);
shlq(r9, 32);
orq(r9, r11);
shlq(r9);
movq(rdi, r9);
testl(r9, INT_MIN);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1);
shrl(r9);
movl(rbx, 0);
shrq(rdi, 3);
jmp(L_2TAG_PACKET_6_0_1);
bind(L_2TAG_PACKET_4_0_1);
shrl(r9);
movl(rbx, 536870912);
shrl(rbx);
shlq(r9, 32);
orq(r9, r11);
shlq(rbx, 32);
addl(rdi, 536870912);
movl(rcx, 0);
movl(r11, 0);
subq(rcx, r8);
sbbq(r11, r10);
sbbq(rbx, r9);
movq(r8, rcx);
movq(r10, r11);
movq(r9, rbx);
movl(rbx, 32768);
jmp(L_2TAG_PACKET_5_0_1);
bind(L_2TAG_PACKET_12_0_1);
shrl(r9);
mov64(rbx, 0x100000000);
shrq(rbx);
movl(rcx, 0);
movl(r11, 0);
subq(rcx, r8);
sbbq(r11, r10);
sbbq(rbx, r9);
movq(r8, rcx);
movq(r10, r11);
movq(r9, rbx);
movl(rbx, 32768);
shrq(rdi, 3);
addl(rdi, 536870912);
jmp(L_2TAG_PACKET_6_0_1);
bind(L_2TAG_PACKET_2_0_1);
movsd(xmm0, Address(rsp, 8));
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL
movq(Address(rsp, 0), xmm0);
bind(L_2TAG_PACKET_13_0_1);
bind(B1_4);
addq(rsp, 16);
pop(rbx);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table_cos[] =
{
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL,
0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,
0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL,
0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL,
0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL,
0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,
0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
};
//registers,
// input: (rbp + 8)
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table_cos = (address)_static_const_table_cos;
bind(start);
subl(rsp, 120);
movl(Address(rsp, 56), tmp);
lea(tmp, ExternalAddress(static_const_table_cos));
movsd(xmm0, Address(rsp, 128));
pextrw(eax, xmm0, 3);
andl(eax, 32767);
subl(eax, 12336);
cmpl(eax, 4293);
jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
movsd(xmm1, Address(tmp, 2160));
mulsd(xmm1, xmm0);
movdqu(xmm5, Address(tmp, 2240));
movsd(xmm4, Address(tmp, 2224));
pand(xmm4, xmm0);
por(xmm5, xmm4);
movsd(xmm3, Address(tmp, 2128));
movdqu(xmm2, Address(tmp, 2112));
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
addl(edx, 1865232);
movdqu(xmm4, xmm0);
andl(edx, 63);
movdqu(xmm5, Address(tmp, 2096));
lea(eax, Address(tmp, 0));
shll(edx, 5);
addl(eax, edx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, Address(tmp, 2144));
subsd(xmm4, xmm3);
movsd(xmm7, Address(eax, 8));
unpcklpd(xmm0, xmm0);
movapd(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
movdqu(xmm6, Address(tmp, 2064));
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(eax, 0));
subsd(xmm1, xmm3);
movsd(xmm3, Address(eax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, Address(tmp, 2080));
mulsd(xmm4, Address(eax, 0));
addpd(xmm6, Address(tmp, 2048));
mulpd(xmm5, xmm0);
movapd(xmm0, xmm3);
addsd(xmm3, Address(eax, 8));
mulpd(xmm1, xmm7);
movapd(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movsd(xmm5, Address(eax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(eax, 16));
mulpd(xmm6, xmm2);
addsd(xmm5, xmm0);
addsd(xmm3, xmm7);
addsd(xmm1, xmm5);
addsd(xmm1, xmm3);
addsd(xmm1, xmm6);
unpckhpd(xmm6, xmm6);
addsd(xmm1, xmm6);
addsd(xmm4, xmm1);
movsd(Address(rsp, 0), xmm4);
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_0_0_2);
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
pextrw(eax, xmm0, 3);
andl(eax, 32767);
pinsrw(xmm0, eax, 3);
movsd(xmm1, Address(tmp, 2192));
subsd(xmm1, xmm0);
movsd(Address(rsp, 0), xmm1);
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movl(eax, Address(rsp, 132));
andl(eax, 2146435072);
cmpl(eax, 2146435072);
jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
subl(rsp, 32);
movsd(Address(rsp, 0), xmm0);
lea(eax, Address(rsp, 40));
movl(Address(rsp, 8), eax);
movl(eax, 1);
movl(Address(rsp, 12), eax);
call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
addl(rsp, 32);
fld_d(Address(rsp, 8));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_3_0_2);
fld_d(Address(rsp, 128));
fmul_d(Address(tmp, 2208));
bind(L_2TAG_PACKET_1_0_2);
movl(tmp, Address(rsp, 56));
}
#endif

View File

@ -0,0 +1,674 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - EXP()
// ---------------------
//
// Description:
// Let K = 64 (table size).
// x x/log(2) n
// e = 2 = 2 * T[j] * (1 + P(y))
// where
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
// j/K
// values of 2 are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
//
// P(y) is a minimax polynomial approximation of exp(x)-1
// on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V).
//
// To avoid problems with arithmetic overflow and underflow,
// n n1 n2
// value of 2 is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
// where BIAS is a value of exponent bias.
//
// Special cases:
// exp(NaN) = NaN
// exp(+INF) = +INF
// exp(-INF) = 0
// exp(x) = 1 for subnormals
// for finite argument, only exp(0)=1 is exact
// For IEEE double
// if x > 709.782712893383973096 then exp(x) overflow
// if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(16) juint _cv[] =
{
0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
};
ALIGNED_(16) juint _shifter[] =
{
0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
};
ALIGNED_(16) juint _mmask[] =
{
0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
};
ALIGNED_(16) juint _bias[] =
{
0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
};
ALIGNED_(16) juint _Tbl_addr[] =
{
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL
};
ALIGNED_(16) juint _ALLONES[] =
{
0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
};
ALIGNED_(16) juint _ebias[] =
{
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
};
ALIGNED_(4) juint _XMAX[] =
{
0xffffffffUL, 0x7fefffffUL
};
ALIGNED_(4) juint _XMIN[] =
{
0x00000000UL, 0x00100000UL
};
ALIGNED_(4) juint _INF[] =
{
0x00000000UL, 0x7ff00000UL
};
ALIGNED_(4) juint _ZERO[] =
{
0x00000000UL, 0x00000000UL
};
ALIGNED_(4) juint _ONE_val[] =
{
0x00000000UL, 0x3ff00000UL
};
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, tmp - r11
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
jmp(start);
address cv = (address)_cv;
address Shifter = (address)_shifter;
address mmask = (address)_mmask;
address bias = (address)_bias;
address Tbl_addr = (address)_Tbl_addr;
address ALLONES = (address)_ALLONES;
address ebias = (address)_ebias;
address XMAX = (address)_XMAX;
address XMIN = (address)_XMIN;
address INF = (address)_INF;
address ZERO = (address)_ZERO;
address ONE_val = (address)_ONE_val;
bind(start);
subq(rsp, 24);
movsd(Address(rsp, 8), xmm0);
unpcklpd(xmm0, xmm0);
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
movdqu(xmm2, ExternalAddress(16 + cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, ExternalAddress(32 + cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
pextrw(eax, xmm0, 3);
andl(eax, 32767);
movl(edx, 16527);
subl(edx, eax);
subl(eax, 15504);
orl(edx, eax);
cmpl(edx, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
mulpd(xmm1, xmm0);
addpd(xmm1, xmm6);
movapd(xmm7, xmm1);
subpd(xmm1, xmm6);
mulpd(xmm2, xmm1);
movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1);
movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2);
movdl(eax, xmm7);
movl(ecx, eax);
andl(ecx, 63);
shll(ecx, 4);
sarl(eax, 6);
movl(edx, eax);
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6);
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6);
psllq(xmm7, 46);
subpd(xmm0, xmm3);
lea(tmp, ExternalAddress(Tbl_addr));
movdqu(xmm2, Address(ecx, tmp));
mulpd(xmm4, xmm0);
movapd(xmm6, xmm0);
movapd(xmm1, xmm0);
mulpd(xmm6, xmm6);
mulpd(xmm0, xmm6);
addpd(xmm5, xmm4);
mulsd(xmm0, xmm6);
mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5);
addsd(xmm1, xmm0);
por(xmm2, xmm7);
unpckhpd(xmm0, xmm0);
addsd(xmm0, xmm1);
addsd(xmm0, xmm6);
addl(edx, 894);
cmpl(edx, 1916);
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
mulsd(xmm0, xmm2);
addsd(xmm0, xmm2);
jmp(B1_5);
bind(L_2TAG_PACKET_1_0_2);
xorpd(xmm3, xmm3);
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
movl(edx, -1022);
subl(edx, eax);
movdl(xmm5, edx);
psllq(xmm4, xmm5);
movl(ecx, eax);
sarl(eax, 1);
pinsrw(xmm3, eax, 3);
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
psllq(xmm3, 4);
psubd(xmm2, xmm3);
mulsd(xmm0, xmm2);
cmpl(edx, 52);
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
pand(xmm4, xmm2);
paddd(xmm3, xmm6);
subsd(xmm2, xmm4);
addsd(xmm0, xmm2);
cmpl(ecx, 1023);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
pextrw(ecx, xmm0, 3);
andl(ecx, 32768);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
movapd(xmm6, xmm0);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
jmp(B1_5);
bind(L_2TAG_PACKET_5_0_2);
mulsd(xmm6, xmm3);
mulsd(xmm4, xmm3);
movdqu(xmm0, xmm6);
pxor(xmm6, xmm4);
psrad(xmm6, 31);
pshufd(xmm6, xmm6, 85);
psllq(xmm0, 1);
psrlq(xmm0, 1);
pxor(xmm0, xmm6);
psrlq(xmm6, 63);
paddq(xmm0, xmm6);
paddq(xmm0, xmm4);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
jmp(B1_5);
bind(L_2TAG_PACKET_3_0_2);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 32752);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
jmp(B1_5);
bind(L_2TAG_PACKET_2_0_2);
paddd(xmm3, xmm6);
addpd(xmm0, xmm2);
mulsd(xmm0, xmm3);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_8_0_2);
cmpl(eax, 2146435072);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
movl(eax, Address(rsp, 12));
cmpl(eax, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0);
bind(L_2TAG_PACKET_7_0_2);
movl(Address(rsp, 0), 14);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
mulsd(xmm0, xmm0);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_9_0_2);
movl(edx, Address(rsp, 8));
cmpl(eax, 2146435072);
jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
cmpl(edx, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
movl(eax, Address(rsp, 12));
cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
jmp(B1_5);
bind(L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
jmp(B1_5);
bind(L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(rsp, 8));
addsd(xmm0, xmm0);
jmp(B1_5);
bind(L_2TAG_PACKET_0_0_2);
movl(eax, Address(rsp, 12));
andl(eax, 2147483647);
cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
movsd(Address(rsp, 8), xmm0);
addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
jmp(B1_5);
bind(L_2TAG_PACKET_6_0_2);
movq(Address(rsp, 16), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 16));
bind(B1_5);
addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table[] =
{
0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
0x00100000UL
};
//registers,
// input: (rbp + 8)
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
jmp(start);
address static_const_table = (address)_static_const_table;
bind(start);
subl(rsp, 120);
movl(Address(rsp, 64), tmp);
lea(tmp, ExternalAddress(static_const_table));
movdqu(xmm0, Address(rsp, 128));
unpcklpd(xmm0, xmm0);
movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
pextrw(eax, xmm0, 3);
andl(eax, 32767);
movl(edx, 16527);
subl(edx, eax);
subl(eax, 15504);
orl(edx, eax);
cmpl(edx, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
mulpd(xmm1, xmm0);
addpd(xmm1, xmm6);
movapd(xmm7, xmm1);
subpd(xmm1, xmm6);
mulpd(xmm2, xmm1);
movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1);
movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2);
movdl(eax, xmm7);
movl(ecx, eax);
andl(ecx, 63);
shll(ecx, 4);
sarl(eax, 6);
movl(edx, eax);
movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6);
movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6);
psllq(xmm7, 46);
subpd(xmm0, xmm3);
movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
mulpd(xmm4, xmm0);
movapd(xmm6, xmm0);
movapd(xmm1, xmm0);
mulpd(xmm6, xmm6);
mulpd(xmm0, xmm6);
addpd(xmm5, xmm4);
mulsd(xmm0, xmm6);
mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5);
addsd(xmm1, xmm0);
por(xmm2, xmm7);
unpckhpd(xmm0, xmm0);
addsd(xmm0, xmm1);
addsd(xmm0, xmm6);
addl(edx, 894);
cmpl(edx, 1916);
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
mulsd(xmm0, xmm2);
addsd(xmm0, xmm2);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_1_0_2);
fnstcw(Address(rsp, 24));
movzwl(edx, Address(rsp, 24));
orl(edx, 768);
movw(Address(rsp, 28), edx);
fldcw(Address(rsp, 28));
movl(edx, eax);
sarl(eax, 1);
subl(edx, eax);
movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
pandn(xmm6, xmm2);
addl(eax, 1023);
movdl(xmm3, eax);
psllq(xmm3, 52);
por(xmm6, xmm3);
addl(edx, 1023);
movdl(xmm4, edx);
psllq(xmm4, 52);
movsd(Address(rsp, 8), xmm0);
fld_d(Address(rsp, 8));
movsd(Address(rsp, 16), xmm6);
fld_d(Address(rsp, 16));
fmula(1);
faddp(1);
movsd(Address(rsp, 8), xmm4);
fld_d(Address(rsp, 8));
fmulp(1);
fstp_d(Address(rsp, 8));
movsd(xmm0, Address(rsp, 8));
fldcw(Address(rsp, 24));
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 32752);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
cmpl(ecx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
jmp(L_2TAG_PACKET_2_0_2);
cmpl(ecx, INT_MIN);
jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
cmpl(ecx, -1064950997);
jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
movl(edx, Address(rsp, 128));
cmpl(edx, -17155601);
jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
jmp(L_2TAG_PACKET_4_0_2);
bind(L_2TAG_PACKET_3_0_2);
movl(edx, 14);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_4_0_2);
movl(edx, 15);
bind(L_2TAG_PACKET_5_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 128));
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_7_0_2);
cmpl(eax, 2146435072);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
movl(eax, Address(rsp, 132));
cmpl(eax, INT_MIN);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2);
movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0);
movl(edx, 14);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_9_0_2);
movsd(xmm0, Address(tmp, 1216));
mulsd(xmm0, xmm0);
movl(edx, 15);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_8_0_2);
movl(edx, Address(rsp, 128));
cmpl(eax, 2146435072);
jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
cmpl(edx, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
movl(eax, Address(rsp, 132));
cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_10_0_2);
movsd(xmm0, Address(rsp, 128));
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_0_0_2);
movl(eax, Address(rsp, 132));
andl(eax, 2147483647);
cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
movsd(xmm0, Address(rsp, 128));
addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 48), xmm0);
fld_d(Address(rsp, 48));
bind(L_2TAG_PACKET_6_0_2);
movl(tmp, Address(rsp, 64));
}
#endif

View File

@ -0,0 +1,655 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - LOG()
// ---------------------
//
// x=2^k * mx, mx in [1,2)
//
// Get B~1/mx based on the output of rcpss instruction (B0)
// B = int((B0*2^7+0.5))/2^7
//
// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
//
// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and
// p(r) is a degree 7 polynomial
// -log(B) read from data table (high, low parts)
// Result is formed from high and low parts
//
// Special cases:
// log(NaN) = quiet NaN, and raise invalid exception
// log(+INF) = that INF
// log(0) = -INF with divide-by-zero exception raised
// log(1) = +0
// log(x) = NaN with invalid exception raised if x < -0, including -INF
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(16) juint _L_tbl[] =
{
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x80000000UL
};
ALIGNED_(16) juint _log2[] =
{
0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL
};
ALIGNED_(16) juint _coeff[] =
{
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
0x00000000UL, 0xbfe00000UL
};
//registers,
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, r8, r11
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2;
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
assert_different_registers(tmp1, tmp2, eax, ecx, edx);
jmp(start);
address L_tbl = (address)_L_tbl;
address log2 = (address)_log2;
address coeff = (address)_coeff;
bind(start);
subq(rsp, 24);
movsd(Address(rsp, 0), xmm0);
mov64(rax, 0x3ff0000000000000);
movdq(xmm2, rax);
mov64(rdx, 0x77f0000000000000);
movdq(xmm3, rdx);
movl(ecx, 32768);
movdl(xmm4, rcx);
mov64(tmp1, 0xffffe00000000000);
movdq(xmm5, tmp1);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 16352);
psrlq(xmm0, 27);
lea(tmp2, ExternalAddress(L_tbl));
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
paddd(xmm0, xmm4);
por(xmm1, xmm3);
movdl(edx, xmm0);
psllq(xmm0, 29);
pand(xmm5, xmm1);
pand(xmm0, xmm6);
subsd(xmm1, xmm5);
mulpd(xmm5, xmm0);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulsd(xmm1, xmm0);
movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL
movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp2, edx));
movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
addsd(xmm1, xmm5);
movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
mulsd(xmm6, xmm7);
if (VM_Version::supports_sse3()) {
movddup(xmm5, xmm1);
}
else {
movdqu(xmm5, xmm1);
movlhps(xmm5, xmm5);
}
mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
mulpd(xmm5, xmm5);
if (VM_Version::supports_sse3()) {
movddup(xmm6, xmm0);
}
else {
movdqu(xmm6, xmm0);
movlhps(xmm6, xmm6);
}
addsd(xmm0, xmm1);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
subsd(xmm6, xmm0);
mulsd(xmm4, xmm1);
pshufd(xmm2, xmm0, 238);
addsd(xmm1, xmm6);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(B1_5);
bind(L_2TAG_PACKET_0_0_2);
movq(xmm0, Address(rsp, 0));
movq(xmm1, Address(rsp, 0));
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm0);
jmp(B1_5);
bind(L_2TAG_PACKET_5_0_2);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_3_0_2);
xorpd(xmm1, xmm1);
addsd(xmm1, xmm0);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
psrlq(xmm0, 27);
movl(ecx, 18416);
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_6_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movl(Address(rsp, 16), 3);
jmp(L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(Address(rsp, 16), 2);
bind(L_2TAG_PACKET_8_0_2);
movq(Address(rsp, 8), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 8));
bind(B1_5);
addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table_log[] =
{
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
0xffffe000UL
};
//registers,
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
Label L_2TAG_PACKET_10_0_2, start;
assert_different_registers(tmp, eax, ecx, edx);
jmp(start);
address static_const_table = (address)_static_const_table_log;
bind(start);
subl(rsp, 104);
movl(Address(rsp, 40), tmp);
lea(tmp, ExternalAddress(static_const_table));
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
movsd(xmm0, Address(rsp, 112));
movapd(xmm1, xmm0);
movl(ecx, 32768);
movdl(xmm4, ecx);
movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
psllq(xmm0, 5);
movl(ecx, 16352);
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
paddd(xmm0, xmm4);
por(xmm1, xmm3);
movdl(edx, xmm0);
psllq(xmm0, 29);
pand(xmm5, xmm1);
pand(xmm0, xmm6);
subsd(xmm1, xmm5);
mulpd(xmm5, xmm0);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulsd(xmm1, xmm0);
movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL
movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp, edx));
movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
addsd(xmm1, xmm5);
movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
mulpd(xmm5, xmm5);
pshufd(xmm6, xmm0, 228);
addsd(xmm0, xmm1);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
subsd(xmm6, xmm0);
mulsd(xmm4, xmm1);
pshufd(xmm2, xmm0, 238);
addsd(xmm1, xmm6);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_0_0_2);
movsd(xmm0, Address(rsp, 112));
movdqu(xmm1, xmm0);
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
bind(L_2TAG_PACKET_5_0_2);
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_6_0_2);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
jmp(L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_3_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
movl(edx, 3);
mulsd(xmm0, xmm1);
bind(L_2TAG_PACKET_9_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 112));
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_10_0_2);
bind(L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(edx, 2);
jmp(L_2TAG_PACKET_9_0_2);
bind(L_2TAG_PACKET_4_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movapd(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
psllq(xmm0, 5);
movl(ecx, 18416);
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 24), xmm0);
fld_d(Address(rsp, 24));
bind(L_2TAG_PACKET_10_0_2);
movl(tmp, Address(rsp, 40));
}
#endif

View File

@ -0,0 +1,687 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - LOG10()
// ---------------------
//
// Let x=2^k * mx, mx in [1,2)
//
// Get B~1/mx based on the output of rcpss instruction (B0)
// B = int((B0*LH*2^7+0.5))/2^7
// LH is a short approximation for log10(e)
//
// Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
//
// Result: k*log10(2) - log(B) + p(r)
// p(r) is a degree 7 polynomial
// -log(B) read from data table (high, low parts)
// Result is formed from high and low parts
//
// Special cases:
// log10(0) = -INF with divide-by-zero exception raised
// log10(1) = +0
// log10(x) = NaN with invalid exception raised if x < -0, including -INF
// log10(+INF) = +INF
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(16) juint _HIGHSIGMASK_log10[] =
{
0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
};
ALIGNED_(16) juint _LOG10_E[] =
{
0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
};
ALIGNED_(16) juint _L_tbl_log10[] =
{
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL
};
ALIGNED_(16) juint _log2_log10[] =
{
0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
};
ALIGNED_(16) juint _coeff_log10[] =
{
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
0xdc77b115UL, 0xbff27af2UL
};
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, tmp - r11
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_4, B1_5, start;
assert_different_registers(r11, eax, ecx, edx);
address HIGHSIGMASK = (address)_HIGHSIGMASK_log10;
address LOG10_E = (address)_LOG10_E;
address L_tbl = (address)_L_tbl_log10;
address log2 = (address)_log2_log10;
address coeff = (address)_coeff_log10;
bind(start);
subq(rsp, 24);
movsd(Address(rsp, 0), xmm0);
bind(B1_2);
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movl(ecx, 1054736384);
movdl(xmm7, ecx);
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
movdqu(xmm1, xmm0);
movl(edx, 32768);
movdl(xmm4, edx);
movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 16352);
psrlq(xmm0, 27);
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
mulss(xmm0, xmm7);
por(xmm1, xmm3);
lea(r11, ExternalAddress(L_tbl));
andpd(xmm5, xmm1);
paddd(xmm0, xmm4);
subsd(xmm1, xmm5);
movdl(edx, xmm0);
psllq(xmm0, 29);
andpd(xmm0, xmm6);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0);
mulsd(xmm1, xmm0);
movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504));
movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
addsd(xmm1, xmm5);
movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL
mulpd(xmm5, xmm5);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
pshufd(xmm2, xmm0, 228);
addsd(xmm0, xmm1);
mulsd(xmm4, xmm1);
subsd(xmm2, xmm0);
mulsd(xmm6, xmm1);
addsd(xmm1, xmm2);
pshufd(xmm2, xmm0, 238);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addsd(xmm1, xmm6);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(B1_5);
bind(L_2TAG_PACKET_0_0_2);
movq(xmm0, Address(rsp, 0));
movq(xmm1, Address(rsp, 0));
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm0);
jmp(B1_5);
bind(L_2TAG_PACKET_5_0_2);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_3_0_2);
xorpd(xmm1, xmm1);
addsd(xmm1, xmm0);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 18416);
psrlq(xmm0, 27);
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_6_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movl(Address(rsp, 16), 9);
jmp(L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(Address(rsp, 16), 8);
bind(L_2TAG_PACKET_8_0_2);
movq(Address(rsp, 8), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 8));
bind(L_2TAG_PACKET_9_0_2);
bind(B1_5);
addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table_log10[] =
{
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL,
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL,
0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
};
//registers,
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, start;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table_log10 = (address)_static_const_table_log10;
bind(start);
subl(rsp, 104);
movl(Address(rsp, 40), tmp);
lea(tmp, ExternalAddress(static_const_table_log10));
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movl(ecx, 1054736384);
movdl(xmm7, ecx);
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
movsd(xmm0, Address(rsp, 112));
movdqu(xmm1, xmm0);
movl(edx, 32768);
movdl(xmm4, edx);
movdqu(xmm5, Address(tmp, 2128)); //0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 16352);
psllq(xmm0, 5);
movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
mulss(xmm0, xmm7);
por(xmm1, xmm3);
andpd(xmm5, xmm1);
paddd(xmm0, xmm4);
subsd(xmm1, xmm5);
movdl(edx, xmm0);
psllq(xmm0, 29);
andpd(xmm0, xmm6);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0);
mulsd(xmm1, xmm0);
movsd(xmm6, Address(tmp, 2064)); //0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL
movdqu(xmm3, Address(tmp, 2080)); //0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504));
movdqu(xmm4, Address(tmp, 2096)); //0x3cdfef31UL, 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL
addsd(xmm1, xmm5);
movdqu(xmm2, Address(tmp, 2112)); //0xc0089309UL, 0x385593b1UL, 0xc025c917UL, 0xdc963467UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, Address(tmp, 2072)); //0x00000000UL, 0x00000000UL, 0x00000000UL, 0x509f7800UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
movsd(xmm6, Address(tmp, 2152)); //0xffffffffUL, 0x00000000UL, 0xffffe000UL, 0x00000000UL
mulpd(xmm5, xmm5);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
pshufd(xmm2, xmm0, 228);
addsd(xmm0, xmm1);
mulsd(xmm4, xmm1);
subsd(xmm2, xmm0);
mulsd(xmm6, xmm1);
addsd(xmm1, xmm2);
pshufd(xmm2, xmm0, 238);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addsd(xmm1, xmm6);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_0_0_2);
movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL
movdqu(xmm1, xmm0);
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
bind(L_2TAG_PACKET_5_0_2);
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_6_0_2);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
jmp(L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_3_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
movl(edx, 9);
mulsd(xmm0, xmm1);
bind(L_2TAG_PACKET_9_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_10_0_2);
bind(L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(edx, 8);
jmp(L_2TAG_PACKET_9_0_2);
bind(L_2TAG_PACKET_4_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 18416);
psllq(xmm0, 5);
movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 24), xmm0);
fld_d(Address(rsp, 24));
bind(L_2TAG_PACKET_10_0_2);
movl(tmp, Address(rsp, 40));
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2093,25 +2093,6 @@ class StubGenerator: public StubCodeGenerator {
entry_checkcast_arraycopy);
}
void generate_math_stubs() {
{
StubCodeMark mark(this, "StubRoutines", "log10");
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ flog10();
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "tan");
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ trigfunc('t');
__ ret(0);
}
}
// AES intrinsic stubs
enum {AESBlockSize = 16};
@ -3534,6 +3515,31 @@ class StubGenerator: public StubCodeGenerator {
}
address generate_libmLog10() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmPow() {
address start = __ pc();
@ -3628,6 +3634,44 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_libm_tan_cot_huge() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
BLOCK_COMMENT("Entry:");
__ libm_tancot_huge(x0, x1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
return start;
}
address generate_libmTan() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
// Safefetch stubs.
@ -3853,24 +3897,25 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2()) {
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
StubRoutines::x86::_L_2il0floatpacket_0_adr = (address)StubRoutines::x86::_L_2il0floatpacket_0;
StubRoutines::x86::_Pi4Inv_adr = (address)StubRoutines::x86::_Pi4Inv;
StubRoutines::x86::_Pi4x3_adr = (address)StubRoutines::x86::_Pi4x3;
StubRoutines::x86::_Pi4x4_adr = (address)StubRoutines::x86::_Pi4x4;
StubRoutines::x86::_ones_adr = (address)StubRoutines::x86::_ones;
StubRoutines::_dexp = generate_libmExp();
StubRoutines::_dlog = generate_libmLog();
StubRoutines::_dlog10 = generate_libmLog10();
StubRoutines::_dpow = generate_libmPow();
if (UseLibmSinIntrinsic || UseLibmCosIntrinsic) {
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
}
if (UseLibmSinIntrinsic) {
StubRoutines::_dsin = generate_libmSin();
}
if (UseLibmCosIntrinsic) {
StubRoutines::_dcos = generate_libmCos();
}
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
StubRoutines::_dsin = generate_libmSin();
StubRoutines::_dcos = generate_libmCos();
StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge();
StubRoutines::_dtan = generate_libmTan();
}
}
void generate_all() {
// Generates all stubs and initializes the entry points
@ -3889,8 +3934,6 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
generate_math_stubs();
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others

View File

@ -2972,35 +2972,6 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
}
void generate_math_stubs() {
{
StubCodeMark mark(this, "StubRoutines", "log10");
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ flog10();
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addq(rsp, 8);
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "tan");
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ trigfunc('t');
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addq(rsp, 8);
__ ret(0);
}
}
// AES intrinsic stubs
enum {AESBlockSize = 16};
@ -4731,6 +4702,46 @@ class StubGenerator: public StubCodeGenerator {
#endif
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmLog10() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
@ -4810,6 +4821,8 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
@ -4822,6 +4835,8 @@ class StubGenerator: public StubCodeGenerator {
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
@ -4853,6 +4868,8 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
@ -4865,6 +4882,55 @@ class StubGenerator: public StubCodeGenerator {
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmTan() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
const Register tmp2 = r9;
const Register tmp3 = r10;
const Register tmp4 = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
@ -5065,16 +5131,28 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2()) {
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
StubRoutines::_dexp = generate_libmExp();
StubRoutines::_dlog = generate_libmLog();
StubRoutines::_dlog10 = generate_libmLog10();
StubRoutines::_dpow = generate_libmPow();
if (UseLibmSinIntrinsic) {
StubRoutines::_dsin = generate_libmSin();
}
if (UseLibmCosIntrinsic) {
StubRoutines::_dcos = generate_libmCos();
}
StubRoutines::_dtan = generate_libmTan();
StubRoutines::_dsin = generate_libmSin();
StubRoutines::_dcos = generate_libmCos();
}
}
@ -5119,8 +5197,6 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
generate_math_stubs();
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others

View File

@ -48,6 +48,29 @@ address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
address StubRoutines::x86::_k256_adr = NULL;
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
//tables common for sin and cos
address StubRoutines::x86::_ONEHALF_adr = NULL;
address StubRoutines::x86::_P_2_adr = NULL;
address StubRoutines::x86::_SC_4_adr = NULL;
address StubRoutines::x86::_Ctable_adr = NULL;
address StubRoutines::x86::_SC_2_adr = NULL;
address StubRoutines::x86::_SC_3_adr = NULL;
address StubRoutines::x86::_SC_1_adr = NULL;
address StubRoutines::x86::_PI_INV_TABLE_adr = NULL;
address StubRoutines::x86::_PI_4_adr = NULL;
address StubRoutines::x86::_PI32INV_adr = NULL;
address StubRoutines::x86::_SIGN_MASK_adr = NULL;
address StubRoutines::x86::_P_1_adr = NULL;
address StubRoutines::x86::_P_3_adr = NULL;
address StubRoutines::x86::_NEG_ZERO_adr = NULL;
//tables common for sincos and tancot
address StubRoutines::x86::_L_2il0floatpacket_0_adr = NULL;
address StubRoutines::x86::_Pi4Inv_adr = NULL;
address StubRoutines::x86::_Pi4x3_adr = NULL;
address StubRoutines::x86::_Pi4x4_adr = NULL;
address StubRoutines::x86::_ones_adr = NULL;
uint64_t StubRoutines::x86::_crc_by128_masks[] =
{
/* The fields in this structure are arranged so that they can be

View File

@ -57,6 +57,48 @@
// byte flip mask for sha256
static address _pshuffle_byte_flip_mask_addr;
//tables common for LIBM sin and cos
static juint _ONEHALF[];
static address _ONEHALF_adr;
static juint _P_2[];
static address _P_2_adr;
static juint _SC_4[];
static address _SC_4_adr;
static juint _Ctable[];
static address _Ctable_adr;
static juint _SC_2[];
static address _SC_2_adr;
static juint _SC_3[];
static address _SC_3_adr;
static juint _SC_1[];
static address _SC_1_adr;
static juint _PI_INV_TABLE[];
static address _PI_INV_TABLE_adr;
static juint _PI_4[];
static address _PI_4_adr;
static juint _PI32INV[];
static address _PI32INV_adr;
static juint _SIGN_MASK[];
static address _SIGN_MASK_adr;
static juint _P_1[];
static address _P_1_adr;
static juint _P_3[];
static address _P_3_adr;
static juint _NEG_ZERO[];
static address _NEG_ZERO_adr;
//tables common for LIBM sincos and tancot
static juint _L_2il0floatpacket_0[];
static address _L_2il0floatpacket_0_adr;
static juint _Pi4Inv[];
static address _Pi4Inv_adr;
static juint _Pi4x3[];
static address _Pi4x3_adr;
static juint _Pi4x4[];
static address _Pi4x4_adr;
static juint _ones[];
static address _ones_adr;
public:
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
@ -69,4 +111,24 @@
static address k256_addr() { return _k256_adr; }
static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; }
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
static address _ONEHALF_addr() { return _ONEHALF_adr; }
static address _P_2_addr() { return _P_2_adr; }
static address _SC_4_addr() { return _SC_4_adr; }
static address _Ctable_addr() { return _Ctable_adr; }
static address _SC_2_addr() { return _SC_2_adr; }
static address _SC_3_addr() { return _SC_3_adr; }
static address _SC_1_addr() { return _SC_1_adr; }
static address _PI_INV_TABLE_addr() { return _PI_INV_TABLE_adr; }
static address _PI_4_addr() { return _PI_4_adr; }
static address _PI32INV_addr() { return _PI32INV_adr; }
static address _SIGN_MASK_addr() { return _SIGN_MASK_adr; }
static address _P_1_addr() { return _P_1_adr; }
static address _P_3_addr() { return _P_3_adr; }
static address _NEG_ZERO_addr() { return _NEG_ZERO_adr; }
static address _L_2il0floatpacket_0_addr() { return _L_2il0floatpacket_0_adr; }
static address _Pi4Inv_addr() { return _Pi4Inv_adr; }
static address _Pi4x3_addr() { return _Pi4x3_adr; }
static address _Pi4x4_addr() { return _Pi4x4_adr; }
static address _ones_addr() { return _ones_adr; }
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP

View File

@ -345,13 +345,34 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
__ fld_d(Address(rsp, 1*wordSize));
switch (kind) {
case Interpreter::java_lang_math_sin :
__ trigfunc('s');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_cos :
__ trigfunc('c');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_tan :
__ trigfunc('t');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dtan() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_sqrt:
__ fsqrt();
@ -362,26 +383,29 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_log:
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dlog() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
}
else {
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_log10:
__ flog10();
// Store to stack to convert 80bit precision back to 64bits
__ push_fTOS();
__ pop_fTOS();
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dlog10() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_pow:
__ fld_d(Address(rsp, 3*wordSize)); // second argument
__ subptr(rsp, 4 * wordSize);
__ fstp_d(Address(rsp, 0));
__ fstp_d(Address(rsp, 2 * wordSize));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dpow() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
@ -391,7 +415,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_exp:
__ subptr(rsp, 2*wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dexp() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));

View File

@ -29,6 +29,7 @@
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/templateInterpreterGenerator.hpp"
#include "runtime/arguments.hpp"
#include "runtime/sharedRuntime.hpp"
#define __ _masm->
@ -373,32 +374,60 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
__ sqrtsd(xmm0, Address(rsp, wordSize));
} else if (kind == Interpreter::java_lang_math_exp) {
__ movdbl(xmm0, Address(rsp, wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
if (StubRoutines::dexp() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
}
} else if (kind == Interpreter::java_lang_math_log) {
__ movdbl(xmm0, Address(rsp, wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
if (StubRoutines::dlog() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
}
} else if (kind == Interpreter::java_lang_math_log10) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dlog10() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
}
} else if (kind == Interpreter::java_lang_math_sin) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dsin() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
}
} else if (kind == Interpreter::java_lang_math_cos) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dcos() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
}
} else if (kind == Interpreter::java_lang_math_pow) {
__ movdbl(xmm1, Address(rsp, wordSize));
__ movdbl(xmm0, Address(rsp, 3 * wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
if (StubRoutines::dpow() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
}
} else if (kind == Interpreter::java_lang_math_tan) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dtan() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
}
} else {
__ fld_d(Address(rsp, wordSize));
switch (kind) {
case Interpreter::java_lang_math_sin :
__ trigfunc('s');
break;
case Interpreter::java_lang_math_cos :
__ trigfunc('c');
break;
case Interpreter::java_lang_math_tan :
__ trigfunc('t');
break;
case Interpreter::java_lang_math_abs:
__ fabs();
break;
case Interpreter::java_lang_math_log10:
__ flog10();
break;
default :
ShouldNotReachHere();
}

View File

@ -9828,27 +9828,6 @@ instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
ins_pipe( pipe_slow );
%}
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst(TanD src));
format %{ "DTAN $dst" %}
ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
Opcode(0xDD), Opcode(0xD8)); // fstp st
ins_pipe( pipe_slow );
%}
instruct tanD_reg(regD dst, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst(TanD dst));
effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
format %{ "DTAN $dst" %}
ins_encode( Push_SrcD(dst),
Opcode(0xD9), Opcode(0xF2), // fptan
Opcode(0xDD), Opcode(0xD8), // fstp st
Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
instruct atanDPR_reg(regDPR dst, regDPR src) %{
predicate (UseSSE<=1);
match(Set dst(AtanD dst src));
@ -9880,41 +9859,6 @@ instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
ins_pipe( pipe_slow );
%}
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
// The source Double operand on FPU stack
match(Set dst (Log10D src));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fxch ; swap ST(0) with ST(1)
// fyl2x ; compute log_10(2) * log_2(x)
format %{ "FLDLG2 \t\t\t#Log10\n\t"
"FXCH \n\t"
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
Opcode(0xD9), Opcode(0xC9), // fxch
Opcode(0xD9), Opcode(0xF1)); // fyl2x
ins_pipe( pipe_slow );
%}
instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
predicate (UseSSE>=2);
effect(KILL cr);
match(Set dst (Log10D src));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fyl2x ; compute log_10(2) * log_2(x)
format %{ "FLDLG2 \t\t\t#Log10\n\t"
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
Push_SrcD(src),
Opcode(0xD9), Opcode(0xF1), // fyl2x
Push_ResultD(dst));
ins_pipe( pipe_slow );
%}
//-------------Float Instructions-------------------------------
// Float Math

View File

@ -9897,34 +9897,6 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
ins_pipe(pipe_slow);
%}
// -----------Trig and Trancendental Instructions------------------------------
instruct tanD_reg(regD dst) %{
match(Set dst (TanD dst));
format %{ "dtan $dst\n\t" %}
ins_encode( Push_SrcXD(dst),
Opcode(0xD9), Opcode(0xF2), //fptan
Opcode(0xDD), Opcode(0xD8), //fstp st
Push_ResultXD(dst) );
ins_pipe( pipe_slow );
%}
instruct log10D_reg(regD dst) %{
// The source and result Double operands in XMM registers
match(Set dst (Log10D dst));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fyl2x ; compute log_10(2) * log_2(x)
format %{ "fldlg2\t\t\t#Log10\n\t"
"fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
%}
ins_encode(Opcode(0xD9), Opcode(0xEC), // fldlg2
Push_SrcXD(dst),
Opcode(0xD9), Opcode(0xF1), // fyl2x
Push_ResultXD(dst));
ins_pipe( pipe_slow );
%}
//----------Arithmetic Conversion Instructions---------------------------------
instruct roundFloat_nop(regF dst)

View File

@ -728,31 +728,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
break;
}
case lir_tan:
case lir_log10: {
assert(op->as_Op2() != NULL, "must be");
LIR_Op2* op2 = (LIR_Op2*)op;
// On x86 tan/sin/cos need two temporary fpu stack slots and
// log/log10 need one so handle opr2 and tmp as temp inputs.
// Register input operand as temp to guarantee that it doesn't
// overlap with the input.
assert(op2->_info == NULL, "not used");
assert(op2->_tmp5->is_illegal(), "not used");
assert(op2->_opr1->is_valid(), "used");
do_input(op2->_opr1); do_temp(op2->_opr1);
if (op2->_opr2->is_valid()) do_temp(op2->_opr2);
if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1);
if (op2->_tmp2->is_valid()) do_temp(op2->_tmp2);
if (op2->_tmp3->is_valid()) do_temp(op2->_tmp3);
if (op2->_tmp4->is_valid()) do_temp(op2->_tmp4);
if (op2->_result->is_valid()) do_output(op2->_result);
break;
}
// LIR_Op3
case lir_idiv:
case lir_irem: {
@ -1740,8 +1715,6 @@ const char * LIR_Op::name() const {
case lir_rem: s = "rem"; break;
case lir_abs: s = "abs"; break;
case lir_sqrt: s = "sqrt"; break;
case lir_tan: s = "tan"; break;
case lir_log10: s = "log10"; break;
case lir_logic_and: s = "logic_and"; break;
case lir_logic_or: s = "logic_or"; break;
case lir_logic_xor: s = "logic_xor"; break;

View File

@ -320,9 +320,11 @@ const char* Runtime1::name_for_address(address entry) {
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
FUNCTION_CASE(entry, StubRoutines::dexp());
FUNCTION_CASE(entry, StubRoutines::dlog());
FUNCTION_CASE(entry, StubRoutines::dlog10());
FUNCTION_CASE(entry, StubRoutines::dpow());
FUNCTION_CASE(entry, StubRoutines::dsin());
FUNCTION_CASE(entry, StubRoutines::dcos());
FUNCTION_CASE(entry, StubRoutines::dtan());
#undef FUNCTION_CASE

View File

@ -171,7 +171,6 @@ macro(LoadN)
macro(LoadRange)
macro(LoadS)
macro(Lock)
macro(Log10D)
macro(Loop)
macro(LoopLimit)
macro(Mach)
@ -265,7 +264,6 @@ macro(SubI)
macro(SubL)
macro(TailCall)
macro(TailJump)
macro(TanD)
macro(ThreadLocal)
macro(Unlock)
macro(URShiftI)

View File

@ -1679,7 +1679,6 @@ bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
switch (id) {
case vmIntrinsics::_dabs: n = new AbsDNode( arg); break;
case vmIntrinsics::_dsqrt: n = new SqrtDNode(C, control(), arg); break;
case vmIntrinsics::_dlog10: n = new Log10DNode(C, control(), arg); break;
default: fatal_unexpected_iid(id); break;
}
set_result(_gvn.transform(n));
@ -1693,10 +1692,6 @@ bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
Node* arg = round_double_node(argument(0));
Node* n = NULL;
switch (id) {
case vmIntrinsics::_dtan: n = new TanDNode(C, control(), arg); break;
default: fatal_unexpected_iid(id); break;
}
n = _gvn.transform(n);
// Rounding required? Check for argument reduction!
@ -1814,15 +1809,18 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
return StubRoutines::dcos() != NULL ?
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dcos(), "dcos") :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos), "COS");
case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN");
case vmIntrinsics::_dtan:
return StubRoutines::dtan() != NULL ?
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dtan(), "dtan") :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN");
case vmIntrinsics::_dlog:
return StubRoutines::dlog() != NULL ?
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog(), "dlog") :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog), "LOG");
case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_math(id) :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
case vmIntrinsics::_dlog10:
return StubRoutines::dlog10() != NULL ?
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog10(), "dlog10") :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
// These intrinsics are supported on all hardware
case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false;

View File

@ -1533,25 +1533,3 @@ const Type* SqrtDNode::Value(PhaseGVN* phase) const {
if( d < 0.0 ) return Type::DOUBLE;
return TypeD::make( sqrt( d ) );
}
//=============================================================================
//------------------------------Value------------------------------------------
// Compute tan
const Type* TanDNode::Value(PhaseGVN* phase) const {
const Type *t1 = phase->type( in(1) );
if( t1 == Type::TOP ) return Type::TOP;
if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
double d = t1->getd();
return TypeD::make( StubRoutines::intrinsic_tan( d ) );
}
//=============================================================================
//------------------------------Value------------------------------------------
// Compute log10
const Type* Log10DNode::Value(PhaseGVN* phase) const {
const Type *t1 = phase->type( in(1) );
if( t1 == Type::TOP ) return Type::TOP;
if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
double d = t1->getd();
return TypeD::make( StubRoutines::intrinsic_log10( d ) );
}

View File

@ -408,21 +408,6 @@ public:
virtual uint ideal_reg() const { return Op_RegD; }
};
//------------------------------TanDNode---------------------------------------
// tangens of a double
class TanDNode : public Node {
public:
TanDNode(Compile* C, Node *c,Node *in1) : Node(c, in1) {
init_flags(Flag_is_expensive);
C->add_expensive_node(this);
}
virtual int Opcode() const;
const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }
virtual const Type* Value(PhaseGVN* phase) const;
};
//------------------------------AtanDNode--------------------------------------
// arcus tangens of a double
class AtanDNode : public Node {
@ -448,20 +433,6 @@ public:
virtual const Type* Value(PhaseGVN* phase) const;
};
//------------------------------Log10DNode---------------------------------------
// Log_10 of a double
class Log10DNode : public Node {
public:
Log10DNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
init_flags(Flag_is_expensive);
C->add_expensive_node(this);
}
virtual int Opcode() const;
const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }
virtual const Type* Value(PhaseGVN* phase) const;
};
//-------------------------------ReverseBytesINode--------------------------------
// reverse bytes of an integer
class ReverseBytesINode : public Node {

View File

@ -154,11 +154,14 @@ address StubRoutines::_vectorizedMismatch = NULL;
address StubRoutines::_dexp = NULL;
address StubRoutines::_dlog = NULL;
address StubRoutines::_dlog10 = NULL;
address StubRoutines::_dpow = NULL;
address StubRoutines::_dsin = NULL;
address StubRoutines::_dcos = NULL;
address StubRoutines::_dlibm_sin_cos_huge = NULL;
address StubRoutines::_dlibm_reduce_pi04l = NULL;
address StubRoutines::_dlibm_tan_cot_huge = NULL;
address StubRoutines::_dtan = NULL;
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
double (* StubRoutines::_intrinsic_sin )(double) = NULL;

View File

@ -213,11 +213,14 @@ class StubRoutines: AllStatic {
static address _dexp;
static address _dlog;
static address _dlog10;
static address _dpow;
static address _dsin;
static address _dcos;
static address _dlibm_sin_cos_huge;
static address _dlibm_reduce_pi04l;
static address _dlibm_tan_cot_huge;
static address _dtan;
// These are versions of the java.lang.Math methods which perform
// the same operations as the intrinsic version. They are used for
@ -391,11 +394,14 @@ class StubRoutines: AllStatic {
static address dexp() { return _dexp; }
static address dlog() { return _dlog; }
static address dlog10() { return _dlog10; }
static address dpow() { return _dpow; }
static address dsin() { return _dsin; }
static address dcos() { return _dcos; }
static address dlibm_reduce_pi04l() { return _dlibm_reduce_pi04l; }
static address dlibm_sin_cos_huge() { return _dlibm_sin_cos_huge; }
static address dlibm_tan_cot_huge() { return _dlibm_tan_cot_huge; }
static address dtan() { return _dtan; }
static address select_fill_function(BasicType t, bool aligned, const char* &name);

View File

@ -859,9 +859,11 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
static_field(StubRoutines, _mulAdd, address) \
static_field(StubRoutines, _dexp, address) \
static_field(StubRoutines, _dlog, address) \
static_field(StubRoutines, _dlog10, address) \
static_field(StubRoutines, _dpow, address) \
static_field(StubRoutines, _dsin, address) \
static_field(StubRoutines, _dcos, address) \
static_field(StubRoutines, _dtan, address) \
static_field(StubRoutines, _vectorizedMismatch, address) \
static_field(StubRoutines, _jbyte_arraycopy, address) \
static_field(StubRoutines, _jshort_arraycopy, address) \
@ -2066,10 +2068,8 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
declare_c2_type(NegNode, Node) \
declare_c2_type(NegFNode, NegNode) \
declare_c2_type(NegDNode, NegNode) \
declare_c2_type(TanDNode, Node) \
declare_c2_type(AtanDNode, Node) \
declare_c2_type(SqrtDNode, Node) \
declare_c2_type(Log10DNode, Node) \
declare_c2_type(ReverseBytesINode, Node) \
declare_c2_type(ReverseBytesLNode, Node) \
declare_c2_type(ReductionNode, Node) \