8145688: Update for x86 pow in the math lib
Optimizes Math.pow() for the 64-bit and 32-bit x86 architectures using the Intel LIBM implementation. Reviewed-by: kvn
This commit is contained in:
parent
bc04deac15
commit
453650389f
@ -772,6 +772,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
|
||||
case 0x55: // andnps
|
||||
case 0x56: // orps
|
||||
case 0x57: // xorps
|
||||
case 0x58: // addpd
|
||||
case 0x59: // mulpd
|
||||
case 0x6E: // movd
|
||||
case 0x7E: // movd
|
||||
@ -3363,6 +3364,7 @@ void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
// The encoding for pextrw is SSE2 to support the LIBM implementation.
|
||||
void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
|
||||
assert(VM_Version::supports_sse2(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
@ -4361,6 +4363,17 @@ void Assembler::addpd(XMMRegister dst, XMMRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the memory-source form of ADDPD (dst = dst + [src]).
// The encoding produced below is: a SIMD prefix built for the 0x66 / 0F opcode
// space (with dst passed as both destination and first source), the opcode
// byte 0x58, and finally the operand bytes for (dst, src).
//   dst - XMM register that is both an input and the result
//   src - memory operand
void Assembler::addpd(XMMRegister dst, Address src) {
|
||||
// NOTE(review): NOT_LP64 presumably compiles this assert into 32-bit builds only - confirm.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
// Must be live across the emission of an instruction with a memory operand.
InstructionMark im(this);
|
||||
// 128-bit vector length; rex_w is requested only when EVEX is supported.
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
// Address attributes used for the EVEX form: full-vector tuple, 64-bit elements.
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
// Opcode byte shared with the register-register addpd form.
emit_int8(0x58);
|
||||
emit_operand(dst, src);
|
||||
}
|
||||
|
||||
|
||||
void Assembler::addps(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
|
@ -1791,6 +1791,7 @@ private:
|
||||
|
||||
// Add Packed Floating-Point Values
|
||||
void addpd(XMMRegister dst, XMMRegister src);
|
||||
void addpd(XMMRegister dst, Address src);
|
||||
void addps(XMMRegister dst, XMMRegister src);
|
||||
void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
@ -2381,9 +2381,6 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
|
||||
// Should consider not saving rbx, if not necessary
|
||||
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
|
||||
break;
|
||||
case lir_pow :
|
||||
__ pow_with_fallback(op->as_Op2()->fpu_stack_size());
|
||||
break;
|
||||
default : ShouldNotReachHere();
|
||||
}
|
||||
} else {
|
||||
|
@ -810,7 +810,8 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
|
||||
void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
||||
assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type");
|
||||
|
||||
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog) {
|
||||
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
|
||||
x->id() == vmIntrinsics::_dpow) {
|
||||
do_LibmIntrinsic(x);
|
||||
return;
|
||||
}
|
||||
@ -824,7 +825,6 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
||||
case vmIntrinsics::_dcos:
|
||||
case vmIntrinsics::_dtan:
|
||||
case vmIntrinsics::_dlog10:
|
||||
case vmIntrinsics::_dpow:
|
||||
use_fpu = true;
|
||||
}
|
||||
} else {
|
||||
@ -874,7 +874,6 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
||||
case vmIntrinsics::_dcos: __ cos (calc_input, calc_result, tmp1, tmp2); break;
|
||||
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
|
||||
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
|
||||
case vmIntrinsics::_dpow: __ pow (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
|
||||
@ -890,11 +889,25 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
||||
LIR_Opr calc_result = rlock_result(x);
|
||||
LIR_Opr result_reg = result_register_for(x->type());
|
||||
|
||||
CallingConvention* cc = NULL;
|
||||
|
||||
if (x->id() == vmIntrinsics::_dpow) {
|
||||
LIRItem value1(x->argument_at(1), this);
|
||||
|
||||
value1.set_destroys_register();
|
||||
|
||||
BasicTypeList signature(2);
|
||||
signature.append(T_DOUBLE);
|
||||
signature.append(T_DOUBLE);
|
||||
cc = frame_map()->c_calling_convention(&signature);
|
||||
value.load_item_force(cc->at(0));
|
||||
value1.load_item_force(cc->at(1));
|
||||
} else {
|
||||
BasicTypeList signature(1);
|
||||
signature.append(T_DOUBLE);
|
||||
CallingConvention* cc = frame_map()->c_calling_convention(&signature);
|
||||
|
||||
cc = frame_map()->c_calling_convention(&signature);
|
||||
value.load_item_force(cc->at(0));
|
||||
}
|
||||
|
||||
#ifndef _LP64
|
||||
LIR_Opr tmp = FrameMap::fpu0_double_opr;
|
||||
@ -915,6 +928,14 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dpow:
|
||||
if (VM_Version::supports_sse2()) {
|
||||
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
#else
|
||||
@ -925,6 +946,9 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
||||
case vmIntrinsics::_dlog:
|
||||
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
|
||||
break;
|
||||
case vmIntrinsics::_dpow:
|
||||
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
__ move(result_reg, calc_result);
|
||||
|
@ -840,53 +840,6 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
|
||||
break;
|
||||
}
|
||||
|
||||
case lir_pow: {
|
||||
// pow needs two temporary fpu stack slots, so there are two temporary
|
||||
// registers (stored in tmp1 and tmp2 of the operation).
|
||||
// the stack allocator must guarantee that the stack slots are really free,
|
||||
// otherwise there might be a stack overflow.
|
||||
assert(left->is_fpu_register(), "must be");
|
||||
assert(right->is_fpu_register(), "must be");
|
||||
assert(res->is_fpu_register(), "must be");
|
||||
|
||||
assert(op2->tmp1_opr()->is_fpu_register(), "tmp1 is the first temporary register");
|
||||
assert(op2->tmp2_opr()->is_fpu_register(), "tmp2 is the second temporary register");
|
||||
assert(fpu_num(left) != fpu_num(right) && fpu_num(left) != fpu_num(op2->tmp1_opr()) && fpu_num(left) != fpu_num(op2->tmp2_opr()) && fpu_num(left) != fpu_num(res), "need distinct temp registers");
|
||||
assert(fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(right) != fpu_num(op2->tmp2_opr()) && fpu_num(right) != fpu_num(res), "need distinct temp registers");
|
||||
assert(fpu_num(op2->tmp1_opr()) != fpu_num(op2->tmp2_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
|
||||
assert(fpu_num(op2->tmp2_opr()) != fpu_num(res), "need distinct temp registers");
|
||||
|
||||
insert_free_if_dead(op2->tmp1_opr());
|
||||
insert_free_if_dead(op2->tmp2_opr());
|
||||
|
||||
// Must bring both operands to top of stack with following operand ordering:
|
||||
// * fpu stack before pow: ... right left
|
||||
// * fpu stack after pow: ... left
|
||||
|
||||
insert_free_if_dead(res, right);
|
||||
|
||||
if (tos_offset(right) != 1) {
|
||||
insert_exchange(right);
|
||||
insert_exchange(1);
|
||||
}
|
||||
insert_exchange(left);
|
||||
assert(tos_offset(right) == 1, "check");
|
||||
assert(tos_offset(left) == 0, "check");
|
||||
|
||||
new_left = to_fpu_stack_top(left);
|
||||
new_right = to_fpu_stack(right);
|
||||
|
||||
op2->set_fpu_stack_size(sim()->stack_size());
|
||||
assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
|
||||
|
||||
sim()->pop();
|
||||
|
||||
do_rename(right, res);
|
||||
|
||||
new_res = to_fpu_stack_top(res);
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
assert(false, "missed a fpu-operation");
|
||||
}
|
||||
|
@ -149,10 +149,15 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
|
||||
break;
|
||||
case Interpreter::java_lang_math_pow:
|
||||
__ fld_d(Address(rsp, 3*wordSize)); // second argument
|
||||
__ pow_with_fallback(0);
|
||||
// Store to stack to convert 80bit precision back to 64bits
|
||||
__ push_fTOS();
|
||||
__ pop_fTOS();
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
__ fstp_d(Address(rsp, 2 * wordSize));
|
||||
if (VM_Version::supports_sse2()) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
|
||||
}
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_exp:
|
||||
__ subptr(rsp, 2*wordSize);
|
||||
|
@ -255,6 +255,10 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
|
||||
} else if (kind == Interpreter::java_lang_math_log) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
|
||||
} else if (kind == Interpreter::java_lang_math_pow) {
|
||||
__ movdbl(xmm1, Address(rsp, wordSize));
|
||||
__ movdbl(xmm0, Address(rsp, 3 * wordSize));
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
|
||||
} else {
|
||||
__ fld_d(Address(rsp, wordSize));
|
||||
switch (kind) {
|
||||
@ -273,11 +277,6 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
|
||||
case Interpreter::java_lang_math_log10:
|
||||
__ flog10();
|
||||
break;
|
||||
case Interpreter::java_lang_math_pow:
|
||||
__ fld_d(Address(rsp, 3*wordSize)); // second argument (one
|
||||
// empty stack slot)
|
||||
__ pow_with_fallback(0);
|
||||
break;
|
||||
default :
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
@ -3060,50 +3060,6 @@ void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces the value X on top of the x87 stack with 2^X.
// X is split into int(X) and the fraction X-int(X). The fractional power comes
// from f2xm1, and 2^int(X) is built by hand as the bit pattern of a double on
// an 8-byte scratch slot reserved below rsp. If int(X)+1023 is not a usable
// biased exponent, the hand-built factor is a NaN, so the product is NaN and
// the caller is expected to detect that and fall back.
void MacroAssembler::pow_exp_core_encoding() {
|
||||
// kills rax, rcx, rdx
|
||||
subptr(rsp,sizeof(jdouble));
|
||||
// computes 2^X. Stack: X ...
|
||||
// f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
|
||||
// keep it on the thread's stack to compute 2^int(X) later
|
||||
// then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1
|
||||
// final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
|
||||
fld_s(0); // Stack: X X ...
|
||||
frndint(); // Stack: int(X) X ...
|
||||
fsuba(1); // Stack: int(X) X-int(X) ...
|
||||
fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
|
||||
f2xm1(); // Stack: 2^(X-int(X))-1 ...
|
||||
fld1(); // Stack: 1 2^(X-int(X))-1 ...
|
||||
faddp(1); // Stack: 2^(X-int(X))
|
||||
// computes 2^(int(X)): add exponent bias (1023) to int(X), then
|
||||
// shift int(X)+1023 to exponent position.
|
||||
// Exponent is limited to 11 bits: if int(X)+1023 does not fit in 11
|
||||
// bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
|
||||
// values so detect them and set result to NaN.
|
||||
movl(rax,Address(rsp,0));
|
||||
movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
|
||||
addl(rax, 1023);
|
||||
movl(rdx,rax);
|
||||
shll(rax,20);
|
||||
// Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
|
||||
addl(rdx,1);
|
||||
// Check that 1 < int(X)+1023+1 < 2048
|
||||
// in 3 steps:
|
||||
// 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
|
||||
// 2- int(X)+1023+1 != 0
|
||||
// 3- int(X)+1023+1 != 1
|
||||
// Do 2- first because addl just updated the flags.
|
||||
cmov32(Assembler::equal,rax,rcx);
|
||||
cmpl(rdx,1);
|
||||
cmov32(Assembler::equal,rax,rcx);
|
||||
testl(rdx,rcx);
|
||||
cmov32(Assembler::notEqual,rax,rcx);
|
||||
// Assemble the double in the scratch slot: rax is the high word, low word is 0.
movl(Address(rsp,4),rax);
|
||||
movl(Address(rsp,0),0);
|
||||
fmul_d(Address(rsp,0)); // Stack: 2^X ...
|
||||
addptr(rsp,sizeof(jdouble));
|
||||
}
|
||||
|
||||
void MacroAssembler::increase_precision() {
|
||||
subptr(rsp, BytesPerWord);
|
||||
fnstcw(Address(rsp, 0));
|
||||
@ -3119,194 +3075,6 @@ void MacroAssembler::restore_precision() {
|
||||
addptr(rsp, BytesPerWord);
|
||||
}
|
||||
|
||||
// x87 fast path for pow: consumes X and Y from the FPU stack and leaves X^Y
// (or NaN when the fast path cannot produce a result) on top.
// The callers visible in this file enter with the stack as "X Y ...".
// Runs between increase_precision() and restore_precision().
void MacroAssembler::fast_pow() {
|
||||
// computes X^Y = 2^(Y * log2(X))
|
||||
// if fast computation is not possible, result is NaN. Requires
|
||||
// fallback from user of this macro.
|
||||
// increase precision for intermediate steps of the computation
|
||||
BLOCK_COMMENT("fast_pow {");
|
||||
increase_precision();
|
||||
fyl2x(); // Stack: (Y*log2(X)) ...
|
||||
pow_exp_core_encoding(); // Stack: 2^(Y*log2(X)) = X^Y ...
|
||||
restore_precision();
|
||||
BLOCK_COMMENT("} fast_pow");
|
||||
}
|
||||
|
||||
// Emits x87 code computing X^Y with the stack as "X Y" on entry and the result
// on top of the FPU stack at label 'done'.
// Paths emitted below:
//   - Y == 2: result is X*X.
//   - X >= 0: fast_pow() on duplicated arguments; a NaN result goes to the slow case.
//   - X < 0:  Y must equal int(Y), otherwise slow case. abs(X)^Y is computed with
//             fast_pow() and negated when int(Y) is odd.
//   - slow case: fp_runtime_fallback() to SharedRuntime::dpow with 2 arguments.
// num_fpu_regs_in_use selects how the duplicated arguments are discarded and is
// also forwarded to the runtime fallback.
void MacroAssembler::pow_or_exp(int num_fpu_regs_in_use) {
|
||||
// kills rax, rcx, rdx
|
||||
// pow and exp needs 2 extra registers on the fpu stack.
|
||||
Label slow_case, done;
|
||||
Register tmp = noreg;
|
||||
if (!VM_Version::supports_cmov()) {
|
||||
// fcmp needs a temporary so preserve rdx,
|
||||
tmp = rdx;
|
||||
}
|
||||
Register tmp2 = rax;
|
||||
Register tmp3 = rcx;
|
||||
|
||||
// Stack: X Y
|
||||
Label x_negative, y_not_2;
|
||||
|
||||
static double two = 2.0;
|
||||
ExternalAddress two_addr((address)&two);
|
||||
|
||||
// constant maybe too far on 64 bit
|
||||
lea(tmp2, two_addr);
|
||||
fld_d(Address(tmp2, 0)); // Stack: 2 X Y
|
||||
fcmp(tmp, 2, true, false); // Stack: X Y
|
||||
jcc(Assembler::parity, y_not_2);
|
||||
jcc(Assembler::notEqual, y_not_2);
|
||||
|
||||
fxch(); fpop(); // Stack: X
|
||||
fmul(0); // Stack: X*X
|
||||
|
||||
jmp(done);
|
||||
|
||||
bind(y_not_2);
|
||||
|
||||
fldz(); // Stack: 0 X Y
|
||||
fcmp(tmp, 1, true, false); // Stack: X Y
|
||||
jcc(Assembler::above, x_negative);
|
||||
|
||||
// X >= 0
|
||||
|
||||
fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
|
||||
fld_s(1); // Stack: X Y X Y
|
||||
fast_pow(); // Stack: X^Y X Y
|
||||
fcmp(tmp, 0, false, false); // Stack: X^Y X Y
|
||||
// X^Y not equal to itself: X^Y is NaN go to slow case.
|
||||
jcc(Assembler::parity, slow_case);
|
||||
// get rid of duplicate arguments. Stack: X^Y
|
||||
if (num_fpu_regs_in_use > 0) {
|
||||
fxch(); fpop();
|
||||
fxch(); fpop();
|
||||
} else {
|
||||
ffree(2);
|
||||
ffree(1);
|
||||
}
|
||||
jmp(done);
|
||||
|
||||
// X < 0
|
||||
bind(x_negative);
|
||||
|
||||
fld_s(1); // Stack: Y X Y
|
||||
frndint(); // Stack: int(Y) X Y
|
||||
fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
|
||||
jcc(Assembler::notEqual, slow_case);
|
||||
|
||||
subptr(rsp, 8);
|
||||
|
||||
// For X^Y, when X < 0, Y has to be an integer and the final
|
||||
// result depends on whether it's odd or even. We just checked
|
||||
// that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
|
||||
// integer to test its parity. If int(Y) is huge and doesn't fit
|
||||
// in the 64 bit integer range, the integer indefinite value will
|
||||
// end up in the gp registers. Huge numbers are all even, the
|
||||
// integer indefinite number is even so it's fine.
|
||||
|
||||
#ifdef ASSERT
|
||||
// Let's check we don't end up with an integer indefinite number
|
||||
// when not expected. First test for huge numbers: check whether
|
||||
// int(Y)+1 == int(Y) which is true for very large numbers and
|
||||
// those are all even. A 64 bit integer is guaranteed to not
|
||||
// overflow for numbers where y+1 != y (when precision is set to
|
||||
// double precision).
|
||||
Label y_not_huge;
|
||||
|
||||
fld1(); // Stack: 1 int(Y) X Y
|
||||
fadd(1); // Stack: 1+int(Y) int(Y) X Y
|
||||
|
||||
#ifdef _LP64
|
||||
// trip to memory to force the precision down from double extended
|
||||
// precision
|
||||
fstp_d(Address(rsp, 0));
|
||||
fld_d(Address(rsp, 0));
|
||||
#endif
|
||||
|
||||
fcmp(tmp, 1, true, false); // Stack: int(Y) X Y
|
||||
#endif
|
||||
|
||||
// move int(Y) as 64 bit integer to thread's stack
|
||||
fistp_d(Address(rsp,0)); // Stack: X Y
|
||||
|
||||
#ifdef ASSERT
|
||||
jcc(Assembler::notEqual, y_not_huge);
|
||||
|
||||
// Y is huge so we know it's even. It may not fit in a 64 bit
|
||||
// integer and we don't want the debug code below to see the
|
||||
// integer indefinite value so overwrite int(Y) on the thread's
|
||||
// stack with 0.
|
||||
movl(Address(rsp, 0), 0);
|
||||
movl(Address(rsp, 4), 0);
|
||||
|
||||
bind(y_not_huge);
|
||||
#endif
|
||||
|
||||
fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
|
||||
fld_s(1); // Stack: X Y X Y
|
||||
fabs(); // Stack: abs(X) Y X Y
|
||||
fast_pow(); // Stack: abs(X)^Y X Y
|
||||
fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
|
||||
// abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.
|
||||
|
||||
// Pop int(Y) off the thread's stack (this also releases the 8 bytes reserved
// above) before branching on the flags set by the fcmp.
pop(tmp2);
|
||||
NOT_LP64(pop(tmp3));
|
||||
jcc(Assembler::parity, slow_case);
|
||||
|
||||
#ifdef ASSERT
|
||||
// Check that int(Y) is not integer indefinite value (int
|
||||
// overflow). Shouldn't happen because for values that would
|
||||
// overflow, 1+int(Y)==Y which was tested earlier.
|
||||
#ifndef _LP64
|
||||
{
|
||||
Label integer;
|
||||
testl(tmp2, tmp2);
|
||||
jcc(Assembler::notZero, integer);
|
||||
cmpl(tmp3, 0x80000000);
|
||||
jcc(Assembler::notZero, integer);
|
||||
STOP("integer indefinite value shouldn't be seen here");
|
||||
bind(integer);
|
||||
}
|
||||
#else
|
||||
{
|
||||
Label integer;
|
||||
mov(tmp3, tmp2); // preserve tmp2 for parity check below
|
||||
shlq(tmp3, 1);
|
||||
jcc(Assembler::carryClear, integer);
|
||||
jcc(Assembler::notZero, integer);
|
||||
STOP("integer indefinite value shouldn't be seen here");
|
||||
bind(integer);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// get rid of duplicate arguments. Stack: X^Y
|
||||
if (num_fpu_regs_in_use > 0) {
|
||||
fxch(); fpop();
|
||||
fxch(); fpop();
|
||||
} else {
|
||||
ffree(2);
|
||||
ffree(1);
|
||||
}
|
||||
|
||||
testl(tmp2, 1);
|
||||
jcc(Assembler::zero, done); // X < 0, Y even: X^Y = abs(X)^Y
|
||||
// X < 0, Y odd: X^Y = -abs(X)^Y
|
||||
|
||||
fchs(); // Stack: -abs(X)^Y
|
||||
jmp(done);
|
||||
|
||||
// slow case: runtime call
|
||||
bind(slow_case);
|
||||
|
||||
fpop(); // pop incorrect result or int(Y)
|
||||
|
||||
fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2, num_fpu_regs_in_use);
|
||||
|
||||
// Come here with result in F-TOS
|
||||
bind(done);
|
||||
}
|
||||
|
||||
void MacroAssembler::fpop() {
|
||||
ffree();
|
||||
fincstp();
|
||||
|
@ -918,24 +918,19 @@ class MacroAssembler: public Assembler {
|
||||
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1 LP64_ONLY(COMMA Register tmp2));
|
||||
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||
Register rdx NOT_LP64(COMMA Register tmp) LP64_ONLY(COMMA Register tmp1)
|
||||
LP64_ONLY(COMMA Register tmp2) LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4));
|
||||
|
||||
void increase_precision();
|
||||
void restore_precision();
|
||||
|
||||
// computes pow(x,y). Fallback to runtime call included.
|
||||
// Inline public entry point: forwards num_fpu_regs_in_use unchanged to pow_or_exp().
void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(num_fpu_regs_in_use); }
|
||||
|
||||
private:
|
||||
|
||||
// call runtime as a fallback for trig functions and pow/exp.
|
||||
void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use);
|
||||
|
||||
// computes 2^(Ylog2X); Ylog2X in ST(0)
|
||||
void pow_exp_core_encoding();
|
||||
|
||||
// computes pow(x,y) or exp(x). Fallback to runtime call included.
|
||||
void pow_or_exp(int num_fpu_regs_in_use);
|
||||
|
||||
// these are private because users should be doing movflt/movdbl
|
||||
|
||||
void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2126,15 +2126,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ trigfunc('t');
|
||||
__ ret(0);
|
||||
}
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "pow");
|
||||
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
|
||||
|
||||
__ fld_d(Address(rsp, 12));
|
||||
__ fld_d(Address(rsp, 4));
|
||||
__ pow_with_fallback(0);
|
||||
__ ret(0);
|
||||
}
|
||||
}
|
||||
|
||||
// AES intrinsic stubs
|
||||
@ -3082,6 +3073,30 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
}
|
||||
|
||||
// Generates the LIBM-based pow stub (32-bit flavour) and returns its entry
// address; the result is installed as StubRoutines::_dpow by the caller.
// The stub body is: enter(), the fast_pow() macro sequence working on
// xmm0-xmm7, rax, rcx, rdx with rbx as the extra temporary, leave(), ret(0).
address generate_libmPow() {
  // Register a named descriptor for this code range, following the
  // StubCodeMark idiom the other stubs of this generator use, so the stub
  // can be identified by name instead of appearing as anonymous stub code.
  StubCodeMark mark(this, "StubRoutines", "libmPow");

  address start = __ pc();

  const XMMRegister x0 = xmm0;
  const XMMRegister x1 = xmm1;
  const XMMRegister x2 = xmm2;
  const XMMRegister x3 = xmm3;

  const XMMRegister x4 = xmm4;
  const XMMRegister x5 = xmm5;
  const XMMRegister x6 = xmm6;
  const XMMRegister x7 = xmm7;

  // Additional general-purpose temporary handed to fast_pow().
  const Register tmp = rbx;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;

}
|
||||
|
||||
|
||||
// Safefetch stubs.
|
||||
@ -3310,6 +3325,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
if (VM_Version::supports_sse2()) {
|
||||
StubRoutines::_dexp = generate_libmExp();
|
||||
StubRoutines::_dlog = generate_libmLog();
|
||||
StubRoutines::_dpow = generate_libmPow();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3025,21 +3025,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ addq(rsp, 8);
|
||||
__ ret(0);
|
||||
}
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "pow");
|
||||
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
|
||||
|
||||
__ subq(rsp, 8);
|
||||
__ movdbl(Address(rsp, 0), xmm1);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ movdbl(Address(rsp, 0), xmm0);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ pow_with_fallback(0);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
__ movdbl(xmm0, Address(rsp, 0));
|
||||
__ addq(rsp, 8);
|
||||
__ ret(0);
|
||||
}
|
||||
}
|
||||
|
||||
// AES intrinsic stubs
|
||||
@ -4283,6 +4268,48 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
}
|
||||
|
||||
// Generates the LIBM-based pow stub (64-bit flavour) and returns its entry
// address; the result is installed as StubRoutines::_dpow by the caller.
// The stub body is: enter(), the fast_pow() macro sequence working on
// xmm0-xmm7, rax, rcx, rdx with r8-r11 as extra temporaries, leave(), ret(0).
// On Win64 builds xmm6 and xmm7 are spilled around fast_pow() and restored
// afterwards.
address generate_libmPow() {
  // Register a named descriptor for this code range, following the
  // StubCodeMark idiom the other stubs of this generator use, so the stub
  // can be identified by name instead of appearing as anonymous stub code.
  StubCodeMark mark(this, "StubRoutines", "libmPow");

  address start = __ pc();

  const XMMRegister x0 = xmm0;
  const XMMRegister x1 = xmm1;
  const XMMRegister x2 = xmm2;
  const XMMRegister x3 = xmm3;

  const XMMRegister x4 = xmm4;
  const XMMRegister x5 = xmm5;
  const XMMRegister x6 = xmm6;
  const XMMRegister x7 = xmm7;

  // Additional general-purpose temporaries handed to fast_pow().
  const Register tmp1 = r8;
  const Register tmp2 = r9;
  const Register tmp3 = r10;
  const Register tmp4 = r11;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WIN64
  // save the xmm registers which must be preserved 6-7
  // (4 words = two 16-byte slots, one per register)
  __ subptr(rsp, 4 * wordSize);
  __ movdqu(Address(rsp, 0), xmm6);
  __ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif
  __ fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);

#ifdef _WIN64
  // restore xmm regs belonging to calling function
  __ movdqu(xmm6, Address(rsp, 0));
  __ movdqu(xmm7, Address(rsp, 2 * wordSize));
  __ addptr(rsp, 4 * wordSize);
#endif

  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;

}
|
||||
|
||||
#undef __
|
||||
#define __ masm->
|
||||
@ -4478,6 +4505,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
if (VM_Version::supports_sse2()) {
|
||||
StubRoutines::_dexp = generate_libmExp();
|
||||
StubRoutines::_dlog = generate_libmLog();
|
||||
StubRoutines::_dpow = generate_libmPow();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9885,39 +9885,6 @@ instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// PowD on x87 register operands; selected only when UseSSE<=1.
// The result replaces Y. rax, rdx, rcx and the flags are declared killed.
instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
|
||||
predicate (UseSSE<=1);
|
||||
match(Set Y (PowD X Y)); // Raise X to the Yth power
|
||||
effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
|
||||
format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %}
|
||||
ins_encode %{
|
||||
// NOTE(review): 8 bytes of stack are reserved around the sequence; the reason
// is not visible in this encoding - confirm before relying on it.
__ subptr(rsp, 8);
|
||||
// Push a copy of X on top of the FPU stack.
// NOTE(review): the "- 1" presumably maps the allocator's register number to an
// FPU stack index - confirm.
__ fld_s($X$$reg - 1);
|
||||
__ fast_pow();
|
||||
__ addptr(rsp, 8);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// PowD on XMM register operands; selected only when UseSSE>=2.
// The operands are moved onto the x87 stack through an 8-byte stack slot,
// fast_pow() runs there, and the result is moved back to dst through the same
// slot. rax, rdx, rcx and the flags are declared killed.
instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
|
||||
predicate (UseSSE>=2);
|
||||
match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
|
||||
effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
|
||||
format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %}
|
||||
ins_encode %{
|
||||
// Reserve the 8-byte transfer slot.
__ subptr(rsp, 8);
|
||||
// Load src1 first, then src0, so src0 ends up on top of the FPU stack.
__ movdbl(Address(rsp, 0), $src1$$XMMRegister);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ movdbl(Address(rsp, 0), $src0$$XMMRegister);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ fast_pow();
|
||||
// Pop the x87 result into the slot and load it into dst.
__ fstp_d(Address(rsp, 0));
|
||||
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
|
||||
__ addptr(rsp, 8);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
|
||||
predicate (UseSSE<=1);
|
||||
// The source Double operand on FPU stack
|
||||
|
@ -9864,24 +9864,6 @@ instruct log10D_reg(regD dst) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// PowD on XMM register operands (no predicate: always eligible).
// The operands are moved onto the x87 stack through an 8-byte stack slot,
// fast_pow() runs there, and the result is moved back to dst through the same
// slot. rax, rdx, rcx and the flags are declared killed.
instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
|
||||
match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
|
||||
effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
|
||||
format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %}
|
||||
ins_encode %{
|
||||
// Reserve the 8-byte transfer slot.
__ subptr(rsp, 8);
|
||||
// Load src1 first, then src0, so src0 ends up on top of the FPU stack.
__ movdbl(Address(rsp, 0), $src1$$XMMRegister);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ movdbl(Address(rsp, 0), $src0$$XMMRegister);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ fast_pow();
|
||||
// Pop the x87 result into the slot and load it into dst.
__ fstp_d(Address(rsp, 0));
|
||||
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
|
||||
__ addptr(rsp, 8);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
//----------Arithmetic Conversion Instructions---------------------------------
|
||||
|
||||
instruct roundFloat_nop(regF dst)
|
||||
|
@ -4018,7 +4018,6 @@ int MatchRule::is_expensive() const {
|
||||
strcmp(opType,"ModD")==0 ||
|
||||
strcmp(opType,"ModF")==0 ||
|
||||
strcmp(opType,"ModI")==0 ||
|
||||
strcmp(opType,"PowD")==0 ||
|
||||
strcmp(opType,"SinD")==0 ||
|
||||
strcmp(opType,"SqrtD")==0 ||
|
||||
strcmp(opType,"TanD")==0 ||
|
||||
|
@ -754,31 +754,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
|
||||
break;
|
||||
}
|
||||
|
||||
case lir_pow: {
|
||||
assert(op->as_Op2() != NULL, "must be");
|
||||
LIR_Op2* op2 = (LIR_Op2*)op;
|
||||
|
||||
// On x86 pow needs two temporary fpu stack slots: tmp1 and
|
||||
// tmp2. Register input operands as temps to guarantee that it
|
||||
// doesn't overlap with the temporary slots.
|
||||
assert(op2->_info == NULL, "not used");
|
||||
assert(op2->_opr1->is_valid() && op2->_opr2->is_valid(), "used");
|
||||
assert(op2->_tmp1->is_valid() && op2->_tmp2->is_valid() && op2->_tmp3->is_valid()
|
||||
&& op2->_tmp4->is_valid() && op2->_tmp5->is_valid(), "used");
|
||||
assert(op2->_result->is_valid(), "used");
|
||||
|
||||
do_input(op2->_opr1); do_temp(op2->_opr1);
|
||||
do_input(op2->_opr2); do_temp(op2->_opr2);
|
||||
do_temp(op2->_tmp1);
|
||||
do_temp(op2->_tmp2);
|
||||
do_temp(op2->_tmp3);
|
||||
do_temp(op2->_tmp4);
|
||||
do_temp(op2->_tmp5);
|
||||
do_output(op2->_result);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// LIR_Op3
|
||||
case lir_idiv:
|
||||
case lir_irem: {
|
||||
@ -1769,7 +1744,6 @@ const char * LIR_Op::name() const {
|
||||
case lir_cos: s = "cos"; break;
|
||||
case lir_tan: s = "tan"; break;
|
||||
case lir_log10: s = "log10"; break;
|
||||
case lir_pow: s = "pow"; break;
|
||||
case lir_logic_and: s = "logic_and"; break;
|
||||
case lir_logic_or: s = "logic_or"; break;
|
||||
case lir_logic_xor: s = "logic_xor"; break;
|
||||
|
@ -962,7 +962,6 @@ enum LIR_Code {
|
||||
, lir_cos
|
||||
, lir_tan
|
||||
, lir_log10
|
||||
, lir_pow
|
||||
, lir_logic_and
|
||||
, lir_logic_or
|
||||
, lir_logic_xor
|
||||
@ -2198,7 +2197,6 @@ class LIR_List: public CompilationResourceObj {
|
||||
void sin (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_sin , from, tmp1, to, tmp2)); }
|
||||
void cos (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_cos , from, tmp1, to, tmp2)); }
|
||||
void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_tan , from, tmp1, to, tmp2)); }
|
||||
void pow (LIR_Opr arg1, LIR_Opr arg2, LIR_Opr res, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_pow, arg1, arg2, res, tmp1, tmp2, tmp3, tmp4, tmp5)); }
|
||||
|
||||
void add (LIR_Opr left, LIR_Opr right, LIR_Opr res) { append(new LIR_Op2(lir_add, left, right, res)); }
|
||||
void sub (LIR_Opr left, LIR_Opr right, LIR_Opr res, CodeEmitInfo* info = NULL) { append(new LIR_Op2(lir_sub, left, right, res, info)); }
|
||||
|
@ -740,7 +740,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
|
||||
case lir_tan:
|
||||
case lir_cos:
|
||||
case lir_log10:
|
||||
case lir_pow:
|
||||
intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
|
||||
break;
|
||||
|
||||
|
@ -6603,7 +6603,6 @@ void LinearScanStatistic::collect(LinearScan* allocator) {
|
||||
case lir_cos:
|
||||
case lir_abs:
|
||||
case lir_log10:
|
||||
case lir_pow:
|
||||
case lir_logic_and:
|
||||
case lir_logic_or:
|
||||
case lir_logic_xor:
|
||||
|
@ -319,6 +319,7 @@ const char* Runtime1::name_for_address(address entry) {
|
||||
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
|
||||
FUNCTION_CASE(entry, StubRoutines::dexp());
|
||||
FUNCTION_CASE(entry, StubRoutines::dlog());
|
||||
FUNCTION_CASE(entry, StubRoutines::dpow());
|
||||
|
||||
#undef FUNCTION_CASE
|
||||
|
||||
|
@ -216,7 +216,6 @@ macro(PartialSubtypeCheck)
|
||||
macro(Phi)
|
||||
macro(PopCountI)
|
||||
macro(PopCountL)
|
||||
macro(PowD)
|
||||
macro(PrefetchAllocation)
|
||||
macro(Proj)
|
||||
macro(RShiftI)
|
||||
|
@ -230,8 +230,6 @@ class LibraryCallKit : public GraphKit {
|
||||
bool inline_math_negateExactL();
|
||||
bool inline_math_subtractExactI(bool is_decrement);
|
||||
bool inline_math_subtractExactL(bool is_decrement);
|
||||
bool inline_pow();
|
||||
Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
|
||||
bool inline_min_max(vmIntrinsics::ID id);
|
||||
bool inline_notify(vmIntrinsics::ID id);
|
||||
Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
|
||||
@ -1718,243 +1716,6 @@ bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Post-processes the value produced by an inlined pow/exp node by guarding it
// with a NaN test (result compared against itself).
//   result     - node holding the intrinsic's value
//   x, y       - original arguments; y may be NULL (see the "y ? top() : NULL" argument below)
//   call_type, funcAddr, funcName - describe the runtime routine used as fallback
// Returns either `result` itself (trap variant, or when the slow projection is top)
// or a Phi merging `result` with the value of the runtime call.
Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) {
|
||||
//-------------------
|
||||
//result=(result.isNaN())? funcAddr():result;
|
||||
// Check: If isNaN() by checking result!=result? then either trap
|
||||
// or go to runtime
|
||||
Node* cmpisnan = _gvn.transform(new CmpDNode(result, result));
|
||||
// Build the boolean node
|
||||
// (tests "eq": true means result compared equal to itself, i.e. it is a number)
Node* bolisnum = _gvn.transform(new BoolNode(cmpisnan, BoolTest::eq));
|
||||
|
||||
if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
|
||||
{ BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
|
||||
// The pow or exp intrinsic returned a NaN, which requires a call
|
||||
// to the runtime. Recompile with the runtime call.
|
||||
uncommon_trap(Deoptimization::Reason_intrinsic,
|
||||
Deoptimization::Action_make_not_entrant);
|
||||
}
|
||||
return result;
|
||||
} else {
|
||||
// If this inlining ever returned NaN in the past, we compile a call
|
||||
// to the runtime to properly handle corner cases
|
||||
|
||||
IfNode* iff = create_and_xform_if(control(), bolisnum, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
|
||||
Node* if_slow = _gvn.transform(new IfFalseNode(iff));
|
||||
Node* if_fast = _gvn.transform(new IfTrueNode(iff));
|
||||
|
||||
if (!if_slow->is_top()) {
|
||||
// Region/Phi slot 1 carries the fast (non-NaN) path, slot 2 the runtime-call path.
RegionNode* result_region = new RegionNode(3);
|
||||
PhiNode* result_val = new PhiNode(result_region, Type::DOUBLE);
|
||||
|
||||
result_region->init_req(1, if_fast);
|
||||
result_val->init_req(1, result);
|
||||
|
||||
set_control(if_slow);
|
||||
|
||||
const TypePtr* no_memory_effects = NULL;
|
||||
// Each double argument is followed by top(); the y pair is only supplied when y is non-NULL.
Node* rt = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName,
|
||||
no_memory_effects,
|
||||
x, top(), y, y ? top() : NULL);
|
||||
Node* value = _gvn.transform(new ProjNode(rt, TypeFunc::Parms+0));
|
||||
#ifdef ASSERT
|
||||
Node* value_top = _gvn.transform(new ProjNode(rt, TypeFunc::Parms+1));
|
||||
assert(value_top == top(), "second value must be top");
|
||||
#endif
|
||||
|
||||
result_region->init_req(2, control());
|
||||
result_val->init_req(2, value);
|
||||
set_control(_gvn.transform(result_region));
|
||||
return _gvn.transform(result_val);
|
||||
} else {
|
||||
// The slow projection folded to top, so no runtime call is built.
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------inline_pow-------------------------------------
|
||||
// Inline power instructions, if possible.
|
||||
// Builds the graph for an inlined pow(x, y): a y == 2.0 special case computing x * x,
// and otherwise a PowDNode-based computation whose result is passed through
// finish_pow_exp() with SharedRuntime::dpow as the runtime fallback.
// Always returns true after setting the result and the merged control.
bool LibraryCallKit::inline_pow() {
|
||||
// Pseudocode for pow
|
||||
// if (y == 2) {
|
||||
// return x * x;
|
||||
// } else {
|
||||
// if (x <= 0.0) {
|
||||
// long longy = (long)y;
|
||||
// if ((double)longy == y) { // if y is long
|
||||
// if (y + 1 == y) longy = 0; // huge number: even
|
||||
// result = ((1&longy) == 0)?DPow(abs(x), y):-DPow(abs(x), y);
|
||||
// } else {
|
||||
// result = NaN;
|
||||
// }
|
||||
// } else {
|
||||
// result = DPow(x,y);
|
||||
// }
|
||||
// if (result != result)? {
|
||||
// result = uncommon_trap() or runtime_call();
|
||||
// }
|
||||
// return result;
|
||||
// }
|
||||
|
||||
// The two double arguments are read from argument slots 0 and 2.
Node* x = round_double_node(argument(0));
|
||||
Node* y = round_double_node(argument(2));
|
||||
|
||||
Node* result = NULL;
|
||||
|
||||
Node* const_two_node = makecon(TypeD::make(2.0));
|
||||
Node* cmp_node = _gvn.transform(new CmpDNode(y, const_two_node));
|
||||
Node* bool_node = _gvn.transform(new BoolNode(cmp_node, BoolTest::eq));
|
||||
IfNode* if_node = create_and_xform_if(control(), bool_node, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
|
||||
Node* if_true = _gvn.transform(new IfTrueNode(if_node));
|
||||
Node* if_false = _gvn.transform(new IfFalseNode(if_node));
|
||||
|
||||
// Outer merge: slot 1 is the y == 2 path, slot 2 is filled in after finish_pow_exp() below.
RegionNode* region_node = new RegionNode(3);
|
||||
region_node->init_req(1, if_true);
|
||||
|
||||
Node* phi_node = new PhiNode(region_node, Type::DOUBLE);
|
||||
// special case for x^y where y == 2, we can convert it to x * x
|
||||
phi_node->init_req(1, _gvn.transform(new MulDNode(x, x)));
|
||||
|
||||
// set control to if_false since we will now process the false branch
|
||||
set_control(if_false);
|
||||
|
||||
if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
|
||||
// Short form: skip the fancy tests and just check for NaN result.
|
||||
result = _gvn.transform(new PowDNode(C, control(), x, y));
|
||||
} else {
|
||||
// If this inlining ever returned NaN in the past, include all
|
||||
// checks + call to the runtime.
|
||||
|
||||
// Set the merge point for If node with condition of (x <= 0.0)
|
||||
// There are four possible paths to region node and phi node
|
||||
RegionNode *r = new RegionNode(4);
|
||||
Node *phi = new PhiNode(r, Type::DOUBLE);
|
||||
|
||||
// Build the first if node: if (x <= 0.0)
|
||||
// Node for 0 constant
|
||||
Node *zeronode = makecon(TypeD::ZERO);
|
||||
// Check x:0
|
||||
Node *cmp = _gvn.transform(new CmpDNode(x, zeronode));
|
||||
// Check: If (x<=0) then go complex path
|
||||
Node *bol1 = _gvn.transform(new BoolNode( cmp, BoolTest::le ));
|
||||
// Branch either way
|
||||
IfNode *if1 = create_and_xform_if(control(),bol1, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
|
||||
// Fast path taken; set region slot 3
|
||||
Node *fast_taken = _gvn.transform(new IfFalseNode(if1));
|
||||
r->init_req(3,fast_taken); // Capture fast-control
|
||||
|
||||
// Fast path not-taken, i.e. slow path
|
||||
Node *complex_path = _gvn.transform(new IfTrueNode(if1));
|
||||
|
||||
// Set fast path result
|
||||
Node *fast_result = _gvn.transform(new PowDNode(C, control(), x, y));
|
||||
phi->init_req(3, fast_result);
|
||||
|
||||
// Complex path
|
||||
// Build the second if node (if y is long)
|
||||
// Node for (long)y
|
||||
Node *longy = _gvn.transform(new ConvD2LNode(y));
|
||||
// Node for (double)((long) y)
|
||||
Node *doublelongy= _gvn.transform(new ConvL2DNode(longy));
|
||||
// Check (double)((long) y) : y
|
||||
Node *cmplongy= _gvn.transform(new CmpDNode(doublelongy, y));
|
||||
// Check if (y isn't long) then go to slow path
|
||||
|
||||
Node *bol2 = _gvn.transform(new BoolNode( cmplongy, BoolTest::ne ));
|
||||
// Branch either way
|
||||
IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
|
||||
Node* ylong_path = _gvn.transform(new IfFalseNode(if2));
|
||||
|
||||
Node *slow_path = _gvn.transform(new IfTrueNode(if2));
|
||||
|
||||
// Compute the parity bit (1 & (long)y) that selects the sign applied to DPow(abs(x), y)
|
||||
// Node for constant 1
|
||||
Node *conone = longcon(1);
|
||||
// 1& (long)y
|
||||
Node *signnode= _gvn.transform(new AndLNode(conone, longy));
|
||||
|
||||
// A huge number is always even. Detect a huge number by checking
|
||||
// if y + 1 == y and set integer to be tested for parity to 0.
|
||||
// Required for corner case:
|
||||
// (long)9.223372036854776E18 = max_jlong
|
||||
// (double)(long)9.223372036854776E18 = 9.223372036854776E18
|
||||
// max_jlong is odd but 9.223372036854776E18 is even
|
||||
Node* yplus1 = _gvn.transform(new AddDNode(y, makecon(TypeD::make(1))));
|
||||
Node *cmpyplus1= _gvn.transform(new CmpDNode(yplus1, y));
|
||||
Node *bolyplus1 = _gvn.transform(new BoolNode( cmpyplus1, BoolTest::eq ));
|
||||
Node* correctedsign = NULL;
|
||||
if (ConditionalMoveLimit != 0) {
|
||||
correctedsign = _gvn.transform(CMoveNode::make(NULL, bolyplus1, signnode, longcon(0), TypeLong::LONG));
|
||||
} else {
|
||||
// Explicit diamond instead of a conditional move.
// NOTE: the r and phi declared in this block shadow the outer merge r/phi declared above.
IfNode *ifyplus1 = create_and_xform_if(ylong_path,bolyplus1, PROB_FAIR, COUNT_UNKNOWN);
|
||||
RegionNode *r = new RegionNode(3);
|
||||
Node *phi = new PhiNode(r, TypeLong::LONG);
|
||||
r->init_req(1, _gvn.transform(new IfFalseNode(ifyplus1)));
|
||||
r->init_req(2, _gvn.transform(new IfTrueNode(ifyplus1)));
|
||||
phi->init_req(1, signnode);
|
||||
phi->init_req(2, longcon(0));
|
||||
correctedsign = _gvn.transform(phi);
|
||||
ylong_path = _gvn.transform(r);
|
||||
record_for_igvn(r);
|
||||
}
|
||||
|
||||
// zero node
|
||||
Node *conzero = longcon(0);
|
||||
// Check (1&(long)y)==0?
|
||||
Node *cmpeq1 = _gvn.transform(new CmpLNode(correctedsign, conzero));
|
||||
// Check if (1&(long)y)!=0?, if so the result is negative
|
||||
Node *bol3 = _gvn.transform(new BoolNode( cmpeq1, BoolTest::ne ));
|
||||
// abs(x)
|
||||
Node *absx=_gvn.transform(new AbsDNode(x));
|
||||
// abs(x)^y
|
||||
Node *absxpowy = _gvn.transform(new PowDNode(C, control(), absx, y));
|
||||
// -abs(x)^y
|
||||
Node *negabsxpowy = _gvn.transform(new NegDNode (absxpowy));
|
||||
// (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
|
||||
Node *signresult = NULL;
|
||||
if (ConditionalMoveLimit != 0) {
|
||||
signresult = _gvn.transform(CMoveNode::make(NULL, bol3, absxpowy, negabsxpowy, Type::DOUBLE));
|
||||
} else {
|
||||
// Explicit diamond: false projection (even y) selects absxpowy, true projection (odd y) selects negabsxpowy.
// NOTE: the r and phi declared in this block shadow the outer merge r/phi declared above.
IfNode *ifyeven = create_and_xform_if(ylong_path,bol3, PROB_FAIR, COUNT_UNKNOWN);
|
||||
RegionNode *r = new RegionNode(3);
|
||||
Node *phi = new PhiNode(r, Type::DOUBLE);
|
||||
r->init_req(1, _gvn.transform(new IfFalseNode(ifyeven)));
|
||||
r->init_req(2, _gvn.transform(new IfTrueNode(ifyeven)));
|
||||
phi->init_req(1, absxpowy);
|
||||
phi->init_req(2, negabsxpowy);
|
||||
signresult = _gvn.transform(phi);
|
||||
ylong_path = _gvn.transform(r);
|
||||
record_for_igvn(r);
|
||||
}
|
||||
// Set complex path fast result
|
||||
r->init_req(2, ylong_path);
|
||||
phi->init_req(2, signresult);
|
||||
|
||||
// Slot 1 of the merge: x <= 0 with a non-integral y yields a constant NaN.
static const jlong nan_bits = CONST64(0x7ff8000000000000);
|
||||
Node *slow_result = makecon(TypeD::make(*(double*)&nan_bits)); // return NaN
|
||||
r->init_req(1,slow_path);
|
||||
phi->init_req(1,slow_result);
|
||||
|
||||
// Post merge
|
||||
set_control(_gvn.transform(r));
|
||||
record_for_igvn(r);
|
||||
result = _gvn.transform(phi);
|
||||
}
|
||||
|
||||
result = finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
|
||||
|
||||
// control from finish_pow_exp is now input to the region node
|
||||
region_node->set_req(2, control());
|
||||
// the result from finish_pow_exp is now input to the phi node
|
||||
phi_node->init_req(2, result);
|
||||
set_control(_gvn.transform(region_node));
|
||||
record_for_igvn(region_node);
|
||||
set_result(_gvn.transform(phi_node));
|
||||
|
||||
C->set_has_split_ifs(true); // Has chance for split-if optimization
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------runtime_math-----------------------------
|
||||
bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) {
|
||||
assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(),
|
||||
@ -2005,7 +1766,9 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
|
||||
return StubRoutines::dexp() != NULL ?
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dexp(), "dexp") :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dexp), "EXP");
|
||||
case vmIntrinsics::_dpow: return Matcher::has_match_rule(Op_PowD) ? inline_pow() :
|
||||
case vmIntrinsics::_dpow:
|
||||
return StubRoutines::dpow() != NULL ?
|
||||
runtime_math(OptoRuntime::Math_DD_D_Type(), StubRoutines::dpow(), "dpow") :
|
||||
runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow), "POW");
|
||||
#undef FN_PTR
|
||||
|
||||
|
@ -1519,17 +1519,3 @@ const Type *Log10DNode::Value( PhaseTransform *phase ) const {
|
||||
return TypeD::make( StubRoutines::intrinsic_log10( d ) );
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------Value------------------------------------------
|
||||
// Compute pow
|
||||
// Type of a PowD node given the current types of its inputs in(1) and in(2).
// Inputs are examined in order: for in(1), then for in(2), a TOP type yields TOP and
// any type that is not a double constant yields DOUBLE. Only when both are double
// constants is the value folded via StubRoutines::intrinsic_pow.
const Type *PowDNode::Value( PhaseTransform *phase ) const {
|
||||
const Type *t1 = phase->type( in(1) );
|
||||
if( t1 == Type::TOP ) return Type::TOP;
|
||||
// A non-constant first input returns DOUBLE here, before the second input is looked at.
if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
|
||||
const Type *t2 = phase->type( in(2) );
|
||||
if( t2 == Type::TOP ) return Type::TOP;
|
||||
if( t2->base() != Type::DoubleCon ) return Type::DOUBLE;
|
||||
double d1 = t1->getd();
|
||||
double d2 = t2->getd();
|
||||
// Both inputs are constants: fold to a constant double type.
return TypeD::make( StubRoutines::intrinsic_pow( d1, d2 ) );
|
||||
}
|
||||
|
@ -491,20 +491,6 @@ public:
|
||||
virtual const Type *Value( PhaseTransform *phase ) const;
|
||||
};
|
||||
|
||||
//------------------------------PowDNode---------------------------------------
|
||||
// Raise a double to a double power
|
||||
// Node with three inputs (c, in1, in2) passed straight to the Node base constructor.
// The constructor flags the node as expensive and registers it with the Compile object.
class PowDNode : public Node {
|
||||
public:
|
||||
PowDNode(Compile* C, Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {
|
||||
// Mark as expensive and record it on C via add_expensive_node().
init_flags(Flag_is_expensive);
|
||||
C->add_expensive_node(this);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
// The node always produces a double, assigned to a double register class.
const Type *bottom_type() const { return Type::DOUBLE; }
|
||||
virtual uint ideal_reg() const { return Op_RegD; }
|
||||
virtual const Type *Value( PhaseTransform *phase ) const;
|
||||
};
|
||||
|
||||
//-------------------------------ReverseBytesINode--------------------------------
|
||||
// reverse bytes of an integer
|
||||
class ReverseBytesINode : public Node {
|
||||
|
@ -153,9 +153,9 @@ address StubRoutines::_vectorizedMismatch = NULL;
|
||||
|
||||
address StubRoutines::_dexp = NULL;
|
||||
address StubRoutines::_dlog = NULL;
|
||||
address StubRoutines::_dpow = NULL;
|
||||
|
||||
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
|
||||
double (* StubRoutines::_intrinsic_pow )(double, double) = NULL;
|
||||
double (* StubRoutines::_intrinsic_sin )(double) = NULL;
|
||||
double (* StubRoutines::_intrinsic_cos )(double) = NULL;
|
||||
double (* StubRoutines::_intrinsic_tan )(double) = NULL;
|
||||
|
@ -212,6 +212,7 @@ class StubRoutines: AllStatic {
|
||||
|
||||
static address _dexp;
|
||||
static address _dlog;
|
||||
static address _dpow;
|
||||
|
||||
// These are versions of the java.lang.Math methods which perform
|
||||
// the same operations as the intrinsic version. They are used for
|
||||
@ -384,6 +385,7 @@ class StubRoutines: AllStatic {
|
||||
|
||||
// Accessors returning the stored stub entry addresses _dexp, _dlog and _dpow.
// The value may be NULL; the dexp/dpow call sites visible in this change test for NULL
// and fall back to the SharedRuntime routine.
static address dexp() { return _dexp; }
|
||||
static address dlog() { return _dlog; }
|
||||
static address dpow() { return _dpow; }
|
||||
|
||||
static address select_fill_function(BasicType t, bool aligned, const char* &name);
|
||||
|
||||
|
@ -860,6 +860,7 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
||||
static_field(StubRoutines, _mulAdd, address) \
|
||||
static_field(StubRoutines, _dexp, address) \
|
||||
static_field(StubRoutines, _dlog, address) \
|
||||
static_field(StubRoutines, _dpow, address) \
|
||||
static_field(StubRoutines, _vectorizedMismatch, address) \
|
||||
static_field(StubRoutines, _jbyte_arraycopy, address) \
|
||||
static_field(StubRoutines, _jshort_arraycopy, address) \
|
||||
@ -2058,7 +2059,6 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
||||
declare_c2_type(AtanDNode, Node) \
|
||||
declare_c2_type(SqrtDNode, Node) \
|
||||
declare_c2_type(Log10DNode, Node) \
|
||||
declare_c2_type(PowDNode, Node) \
|
||||
declare_c2_type(ReverseBytesINode, Node) \
|
||||
declare_c2_type(ReverseBytesLNode, Node) \
|
||||
declare_c2_type(ReductionNode, Node) \
|
||||
|
Loading…
x
Reference in New Issue
Block a user