7133857: exp() and pow() should use the x87 ISA on x86

Use x87 instructions to implement exp() and pow() in interpreter/c1/c2.

Reviewed-by: kvn, never, twisti
This commit is contained in:
Roland Westrelin 2012-05-15 10:10:23 +02:00
parent eb4a860bc3
commit b305cf722e
26 changed files with 783 additions and 279 deletions

View File

@ -738,7 +738,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
case vmIntrinsics::_dlog: // fall through
case vmIntrinsics::_dsin: // fall through
case vmIntrinsics::_dtan: // fall through
case vmIntrinsics::_dcos: {
case vmIntrinsics::_dcos: // fall through
case vmIntrinsics::_dexp: {
assert(x->number_of_arguments() == 1, "wrong type");
address runtime_entry = NULL;
@ -758,12 +759,23 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
case vmIntrinsics::_dlog10:
runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
break;
case vmIntrinsics::_dexp:
runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
break;
default:
ShouldNotReachHere();
}
LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL);
set_result(x, result);
break;
}
case vmIntrinsics::_dpow: {
assert(x->number_of_arguments() == 2, "wrong type");
address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL);
set_result(x, result);
break;
}
}
}

View File

@ -403,6 +403,8 @@ address AbstractInterpreterGenerator::generate_method_entry(AbstractInterpreter:
case Interpreter::java_lang_math_abs : break;
case Interpreter::java_lang_math_log : break;
case Interpreter::java_lang_math_log10 : break;
case Interpreter::java_lang_math_pow : break;
case Interpreter::java_lang_math_exp : break;
case Interpreter::java_lang_ref_reference_get
: entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
default : ShouldNotReachHere(); break;

View File

@ -3578,6 +3578,21 @@ void Assembler::fyl2x() {
emit_byte(0xF1);
}
// Emits the two-byte x87 opcode D9 FC (frndint). The pow/exp code in
// MacroAssembler uses it to replace ST(0) with its integral value.
void Assembler::frndint() {
emit_byte(0xD9);
emit_byte(0xFC);
}
// Emits the two-byte x87 opcode D9 F0 (f2xm1): ST(0) becomes 2^ST(0) - 1.
// As noted where it is used, the instruction only operates on operands in
// the range -1 <= ST(0) <= 1.
void Assembler::f2xm1() {
emit_byte(0xD9);
emit_byte(0xF0);
}
// Emits the two-byte x87 opcode D9 EA (fldl2e): pushes the constant
// log2(e) onto the x87 stack.
void Assembler::fldl2e() {
emit_byte(0xD9);
emit_byte(0xEA);
}
// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
@ -6868,6 +6883,242 @@ void MacroAssembler::fldcw(AddressLiteral src) {
Assembler::fldcw(as_Address(src));
}
// Emits code that replaces X in ST(0) with 2^X; shared tail of fast_pow()
// and fast_exp(). When int(X) plus the exponent bias is not a valid
// exponent for a normal double, the emitted code multiplies by NaN so the
// result is NaN and the caller has to fall back to another implementation.
// Uses one jdouble of scratch space below rsp for the duration of the
// sequence.
void MacroAssembler::pow_exp_core_encoding() {
// kills rax, rcx, rdx
subptr(rsp,sizeof(jdouble)); // scratch slot: first int(X), later 2^int(X) as a double
// computes 2^X. Stack: X ...
// f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
// keep it on the thread's stack to compute 2^int(X) later
// then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1
// final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
fld_s(0); // Stack: X X ...
frndint(); // Stack: int(X) X ...
fsuba(1); // Stack: int(X) X-int(X) ...
fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
f2xm1(); // Stack: 2^(X-int(X))-1 ...
fld1(); // Stack: 1 2^(X-int(X))-1 ...
faddp(1); // Stack: 2^(X-int(X))
// computes 2^(int(X)): add exponent bias (1023) to int(X), then
// shift int(X)+1023 to exponent position.
// Exponent is limited to 11 bits if int(X)+1023 does not fit in 11
// bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
// values so detect them and set result to NaN.
movl(rax,Address(rsp,0));
movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
addl(rax, 1023);
movl(rdx,rax); // rdx keeps the unshifted biased exponent for the range checks
shll(rax,20); // rax: biased exponent moved to the exponent bits of the high word
// Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
addl(rdx,1);
// Check that 1 < int(X)+1023+1 < 2048
// in 3 steps:
// 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
// 2- (int(X)+1023+1) != 0
// 3- (int(X)+1023+1) != 1
// Do 2- first because addl just updated the flags.
cmov32(Assembler::equal,rax,rcx); // step 2: int(X)+1023+1 == 0 => NaN
cmpl(rdx,1);
cmov32(Assembler::equal,rax,rcx); // step 3: int(X)+1023+1 == 1 => NaN
testl(rdx,rcx);
cmov32(Assembler::notEqual,rax,rcx); // step 1: bits outside the 11 bit range set => NaN
// Assemble the double in the scratch slot: high word from rax, low word 0.
movl(Address(rsp,4),rax);
movl(Address(rsp,0),0);
fmul_d(Address(rsp,0)); // Stack: 2^X ...
addptr(rsp,sizeof(jdouble));
}
// Emits the x87 fast path for pow. Expects X in ST(0) and Y in ST(1); both
// are consumed and replaced by a single value, X^Y or NaN (see below).
// Kills rax, rcx, rdx through pow_exp_core_encoding().
void MacroAssembler::fast_pow() {
// computes X^Y = 2^(Y * log2(X))
// if fast computation is not possible, result is NaN. Requires
// fallback from user of this macro.
fyl2x(); // Stack: (Y*log2(X)) ...
pow_exp_core_encoding(); // Stack: X^Y ...
}
// Emits the x87 fast path for exp. Expects X in ST(0) and replaces it with
// exp(X), or with NaN (see below). Kills rax, rcx, rdx through
// pow_exp_core_encoding().
void MacroAssembler::fast_exp() {
// computes exp(X) = 2^(X * log2(e))
// if fast computation is not possible, result is NaN. Requires
// fallback from user of this macro.
fldl2e(); // Stack: log2(e) X ...
fmulp(1); // Stack: (X*log2(e)) ...
pow_exp_core_encoding(); // Stack: exp(X) ...
}
// Emits code computing exp(X) (is_exp == true) or pow(X, Y) (is_exp ==
// false) on the x87 stack. X is expected in ST(0) and, for pow, Y in ST(1).
// The x87 fast path is tried first; when it produces NaN (or, for X < 0,
// when Y is not an integer) the emitted code calls SharedRuntime::dexp /
// SharedRuntime::dpow through fp_runtime_fallback(). In every case the
// argument(s) are replaced by the result on top of the x87 stack.
//
// is_exp               - selects exp (one argument) or pow (two arguments)
// num_fpu_regs_in_use  - number of x87 registers in use as reported by the
//                        caller; selects how the duplicated arguments are
//                        dropped and is forwarded to fp_runtime_fallback()
void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
  // kills rax, rcx, rdx
  // pow and exp needs 2 extra registers on the fpu stack.
  Label slow_case, done;

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rdx,
    tmp = rdx;
  }

  Register tmp2 = rax;
  NOT_LP64(Register tmp3 = rcx;)

  if (is_exp) {
    // Stack: X
    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
    fast_exp();                 // Stack: exp(X) X
    fcmp(tmp, 0, false, false); // Stack: exp(X) X
    // exp(X) not equal to itself: exp(X) is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate argument. Stack: exp(X)
    if (num_fpu_regs_in_use > 0) {
      fxch();
      fpop();
    } else {
      ffree(1);
    }
    jmp(done);
  } else {
    // Stack: X Y
    Label x_negative, y_odd;

    fldz();                     // Stack: 0 X Y
    fcmp(tmp, 1, true, false);  // Stack: X Y
    jcc(Assembler::above, x_negative);

    // X >= 0

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fast_pow();                 // Stack: X^Y X Y
    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
    // X^Y not equal to itself: X^Y is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }
    jmp(done);

    // X < 0
    bind(x_negative);

    fld_s(1);                   // Stack: Y X Y
    frndint();                  // Stack: int(Y) X Y
    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
    // Y is not an integer: leave it to the runtime call.
    jcc(Assembler::notEqual, slow_case);

    subptr(rsp, 8);

    // For X^Y, when X < 0, Y has to be an integer and the final
    // result depends on whether it's odd or even. We just checked
    // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
    // integer to test its parity. If int(Y) is huge and doesn't fit
    // in the 64 bit integer range, the integer indefinite value will
    // end up in the gp registers. Huge numbers are all even, the
    // integer indefinite number is even so it's fine.

#ifdef ASSERT
    // Let's check we don't end up with an integer indefinite number
    // when not expected. First test for huge numbers: check whether
    // int(Y)+1 == int(Y) which is true for very large numbers and
    // those are all even. A 64 bit integer is guaranteed to not
    // overflow for numbers where y+1 != y (when precision is set to
    // double precision).
    Label y_not_huge;

    fld1();                     // Stack: 1 int(Y) X Y
    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y

#ifdef _LP64
    // trip to memory to force the precision down from double extended
    // precision
    fstp_d(Address(rsp, 0));
    fld_d(Address(rsp, 0));
#endif

    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
#endif

    // move int(Y) as 64 bit integer to thread's stack
    fistp_d(Address(rsp,0));    // Stack: X Y

#ifdef ASSERT
    jcc(Assembler::notEqual, y_not_huge);

    // Y is huge so we know it's even. It may not fit in a 64 bit
    // integer and we don't want the debug code below to see the
    // integer indefinite value so overwrite int(Y) on the thread's
    // stack with 0.
    movl(Address(rsp, 0), 0);
    movl(Address(rsp, 4), 0);

    bind(y_not_huge);
#endif

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fabs();                     // Stack: abs(X) Y X Y
    fast_pow();                 // Stack: abs(X)^Y X Y
    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
    // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.

    // Fetch int(Y) from the thread's stack before branching so that rsp
    // is balanced on both paths; the flags set by fcmp are consumed by
    // the jcc below.
    pop(tmp2);
    NOT_LP64(pop(tmp3));
    jcc(Assembler::parity, slow_case);

#ifdef ASSERT
    // Check that int(Y) is not integer indefinite value (int
    // overflow). Shouldn't happen because for values that would
    // overflow, 1+int(Y)==Y which was tested earlier.
#ifndef _LP64
    {
      Label integer;
      testl(tmp2, tmp2);
      jcc(Assembler::notZero, integer);
      cmpl(tmp3, 0x80000000);
      jcc(Assembler::notZero, integer);
      stop("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#else
    {
      Label integer;
      // The shift below destroys its operand, so do it on a copy: tmp2
      // must still hold int(Y) for the parity test further down. rcx is
      // free here (it is one of the registers this macro kills and holds
      // nothing live at this point).
      movptr(rcx, tmp2);
      shlq(rcx, 1);
      jcc(Assembler::carryClear, integer);
      jcc(Assembler::notZero, integer);
      stop("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#endif
#endif

    // get rid of duplicate arguments. Stack: abs(X)^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }

    testl(tmp2, 1);
    jcc(Assembler::zero, done); // X < 0, Y even: X^Y = abs(X)^Y
    // X < 0, Y odd: X^Y = -abs(X)^Y

    fchs();                     // Stack: -abs(X)^Y
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  fpop();                       // pop incorrect result or int(Y)

  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
                      is_exp ? 1 : 2, num_fpu_regs_in_use);

  // Come here with result in F-TOS
  bind(done);
}
void MacroAssembler::fpop() {
ffree();
fincstp();
@ -8045,6 +8296,144 @@ void MacroAssembler::incr_allocated_bytes(Register thread,
#endif
}
// Emits a call to the C runtime routine at runtime_entry; used as the slow
// path of the trig functions and of pow/exp.
//
// On entry the nb_args arguments are on top of the x87 stack, first
// argument in ST(0). On exit they have been replaced by the routine's
// result in ST(0). The general purpose registers (pusha/popa), the XMM
// registers (when UseSSE >= 1) and, when num_fpu_regs_in_use > 1, the
// remaining x87 registers are saved before the call and restored after it.
//
// runtime_entry        - address of the runtime routine to call
// nb_args              - number of double arguments (the LP64 code asserts
//                        nb_args <= 2)
// num_fpu_regs_in_use  - number of x87 registers in use, arguments included
void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
pusha();
// if we are coming from c1, xmm registers may be live
// Reserve one jdouble-sized slot per XMM register (16 on LP64, 8 otherwise).
if (UseSSE >= 1) {
subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
}
int off = 0;
if (UseSSE == 1) {
// Saved with movflt; each register still gets its own jdouble-sized slot.
movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
} else if (UseSSE >= 2) {
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7);
#ifdef _LP64
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14);
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15);
#endif
}
// Preserve registers across runtime call
int incoming_argument_and_return_value_offset = -1;
if (num_fpu_regs_in_use > 1) {
// Must preserve all other FPU regs (could alternatively convert
// SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
// FPU state, but can not trust C compiler)
NEEDS_CLEANUP;
// NOTE that in this case we also push the incoming argument(s) to
// the stack and restore it later; we also use this stack slot to
// hold the return value from dsin, dcos etc.
// Each iteration pushes the current ST(0), so the original ST(0) ends up
// in the highest slot and the deepest register at rsp+0.
for (int i = 0; i < num_fpu_regs_in_use; i++) {
subptr(rsp, sizeof(jdouble));
fstp_d(Address(rsp, 0));
}
incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
// Reload the arguments, last one first, so the first argument is in ST(0) again.
for (int i = nb_args-1; i >= 0; i--) {
fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
}
}
// Pop the arguments from the x87 stack into a fresh outgoing area:
// first argument at rsp+0, second at rsp+sizeof(jdouble).
subptr(rsp, nb_args*sizeof(jdouble));
for (int i = 0; i < nb_args; i++) {
fstp_d(Address(rsp, i*sizeof(jdouble)));
}
#ifdef _LP64
// On LP64 the arguments are additionally loaded into xmm0 / xmm1.
if (nb_args > 0) {
movdbl(xmm0, Address(rsp, 0));
}
if (nb_args > 1) {
movdbl(xmm1, Address(rsp, sizeof(jdouble)));
}
assert(nb_args <= 2, "unsupported number of args");
#endif // _LP64
// NOTE: we must not use call_VM_leaf here because that requires a
// complete interpreter frame in debug mode -- same bug as 4387334
// MacroAssembler::call_VM_leaf_base is perfectly safe and will
// do proper 64bit abi
NEEDS_CLEANUP;
// Need to add stack banging before this runtime call if it needs to
// be taken; however, there is no generic stack banging routine at
// the MacroAssembler level
MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
#ifdef _LP64
// Move the result from xmm0 to the top of the x87 stack via memory.
movsd(Address(rsp, 0), xmm0);
fld_d(Address(rsp, 0));
#endif // _LP64
addptr(rsp, sizeof(jdouble) * nb_args);
if (num_fpu_regs_in_use > 1) {
// Must save return value to stack and then restore entire FPU
// stack except incoming arguments
fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
fld_d(Address(rsp, 0));
addptr(rsp, sizeof(jdouble));
}
// Only the nb_args argument slots remain; the return value sits in the
// highest one (the slot of the former ST(0)).
fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
addptr(rsp, sizeof(jdouble) * nb_args);
}
// Restore the XMM registers from the slots filled on entry.
off = 0;
if (UseSSE == 1) {
movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
} else if (UseSSE >= 2) {
movdbl(xmm0, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm1, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm2, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm3, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm4, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm5, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm6, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm7, Address(rsp,off++*sizeof(jdouble)));
#ifdef _LP64
movdbl(xmm8, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm9, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm10, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm11, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm12, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm13, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm14, Address(rsp,off++*sizeof(jdouble)));
movdbl(xmm15, Address(rsp,off++*sizeof(jdouble)));
#endif
}
if (UseSSE >= 1) {
addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
}
popa();
}
static const double pi_4 = 0.7853981633974483;
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
@ -8092,73 +8481,27 @@ void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
// slow case: runtime call
bind(slow_case);
// Preserve registers across runtime call
pusha();
int incoming_argument_and_return_value_offset = -1;
if (num_fpu_regs_in_use > 1) {
// Must preserve all other FPU regs (could alternatively convert
// SharedRuntime::dsin and dcos into assembly routines known not to trash
// FPU state, but can not trust C compiler)
NEEDS_CLEANUP;
// NOTE that in this case we also push the incoming argument to
// the stack and restore it later; we also use this stack slot to
// hold the return value from dsin or dcos.
for (int i = 0; i < num_fpu_regs_in_use; i++) {
subptr(rsp, sizeof(jdouble));
fstp_d(Address(rsp, 0));
}
incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
fld_d(Address(rsp, incoming_argument_and_return_value_offset));
}
subptr(rsp, sizeof(jdouble));
fstp_d(Address(rsp, 0));
#ifdef _LP64
movdbl(xmm0, Address(rsp, 0));
#endif // _LP64
// NOTE: we must not use call_VM_leaf here because that requires a
// complete interpreter frame in debug mode -- same bug as 4387334
// MacroAssembler::call_VM_leaf_base is perfectly safe and will
// do proper 64bit abi
NEEDS_CLEANUP;
// Need to add stack banging before this runtime call if it needs to
// be taken; however, there is no generic stack banging routine at
// the MacroAssembler level
switch(trig) {
case 's':
{
MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
}
break;
case 'c':
{
MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
}
break;
case 't':
{
MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
}
break;
default:
assert(false, "bad intrinsic");
break;
}
#ifdef _LP64
movsd(Address(rsp, 0), xmm0);
fld_d(Address(rsp, 0));
#endif // _LP64
addptr(rsp, sizeof(jdouble));
if (num_fpu_regs_in_use > 1) {
// Must save return value to stack and then restore entire FPU stack
fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
for (int i = 0; i < num_fpu_regs_in_use; i++) {
fld_d(Address(rsp, 0));
addptr(rsp, sizeof(jdouble));
}
}
popa();
// Come here with result in F-TOS
bind(done);

View File

@ -1148,6 +1148,9 @@ private:
void fxsave(Address dst);
void fyl2x();
void frndint();
void f2xm1();
void fldl2e();
void hlt();
@ -2387,7 +2390,28 @@ class MacroAssembler: public Assembler {
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
void ldmxcsr(AddressLiteral src);
// compute pow(x,y) and exp(x) with x86 instructions. Don't cover
// all corner cases and may result in NaN and require fallback to a
// runtime call.
void fast_pow();
void fast_exp();
// computes exp(x). Fallback to runtime call included.
// x is taken from the top of the x87 stack and replaced by the result;
// kills rax, rcx, rdx. num_fpu_regs_in_use is the number of x87 registers
// in use and is passed on to the runtime fallback.
void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); }
// computes pow(x,y). Fallback to runtime call included.
// x is taken from ST(0) and y from ST(1); both are replaced by the result
// on top of the x87 stack; kills rax, rcx, rdx. num_fpu_regs_in_use is the
// number of x87 registers in use and is passed on to the runtime fallback.
void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); }
private:
// call runtime as a fallback for trig functions and pow/exp.
void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use);
// computes 2^(Ylog2X); Ylog2X in ST(0)
void pow_exp_core_encoding();
// computes pow(x,y) or exp(x). Fallback to runtime call included.
void pow_or_exp(bool is_exp, int num_fpu_regs_in_use);
// these are private because users should be doing movflt/movdbl
void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }

View File

@ -2446,6 +2446,12 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
// Should consider not saving rbx, if not necessary
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
break;
case lir_exp :
__ exp_with_fallback(op->as_Op2()->fpu_stack_size());
break;
case lir_pow :
__ pow_with_fallback(op->as_Op2()->fpu_stack_size());
break;
default : ShouldNotReachHere();
}
} else {

View File

@ -823,7 +823,7 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
assert(x->number_of_arguments() == 1, "wrong type");
assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type");
LIRItem value(x->argument_at(0), this);
bool use_fpu = false;
@ -834,6 +834,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
case vmIntrinsics::_dtan:
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:
case vmIntrinsics::_dexp:
case vmIntrinsics::_dpow:
use_fpu = true;
}
} else {
@ -843,20 +845,37 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
value.load_item();
LIR_Opr calc_input = value.result();
LIR_Opr calc_input2 = NULL;
if (x->id() == vmIntrinsics::_dpow) {
LIRItem extra_arg(x->argument_at(1), this);
if (UseSSE < 2) {
extra_arg.set_destroys_register();
}
extra_arg.load_item();
calc_input2 = extra_arg.result();
}
LIR_Opr calc_result = rlock_result(x);
// sin and cos need two free fpu stack slots, so register two temporary operands
// sin, cos, pow and exp need two free fpu stack slots, so register
// two temporary operands
LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0);
LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1);
if (use_fpu) {
LIR_Opr tmp = FrameMap::fpu0_double_opr;
int tmp_start = 1;
if (calc_input2 != NULL) {
__ move(calc_input2, tmp);
tmp_start = 2;
calc_input2 = tmp;
}
__ move(calc_input, tmp);
calc_input = tmp;
calc_result = tmp;
tmp1 = FrameMap::caller_save_fpu_reg_at(1);
tmp2 = FrameMap::caller_save_fpu_reg_at(2);
tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start);
tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1);
}
switch(x->id()) {
@ -867,6 +886,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
case vmIntrinsics::_dlog: __ log (calc_input, calc_result, tmp1); break;
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
case vmIntrinsics::_dexp: __ exp (calc_input, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
case vmIntrinsics::_dpow: __ pow (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
default: ShouldNotReachHere();
}

View File

@ -690,8 +690,8 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
case lir_mul_strictfp:
case lir_div_strictfp: {
assert(op2->tmp_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot");
insert_free_if_dead(op2->tmp_opr());
assert(op2->tmp1_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot");
insert_free_if_dead(op2->tmp1_opr());
assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
// fall-through: continue with the normal handling of lir_mul and lir_div
}
@ -787,16 +787,17 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
case lir_log:
case lir_log10: {
// log and log10 needs one temporary fpu stack slot, so there is ontemporary
// registers stored in temp of the operation.
// the stack allocator must guarantee that the stack slots are really free,
// otherwise there might be a stack overflow.
// log and log10 need one temporary fpu stack slot, so
// there is one temporary register stored in temp of the
// operation. The stack allocator must guarantee that the stack
// slot is really free, otherwise there might be a stack
// overflow.
assert(right->is_illegal(), "must be");
assert(left->is_fpu_register(), "must be");
assert(res->is_fpu_register(), "must be");
assert(op2->tmp_opr()->is_fpu_register(), "must be");
assert(op2->tmp1_opr()->is_fpu_register(), "must be");
insert_free_if_dead(op2->tmp_opr());
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(res, left);
insert_exchange(left);
do_rename(left, res);
@ -812,8 +813,9 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
case lir_tan:
case lir_sin:
case lir_cos: {
// sin and cos need two temporary fpu stack slots, so there are two temporary
case lir_cos:
case lir_exp: {
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary
// registers (stored in right and temp of the operation).
// the stack allocator must guarantee that the stack slots are really free,
// otherwise there might be a stack overflow.
@ -821,11 +823,11 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
assert(res->is_fpu_register(), "must be");
// assert(left->is_last_use(), "old value gets destroyed");
assert(right->is_fpu_register(), "right is used as the first temporary register");
assert(op2->tmp_opr()->is_fpu_register(), "temp is used as the second temporary register");
assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp_opr()) && fpu_num(op2->tmp_opr()) != fpu_num(res), "need distinct temp registers");
assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register");
assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
insert_free_if_dead(right);
insert_free_if_dead(op2->tmp_opr());
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(res, left);
insert_exchange(left);
@ -839,6 +841,53 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
break;
}
case lir_pow: {
// pow needs two temporary fpu stack slots, so there are two temporary
// registers (stored in tmp1 and tmp2 of the operation).
// the stack allocator must guarantee that the stack slots are really free,
// otherwise there might be a stack overflow.
assert(left->is_fpu_register(), "must be");
assert(right->is_fpu_register(), "must be");
assert(res->is_fpu_register(), "must be");
assert(op2->tmp1_opr()->is_fpu_register(), "tmp1 is the first temporary register");
assert(op2->tmp2_opr()->is_fpu_register(), "tmp2 is the second temporary register");
assert(fpu_num(left) != fpu_num(right) && fpu_num(left) != fpu_num(op2->tmp1_opr()) && fpu_num(left) != fpu_num(op2->tmp2_opr()) && fpu_num(left) != fpu_num(res), "need distinct temp registers");
assert(fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(right) != fpu_num(op2->tmp2_opr()) && fpu_num(right) != fpu_num(res), "need distinct temp registers");
assert(fpu_num(op2->tmp1_opr()) != fpu_num(op2->tmp2_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
assert(fpu_num(op2->tmp2_opr()) != fpu_num(res), "need distinct temp registers");
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(op2->tmp2_opr());
// Must bring both operands to top of stack with following operand ordering:
// * fpu stack before pow: ... right left
// * fpu stack after pow: ... left
insert_free_if_dead(res, right);
if (tos_offset(right) != 1) {
insert_exchange(right);
insert_exchange(1);
}
insert_exchange(left);
assert(tos_offset(right) == 1, "check");
assert(tos_offset(left) == 0, "check");
new_left = to_fpu_stack_top(left);
new_right = to_fpu_stack(right);
op2->set_fpu_stack_size(sim()->stack_size());
assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
sim()->pop();
do_rename(right, res);
new_res = to_fpu_stack_top(res);
break;
}
default: {
assert(false, "missed a fpu-operation");
}

View File

@ -181,6 +181,19 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
__ push_fTOS();
__ pop_fTOS();
break;
case Interpreter::java_lang_math_pow:
__ fld_d(Address(rsp, 3*wordSize)); // second argument
__ pow_with_fallback(0);
// Store to stack to convert 80bit precision back to 64bits
__ push_fTOS();
__ pop_fTOS();
break;
case Interpreter::java_lang_math_exp:
__ exp_with_fallback(0);
// Store to stack to convert 80bit precision back to 64bits
__ push_fTOS();
__ pop_fTOS();
break;
default :
ShouldNotReachHere();
}

View File

@ -271,6 +271,14 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
case Interpreter::java_lang_math_log10:
__ flog10();
break;
case Interpreter::java_lang_math_pow:
__ fld_d(Address(rsp, 3*wordSize)); // second argument (one
// empty stack slot)
__ pow_with_fallback(0);
break;
case Interpreter::java_lang_math_exp:
__ exp_with_fallback(0);
break;
default :
ShouldNotReachHere();
}

View File

@ -2136,11 +2136,23 @@ class StubGenerator: public StubCodeGenerator {
__ trigfunc('t');
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "exp");
StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
// The intrinsic version of these seem to return the same value as
// the strict version.
StubRoutines::_intrinsic_exp = SharedRuntime::dexp;
StubRoutines::_intrinsic_pow = SharedRuntime::dpow;
__ fld_d(Address(rsp, 4));
__ exp_with_fallback(0);
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "pow");
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
__ fld_d(Address(rsp, 12));
__ fld_d(Address(rsp, 4));
__ pow_with_fallback(0);
__ ret(0);
}
}
public:

View File

@ -2928,11 +2928,34 @@ class StubGenerator: public StubCodeGenerator {
__ addq(rsp, 8);
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "exp");
StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
// The intrinsic version of these seem to return the same value as
// the strict version.
StubRoutines::_intrinsic_exp = SharedRuntime::dexp;
StubRoutines::_intrinsic_pow = SharedRuntime::dpow;
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ exp_with_fallback(0);
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addq(rsp, 8);
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "pow");
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm1);
__ fld_d(Address(rsp, 0));
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ pow_with_fallback(0);
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addq(rsp, 8);
__ ret(0);
}
}
#undef __

View File

@ -1518,7 +1518,9 @@ address AbstractInterpreterGenerator::generate_method_entry(AbstractInterpreter:
case Interpreter::java_lang_math_abs : // fall thru
case Interpreter::java_lang_math_log : // fall thru
case Interpreter::java_lang_math_log10 : // fall thru
case Interpreter::java_lang_math_sqrt : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break;
case Interpreter::java_lang_math_sqrt : // fall thru
case Interpreter::java_lang_math_pow : // fall thru
case Interpreter::java_lang_math_exp : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break;
case Interpreter::java_lang_ref_reference_get
: entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
default : ShouldNotReachHere(); break;
@ -1540,7 +1542,9 @@ bool AbstractInterpreter::can_be_compiled(methodHandle m) {
case Interpreter::java_lang_math_abs : // fall thru
case Interpreter::java_lang_math_log : // fall thru
case Interpreter::java_lang_math_log10 : // fall thru
case Interpreter::java_lang_math_sqrt :
case Interpreter::java_lang_math_sqrt : // fall thru
case Interpreter::java_lang_math_pow : // fall thru
case Interpreter::java_lang_math_exp :
return false;
default:
return true;

View File

@ -1534,7 +1534,9 @@ address AbstractInterpreterGenerator::generate_method_entry(
case Interpreter::java_lang_math_abs : // fall thru
case Interpreter::java_lang_math_log : // fall thru
case Interpreter::java_lang_math_log10 : // fall thru
case Interpreter::java_lang_math_sqrt : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind); break;
case Interpreter::java_lang_math_sqrt : // fall thru
case Interpreter::java_lang_math_pow : // fall thru
case Interpreter::java_lang_math_exp : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind); break;
case Interpreter::java_lang_ref_reference_get
: entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
default : ShouldNotReachHere(); break;
@ -1558,7 +1560,9 @@ bool AbstractInterpreter::can_be_compiled(methodHandle m) {
case Interpreter::java_lang_math_abs : // fall thru
case Interpreter::java_lang_math_log : // fall thru
case Interpreter::java_lang_math_log10 : // fall thru
case Interpreter::java_lang_math_sqrt :
case Interpreter::java_lang_math_sqrt : // fall thru
case Interpreter::java_lang_math_pow : // fall thru
case Interpreter::java_lang_math_exp :
return false;
default:
return true;

View File

@ -2536,45 +2536,6 @@ encode %{
__ fld_d(Address(rsp, 0));
%}
// Compute X^Y using Intel's fast hardware instructions, if possible.
// Otherwise return a NaN.
//
// Shared tail of the pow and exp encodings: on entry ST(0) holds Q and the
// code leaves 2^Q in ST(0). It reads and writes the 8 bytes at [ESP+0] and
// [ESP+4] as scratch, overwrites EAX, EBX and ECX, and changes the flags
// (add/shl/test). If the biased exponent does not fit in 11 bits the scale
// factor becomes a NaN bit pattern, so the result is NaN rather than infinity.
enc_class pow_exp_core_encoding %{
// FPR1 holds Q (Y*log2(X) after fyl2x). Compute FPR1 = 2^Q.
// Trailing columns in the comments show the x87 stack, top first.
emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
emit_opcode(cbuf,0xDB); // FISTP dword [ESP] (pops int(Q)) frac(Q)
emit_opcode(cbuf,0x1C); // ModRM: /3, SIB follows
emit_d8(cbuf,0x24); // SIB: base = ESP, no index
emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
emit_opcode(cbuf,0x8B); // mov eax,[esp+0] = int(Q)
encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xC7); // mov ecx,0xFFFFF800 - overflow mask (all bits above the low 11)
emit_rm(cbuf, 0x3, 0x0, ECX_enc);
emit_d32(cbuf,0xFFFFF800);
emit_opcode(cbuf,0x81); // add eax,1023 - the double exponent bias
emit_rm(cbuf, 0x3, 0x0, EAX_enc);
emit_d32(cbuf,1023);
emit_opcode(cbuf,0x8B); // mov ebx,eax - keep the unshifted biased exponent for the range test
emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
emit_opcode(cbuf,0xC1); // shl eax,20 - Slide to exponent position in the high word
emit_rm(cbuf,0x3,0x4,EAX_enc);
emit_d8(cbuf,20);
emit_opcode(cbuf,0x85); // test ecx,ebx - nonzero if the biased exponent needs more than 11 bits
emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45); // CMOVne eax,ecx - overflow; high word 0xFFFFF800 is a NaN pattern
emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
emit_opcode(cbuf,0x89); // mov [esp+4],eax - Store as high word of the scaling double
encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
emit_opcode(cbuf,0xC7); // mov [esp+0],0 - low word zero, so [ESP] = 2^int(Q) as a double
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
emit_d32(cbuf,0);
emit_opcode(cbuf,0xDC); // fmul qword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
%}
enc_class Push_Result_Mod_DPR( regDPR src) %{
if ($src$$reg != FPR1L_enc) {
// fincstp
@ -10100,162 +10061,68 @@ instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
ins_pipe( pipe_slow );
%}
instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
predicate (UseSSE<=1);
match(Set Y (PowD X Y)); // Raise X to the Yth power
effect(KILL rax, KILL rbx, KILL rcx);
format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
"FLD_D $X\n\t"
"FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
"FDUP \t\t\t# Q Q\n\t"
"FRNDINT\t\t\t# int(Q) Q\n\t"
"FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
"FISTP dword [ESP]\n\t"
"F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
"FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
"FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
"MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
"MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
"ADD EAX,1023\t\t# Double exponent bias\n\t"
"MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
"SHL EAX,20\t\t# Shift exponent into place\n\t"
"TEST EBX,ECX\t\t# Check for overflow\n\t"
"CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
"MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
"MOV [ESP+0],0\n\t"
"FMUL ST(0),[ESP+0]\t# Scale\n\t"
"ADD ESP,8"
%}
ins_encode( push_stack_temp_qword,
Push_Reg_DPR(X),
Opcode(0xD9), Opcode(0xF1), // fyl2x
pow_exp_core_encoding,
pop_stack_temp_qword);
effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %}
ins_encode %{
__ subptr(rsp, 8);
__ fld_s($X$$reg - 1);
__ fast_pow();
__ addptr(rsp, 8);
%}
ins_pipe( pipe_slow );
%}
instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
"MOVSD [ESP],$src1\n\t"
"FLD FPR1,$src1\n\t"
"MOVSD [ESP],$src0\n\t"
"FLD FPR1,$src0\n\t"
"FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
"FDUP \t\t\t# Q Q\n\t"
"FRNDINT\t\t\t# int(Q) Q\n\t"
"FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
"FISTP dword [ESP]\n\t"
"F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
"FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
"FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
"MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
"MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
"ADD EAX,1023\t\t# Double exponent bias\n\t"
"MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
"SHL EAX,20\t\t# Shift exponent into place\n\t"
"TEST EBX,ECX\t\t# Check for overflow\n\t"
"CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
"MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
"MOV [ESP+0],0\n\t"
"FMUL ST(0),[ESP+0]\t# Scale\n\t"
"FST_D [ESP]\n\t"
"MOVSD $dst,[ESP]\n\t"
"ADD ESP,8"
%}
ins_encode( push_stack_temp_qword,
push_xmm_to_fpr1(src1),
push_xmm_to_fpr1(src0),
Opcode(0xD9), Opcode(0xF1), // fyl2x
pow_exp_core_encoding,
Push_ResultD(dst) );
effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %}
ins_encode %{
__ subptr(rsp, 8);
__ movdbl(Address(rsp, 0), $src1$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ movdbl(Address(rsp, 0), $src0$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ fast_pow();
__ fstp_d(Address(rsp, 0));
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
__ addptr(rsp, 8);
%}
ins_pipe( pipe_slow );
%}
instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
predicate (UseSSE<=1);
match(Set dpr1 (ExpD dpr1));
effect(KILL rax, KILL rbx, KILL rcx);
format %{ "SUB ESP,8\t\t# Fast-path EXP encoding"
"FLDL2E \t\t\t# Ld log2(e) X\n\t"
"FMULP \t\t\t# Q=X*log2(e)\n\t"
"FDUP \t\t\t# Q Q\n\t"
"FRNDINT\t\t\t# int(Q) Q\n\t"
"FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
"FISTP dword [ESP]\n\t"
"F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
"FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
"FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
"MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
"MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
"ADD EAX,1023\t\t# Double exponent bias\n\t"
"MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
"SHL EAX,20\t\t# Shift exponent into place\n\t"
"TEST EBX,ECX\t\t# Check for overflow\n\t"
"CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
"MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
"MOV [ESP+0],0\n\t"
"FMUL ST(0),[ESP+0]\t# Scale\n\t"
"ADD ESP,8"
%}
ins_encode( push_stack_temp_qword,
Opcode(0xD9), Opcode(0xEA), // fldl2e
Opcode(0xDE), Opcode(0xC9), // fmulp
pow_exp_core_encoding,
pop_stack_temp_qword);
effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
format %{ "fast_exp $dpr1 -> $dpr1 // KILL $rax, $rcx, $rdx" %}
ins_encode %{
__ fast_exp();
%}
ins_pipe( pipe_slow );
%}
// ExpD for UseSSE>=2. The operand is in an XMM register, so the encoding
// spills it to an 8-byte stack slot, loads it onto the x87 stack, calls
// fast_exp(), and moves the result back to $dst through the same slot.
// NOTE(review): fast_exp() is defined outside this view; the KILL list is
// taken on trust as matching what it clobbers.
instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst (ExpD src));
effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
"MOVSD [ESP],$src\n\t"
"FLDL2E \t\t\t# Ld log2(e) X\n\t"
"FMULP \t\t\t# Q=X*log2(e) X\n\t"
"FDUP \t\t\t# Q Q\n\t"
"FRNDINT\t\t\t# int(Q) Q\n\t"
"FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
"FISTP dword [ESP]\n\t"
"F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
"FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
"FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
"MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
"MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
"ADD EAX,1023\t\t# Double exponent bias\n\t"
"MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
"SHL EAX,20\t\t# Shift exponent into place\n\t"
"TEST EBX,ECX\t\t# Check for overflow\n\t"
"CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
"MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
"MOV [ESP+0],0\n\t"
"FMUL ST(0),[ESP+0]\t# Scale\n\t"
"FST_D [ESP]\n\t"
"MOVSD $dst,[ESP]\n\t"
"ADD ESP,8"
%}
ins_encode( Push_SrcD(src),
Opcode(0xD9), Opcode(0xEA), // fldl2e
Opcode(0xDE), Opcode(0xC9), // fmulp
pow_exp_core_encoding,
Push_ResultD(dst) );
effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
// Print the input first and the result last, as powD_reg's format does.
format %{ "fast_exp $src -> $dst // KILL $rax, $rcx, $rdx" %}
ins_encode %{
__ subptr(rsp, 8);
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ fast_exp();
__ fstp_d(Address(rsp, 0));
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
__ addptr(rsp, 8);
%}
ins_pipe( pipe_slow );
%}
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
// The source Double operand on FPU stack

View File

@ -9823,7 +9823,39 @@ instruct logD_reg(regD dst) %{
ins_pipe( pipe_slow );
%}
// PowD on x86_64: raises $src0 to the power $src1 and leaves the result in
// $dst. Both operands arrive in XMM registers and are moved to the x87 stack
// through an 8-byte scratch slot on the CPU stack; the result comes back the
// same way. RAX, RCX, RDX and the flags are declared killed.
// NOTE(review): fast_pow() is defined outside this view; the x87 stack layout
// it expects (src1 loaded first, then src0) is inferred from the load order
// here. Confirm against the macro assembler.
instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %}
ins_encode %{
// Reserve the scratch slot used for every XMM <-> x87 transfer below.
__ subptr(rsp, 8);
// Load the exponent first, then the base, each via the scratch slot.
__ movdbl(Address(rsp, 0), $src1$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ movdbl(Address(rsp, 0), $src0$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ fast_pow();
// Store the x87 result to the slot, reload it into $dst, release the slot.
__ fstp_d(Address(rsp, 0));
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
__ addptr(rsp, 8);
%}
ins_pipe( pipe_slow );
%}
// ExpD on x86_64: computes exp($src) into $dst. The operand is moved from its
// XMM register to the x87 stack through an 8-byte scratch slot on the CPU
// stack, fast_exp() runs on the x87 value, and the result returns to $dst
// through the same slot. RAX, RCX, RDX and the flags are declared killed.
// NOTE(review): fast_exp() is defined outside this view; the KILL list is
// taken on trust as matching what it clobbers.
instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
match(Set dst (ExpD src));
effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
// Print the input first and the result last, as powD_reg's format does.
format %{ "fast_exp $src -> $dst // KILL $rax, $rcx, $rdx" %}
ins_encode %{
// Reserve the scratch slot and push the operand onto the x87 stack.
__ subptr(rsp, 8);
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ fast_exp();
// Store the x87 result to the slot, reload it into $dst, release the slot.
__ fstp_d(Address(rsp, 0));
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
__ addptr(rsp, 8);
%}
ins_pipe( pipe_slow );
%}
//----------Arithmetic Conversion Instructions---------------------------------

View File

@ -2949,6 +2949,8 @@ GraphBuilder::GraphBuilder(Compilation* compilation, IRScope* scope)
case vmIntrinsics::_dtan : // fall through
case vmIntrinsics::_dlog : // fall through
case vmIntrinsics::_dlog10 : // fall through
case vmIntrinsics::_dexp : // fall through
case vmIntrinsics::_dpow : // fall through
{
// Compiles where the root method is an intrinsic need a special
// compilation environment because the bytecodes for the method
@ -2969,6 +2971,9 @@ GraphBuilder::GraphBuilder(Compilation* compilation, IRScope* scope)
_state = start_block->state()->copy_for_parsing();
_last = start_block;
load_local(doubleType, 0);
if (scope->method()->intrinsic_id() == vmIntrinsics::_dpow) {
load_local(doubleType, 2);
}
// Emit the intrinsic node.
bool result = try_inline_intrinsics(scope->method());
@ -3182,6 +3187,8 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
case vmIntrinsics::_dtan : // fall through
case vmIntrinsics::_dlog : // fall through
case vmIntrinsics::_dlog10 : // fall through
case vmIntrinsics::_dexp : // fall through
case vmIntrinsics::_dpow : // fall through
if (!InlineMathNatives) return false;
cantrap = false;
preserves_state = true;

View File

@ -624,11 +624,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
{
assert(op->as_Op2() != NULL, "must be");
LIR_Op2* op2 = (LIR_Op2*)op;
assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() &&
op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used");
if (op2->_info) do_info(op2->_info);
if (op2->_opr1->is_valid()) do_input(op2->_opr1);
if (op2->_opr2->is_valid()) do_input(op2->_opr2);
if (op2->_tmp->is_valid()) do_temp(op2->_tmp);
if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1);
if (op2->_result->is_valid()) do_output(op2->_result);
break;
@ -641,7 +643,8 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
assert(op->as_Op2() != NULL, "must be");
LIR_Op2* op2 = (LIR_Op2*)op;
assert(op2->_info == NULL && op2->_tmp->is_illegal(), "not used");
assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() &&
op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used");
assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used");
do_input(op2->_opr1);
@ -665,10 +668,12 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
assert(op2->_opr1->is_valid(), "used");
assert(op2->_opr2->is_valid(), "used");
assert(op2->_result->is_valid(), "used");
assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() &&
op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used");
do_input(op2->_opr1); do_temp(op2->_opr1);
do_input(op2->_opr2); do_temp(op2->_opr2);
if (op2->_tmp->is_valid()) do_temp(op2->_tmp);
if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1);
do_output(op2->_result);
break;
@ -682,6 +687,8 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
if (op2->_opr1->is_valid()) do_temp(op2->_opr1);
if (op2->_opr2->is_valid()) do_input(op2->_opr2); // exception object is input parameter
assert(op2->_result->is_illegal(), "no result");
assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() &&
op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used");
break;
}
@ -702,7 +709,8 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
case lir_sin:
case lir_cos:
case lir_log:
case lir_log10: {
case lir_log10:
case lir_exp: {
assert(op->as_Op2() != NULL, "must be");
LIR_Op2* op2 = (LIR_Op2*)op;
@ -711,16 +719,47 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
// Register input operand as temp to guarantee that it doesn't
// overlap with the input.
assert(op2->_info == NULL, "not used");
assert(op2->_tmp5->is_illegal(), "not used");
assert(op2->_tmp2->is_valid() == (op->code() == lir_exp), "not used");
assert(op2->_tmp3->is_valid() == (op->code() == lir_exp), "not used");
assert(op2->_tmp4->is_valid() == (op->code() == lir_exp), "not used");
assert(op2->_opr1->is_valid(), "used");
do_input(op2->_opr1); do_temp(op2->_opr1);
if (op2->_opr2->is_valid()) do_temp(op2->_opr2);
if (op2->_tmp->is_valid()) do_temp(op2->_tmp);
if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1);
if (op2->_tmp2->is_valid()) do_temp(op2->_tmp2);
if (op2->_tmp3->is_valid()) do_temp(op2->_tmp3);
if (op2->_tmp4->is_valid()) do_temp(op2->_tmp4);
if (op2->_result->is_valid()) do_output(op2->_result);
break;
}
case lir_pow: {
assert(op->as_Op2() != NULL, "must be");
LIR_Op2* op2 = (LIR_Op2*)op;
// On x86 pow needs two temporary fpu stack slots: tmp1 and
// tmp2. Register the input operands as temps to guarantee that
// they don't overlap with the temporary slots.
assert(op2->_info == NULL, "not used");
assert(op2->_opr1->is_valid() && op2->_opr2->is_valid(), "used");
assert(op2->_tmp1->is_valid() && op2->_tmp2->is_valid() && op2->_tmp3->is_valid()
&& op2->_tmp4->is_valid() && op2->_tmp5->is_valid(), "used");
assert(op2->_result->is_valid(), "used");
do_input(op2->_opr1); do_temp(op2->_opr1);
do_input(op2->_opr2); do_temp(op2->_opr2);
do_temp(op2->_tmp1);
do_temp(op2->_tmp2);
do_temp(op2->_tmp3);
do_temp(op2->_tmp4);
do_temp(op2->_tmp5);
do_output(op2->_result);
break;
}
// LIR_Op3
case lir_idiv:
@ -1670,6 +1709,8 @@ const char * LIR_Op::name() const {
case lir_tan: s = "tan"; break;
case lir_log: s = "log"; break;
case lir_log10: s = "log10"; break;
case lir_exp: s = "exp"; break;
case lir_pow: s = "pow"; break;
case lir_logic_and: s = "logic_and"; break;
case lir_logic_or: s = "logic_or"; break;
case lir_logic_xor: s = "logic_xor"; break;
@ -1892,7 +1933,11 @@ void LIR_Op2::print_instr(outputStream* out) const {
}
in_opr1()->print(out); out->print(" ");
in_opr2()->print(out); out->print(" ");
if (tmp_opr()->is_valid()) { tmp_opr()->print(out); out->print(" "); }
if (tmp1_opr()->is_valid()) { tmp1_opr()->print(out); out->print(" "); }
if (tmp2_opr()->is_valid()) { tmp2_opr()->print(out); out->print(" "); }
if (tmp3_opr()->is_valid()) { tmp3_opr()->print(out); out->print(" "); }
if (tmp4_opr()->is_valid()) { tmp4_opr()->print(out); out->print(" "); }
if (tmp5_opr()->is_valid()) { tmp5_opr()->print(out); out->print(" "); }
result_opr()->print(out);
}

View File

@ -916,6 +916,8 @@ enum LIR_Code {
, lir_tan
, lir_log
, lir_log10
, lir_exp
, lir_pow
, lir_logic_and
, lir_logic_or
, lir_logic_xor
@ -1560,7 +1562,11 @@ class LIR_Op2: public LIR_Op {
LIR_Opr _opr1;
LIR_Opr _opr2;
BasicType _type;
LIR_Opr _tmp;
LIR_Opr _tmp1;
LIR_Opr _tmp2;
LIR_Opr _tmp3;
LIR_Opr _tmp4;
LIR_Opr _tmp5;
LIR_Condition _condition;
void verify() const;
@ -1573,7 +1579,11 @@ class LIR_Op2: public LIR_Op {
, _type(T_ILLEGAL)
, _condition(condition)
, _fpu_stack_size(0)
, _tmp(LIR_OprFact::illegalOpr) {
, _tmp1(LIR_OprFact::illegalOpr)
, _tmp2(LIR_OprFact::illegalOpr)
, _tmp3(LIR_OprFact::illegalOpr)
, _tmp4(LIR_OprFact::illegalOpr)
, _tmp5(LIR_OprFact::illegalOpr) {
assert(code == lir_cmp, "code check");
}
@ -1584,7 +1594,11 @@ class LIR_Op2: public LIR_Op {
, _type(type)
, _condition(condition)
, _fpu_stack_size(0)
, _tmp(LIR_OprFact::illegalOpr) {
, _tmp1(LIR_OprFact::illegalOpr)
, _tmp2(LIR_OprFact::illegalOpr)
, _tmp3(LIR_OprFact::illegalOpr)
, _tmp4(LIR_OprFact::illegalOpr)
, _tmp5(LIR_OprFact::illegalOpr) {
assert(code == lir_cmove, "code check");
assert(type != T_ILLEGAL, "cmove should have type");
}
@ -1597,25 +1611,38 @@ class LIR_Op2: public LIR_Op {
, _type(type)
, _condition(lir_cond_unknown)
, _fpu_stack_size(0)
, _tmp(LIR_OprFact::illegalOpr) {
, _tmp1(LIR_OprFact::illegalOpr)
, _tmp2(LIR_OprFact::illegalOpr)
, _tmp3(LIR_OprFact::illegalOpr)
, _tmp4(LIR_OprFact::illegalOpr)
, _tmp5(LIR_OprFact::illegalOpr) {
assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
}
LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp)
LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr,
LIR_Opr tmp3 = LIR_OprFact::illegalOpr, LIR_Opr tmp4 = LIR_OprFact::illegalOpr, LIR_Opr tmp5 = LIR_OprFact::illegalOpr)
: LIR_Op(code, result, NULL)
, _opr1(opr1)
, _opr2(opr2)
, _type(T_ILLEGAL)
, _condition(lir_cond_unknown)
, _fpu_stack_size(0)
, _tmp(tmp) {
, _tmp1(tmp1)
, _tmp2(tmp2)
, _tmp3(tmp3)
, _tmp4(tmp4)
, _tmp5(tmp5) {
assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
}
LIR_Opr in_opr1() const { return _opr1; }
LIR_Opr in_opr2() const { return _opr2; }
BasicType type() const { return _type; }
LIR_Opr tmp_opr() const { return _tmp; }
LIR_Opr tmp1_opr() const { return _tmp1; }
LIR_Opr tmp2_opr() const { return _tmp2; }
LIR_Opr tmp3_opr() const { return _tmp3; }
LIR_Opr tmp4_opr() const { return _tmp4; }
LIR_Opr tmp5_opr() const { return _tmp5; }
LIR_Condition condition() const {
assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); return _condition;
}
@ -2025,6 +2052,8 @@ class LIR_List: public CompilationResourceObj {
// Appends a lir_sin LIR_Op2: `from` is the first operand, `tmp1` travels in
// the second-operand slot, `to` is the result and `tmp2` is the op's temp.
void sin (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) {
  LIR_Op2* sin_op = new LIR_Op2(lir_sin, from, tmp1, to, tmp2);
  append(sin_op);
}
// Appends a lir_cos LIR_Op2: `from` is the first operand, `tmp1` travels in
// the second-operand slot, `to` is the result and `tmp2` is the op's temp.
void cos (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) {
  LIR_Op2* cos_op = new LIR_Op2(lir_cos, from, tmp1, to, tmp2);
  append(cos_op);
}
// Appends a lir_tan LIR_Op2: `from` is the first operand, `tmp1` travels in
// the second-operand slot, `to` is the result and `tmp2` is the op's temp.
void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) {
  LIR_Op2* tan_op = new LIR_Op2(lir_tan, from, tmp1, to, tmp2);
  append(tan_op);
}
// Appends a lir_exp LIR_Op2. Note the slot shift: `tmp1` is passed as the
// op's second operand, so `tmp2`..`tmp5` here become the op's first four
// temps and the op's fifth temp keeps its default.
void exp (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) {
  LIR_Op2* exp_op = new LIR_Op2(lir_exp, from, tmp1, to, tmp2, tmp3, tmp4, tmp5);
  append(exp_op);
}
// Appends a lir_pow LIR_Op2 with `arg1` and `arg2` as its two operands,
// `res` as the result, and `tmp1`..`tmp5` filling the op's five temps in order.
void pow (LIR_Opr arg1, LIR_Opr arg2, LIR_Opr res, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) {
  LIR_Op2* pow_op = new LIR_Op2(lir_pow, arg1, arg2, res, tmp1, tmp2, tmp3, tmp4, tmp5);
  append(pow_op);
}
// Appends a lir_add LIR_Op2 over `left` and `right`, producing `res`.
void add (LIR_Opr left, LIR_Opr right, LIR_Opr res) {
  LIR_Op2* add_op = new LIR_Op2(lir_add, left, right, res);
  append(add_op);
}
// Appends a lir_sub LIR_Op2 over `left` and `right`, producing `res`;
// `info` (NULL by default) is forwarded to the op unchanged.
void sub (LIR_Opr left, LIR_Opr right, LIR_Opr res, CodeEmitInfo* info = NULL) {
  LIR_Op2* sub_op = new LIR_Op2(lir_sub, left, right, res, info);
  append(sub_op);
}

View File

@ -718,7 +718,7 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
if (op->in_opr2()->is_constant()) {
shift_op(op->code(), op->in_opr1(), op->in_opr2()->as_constant_ptr()->as_jint(), op->result_opr());
} else {
shift_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp_opr());
shift_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr());
}
break;
@ -746,6 +746,8 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
case lir_cos:
case lir_log:
case lir_log10:
case lir_exp:
case lir_pow:
intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
break;

View File

@ -2960,7 +2960,9 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
case vmIntrinsics::_dsqrt: // fall through
case vmIntrinsics::_dtan: // fall through
case vmIntrinsics::_dsin : // fall through
case vmIntrinsics::_dcos : do_MathIntrinsic(x); break;
case vmIntrinsics::_dcos : // fall through
case vmIntrinsics::_dexp : // fall through
case vmIntrinsics::_dpow : do_MathIntrinsic(x); break;
case vmIntrinsics::_arraycopy: do_ArrayCopy(x); break;
// java.nio.Buffer.checkIndex

View File

@ -6579,6 +6579,8 @@ void LinearScanStatistic::collect(LinearScan* allocator) {
case lir_abs:
case lir_log10:
case lir_log:
case lir_pow:
case lir_exp:
case lir_logic_and:
case lir_logic_or:
case lir_logic_xor:

View File

@ -107,6 +107,8 @@ class AbstractInterpreter: AllStatic {
java_lang_math_sqrt, // implementation of java.lang.Math.sqrt (x)
java_lang_math_log, // implementation of java.lang.Math.log (x)
java_lang_math_log10, // implementation of java.lang.Math.log10 (x)
java_lang_math_pow, // implementation of java.lang.Math.pow (x,y)
java_lang_math_exp, // implementation of java.lang.Math.exp (x)
java_lang_ref_reference_get, // implementation of java.lang.ref.Reference.get()
number_of_method_entries,
invalid = -1

View File

@ -221,6 +221,8 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(methodHandle m)
case vmIntrinsics::_dsqrt : return java_lang_math_sqrt ;
case vmIntrinsics::_dlog : return java_lang_math_log ;
case vmIntrinsics::_dlog10: return java_lang_math_log10;
case vmIntrinsics::_dpow : return java_lang_math_pow ;
case vmIntrinsics::_dexp : return java_lang_math_exp ;
case vmIntrinsics::_Reference_get:
return java_lang_ref_reference_get;

View File

@ -370,6 +370,8 @@ void TemplateInterpreterGenerator::generate_all() {
method_entry(java_lang_math_sqrt )
method_entry(java_lang_math_log )
method_entry(java_lang_math_log10)
method_entry(java_lang_math_exp )
method_entry(java_lang_math_pow )
method_entry(java_lang_ref_reference_get)
// all native method kinds (must be one contiguous block)

View File

@ -1557,9 +1557,6 @@ bool LibraryCallKit::inline_exp(vmIntrinsics::ID id) {
// ever again. NaN results require StrictMath.exp handling.
if (too_many_traps(Deoptimization::Reason_intrinsic)) return false;
// Do not intrinsify on older platforms which lack cmove.
if (ConditionalMoveLimit == 0) return false;
_sp += arg_size(); // restore stack pointer
Node *x = pop_math_arg();
Node *result = _gvn.transform(new (C, 2) ExpDNode(0,x));
@ -1802,15 +1799,11 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_sqrt(id) : false;
case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_abs(id) : false;
// These intrinsics don't work on X86. The ad implementation doesn't
// handle NaN's properly. Instead of returning infinity, the ad
// implementation returns a NaN on overflow. See bug: 6304089
// Once the ad implementations are fixed, change the code below
// to match the intrinsics above
case vmIntrinsics::_dexp: return
Matcher::has_match_rule(Op_ExpD) ? inline_exp(id) :
runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
case vmIntrinsics::_dpow: return
Matcher::has_match_rule(Op_PowD) ? inline_pow(id) :
runtime_math(OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
// These intrinsics are not yet correctly implemented

View File

@ -1314,7 +1314,5 @@ const Type *PowDNode::Value( PhaseTransform *phase ) const {
if( t2->base() != Type::DoubleCon ) return Type::DOUBLE;
double d1 = t1->getd();
double d2 = t2->getd();
if( d1 < 0.0 ) return Type::DOUBLE;
if( d2 < 0.0 ) return Type::DOUBLE;
return TypeD::make( StubRoutines::intrinsic_pow( d1, d2 ) );
}