8132207: update for x86 exp in the math lib
Add new java.lang.Math() intrinsics from x86 Reviewed-by: kvn, iveresov
This commit is contained in:
parent
12cba200a3
commit
5a633b1cbd
@ -770,6 +770,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
|
|||||||
case 0x55: // andnps
|
case 0x55: // andnps
|
||||||
case 0x56: // orps
|
case 0x56: // orps
|
||||||
case 0x57: // xorps
|
case 0x57: // xorps
|
||||||
|
case 0x59: //mulpd
|
||||||
case 0x6E: // movd
|
case 0x6E: // movd
|
||||||
case 0x7E: // movd
|
case 0x7E: // movd
|
||||||
case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
|
case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
|
||||||
@ -3030,6 +3031,15 @@ void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
|
|||||||
emit_int8(imm8);
|
emit_int8(imm8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extract the 16-bit word of src selected by imm8 into general-purpose
// register dst.
//
// Emits the SSE2 register form PEXTRW r32, xmm, imm8 (66 0F C5 /r ib), where
// ModRM.reg names the destination GPR and ModRM.r/m names the source XMM
// register. That matches both the SSE2 assert below and the (dst, src) operand
// order handed to simd_prefix_and_encode. The 66 0F 3A 15 form must not be
// used here: it is an SSE4.1 instruction and it swaps the roles of ModRM.reg
// and ModRM.r/m, so it only happens to work when dst and src share the same
// register encoding.
void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse2(), "");
  // dst is a GPR; it is passed to the prefix helper as the XMM register with
  // the same encoding so that it lands in the ModRM.reg field.
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
  emit_int8((unsigned char)0xC5);              // opcode (0F map)
  emit_int8((unsigned char)(0xC0 | encode));   // ModRM, register-direct
  emit_int8(imm8);                             // word selector
}
|
||||||
|
|
||||||
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
|
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
|
||||||
assert(VM_Version::supports_sse4_1(), "");
|
assert(VM_Version::supports_sse4_1(), "");
|
||||||
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
|
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
|
||||||
@ -3048,6 +3058,15 @@ void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
|
|||||||
emit_int8(imm8);
|
emit_int8(imm8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Insert a 16-bit word taken from general-purpose register src into the word
// of dst selected by imm8 (PINSRW xmm, r32, imm8: 66 0F C4 /r ib).
void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
  assert(VM_Version::supports_sse2(), "");
  // src is a GPR; hand it to the prefix helper as the XMM register that
  // carries the same encoding.
  XMMRegister src_as_xmm = as_XMMRegister(src->encoding());
  int reg_bits = simd_prefix_and_encode(dst, dst, src_as_xmm, VEX_SIMD_66, /* no_mask_reg */ true,
                                        VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
  emit_int8((unsigned char)0xC4);                // opcode
  emit_int8((unsigned char)(0xC0 | reg_bits));   // ModRM, register-direct
  emit_int8(imm8);                               // word selector
}
|
||||||
|
|
||||||
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
|
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
|
||||||
assert(VM_Version::supports_sse4_1(), "");
|
assert(VM_Version::supports_sse4_1(), "");
|
||||||
if (VM_Version::supports_evex()) {
|
if (VM_Version::supports_evex()) {
|
||||||
@ -4063,6 +4082,16 @@ void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MULPD with a memory source operand (opcode 0x59, 66 prefix).
void Assembler::mulpd(XMMRegister dst, Address src) {
  _instruction_uses_vl = true;
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (!VM_Version::supports_evex()) {
    emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
    return;
  }
  // EVEX-capable CPUs use the _q emitter variant.
  emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
}
|
||||||
|
|
||||||
void Assembler::mulps(XMMRegister dst, XMMRegister src) {
|
void Assembler::mulps(XMMRegister dst, XMMRegister src) {
|
||||||
_instruction_uses_vl = true;
|
_instruction_uses_vl = true;
|
||||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||||
@ -4251,6 +4280,26 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
|||||||
emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
|
emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UNPCKHPD, register-register form (opcode 0x15, 66 prefix).
void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
  _instruction_uses_vl = true;
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  const bool has_evex = VM_Version::supports_evex();
  if (!has_evex) {
    emit_simd_arith(0x15, dst, src, VEX_SIMD_66);
    return;
  }
  // EVEX-capable CPUs use the _q emitter variant.
  emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66);
}
|
||||||
|
|
||||||
|
// UNPCKLPD, register-register form (opcode 0x14, 66 prefix).
void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
  _instruction_uses_vl = true;
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  const bool has_evex = VM_Version::supports_evex();
  if (!has_evex) {
    emit_simd_arith(0x14, dst, src, VEX_SIMD_66);
    return;
  }
  // EVEX-capable CPUs use the _q emitter variant.
  emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66);
}
|
||||||
|
|
||||||
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
|
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
|
||||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||||
if (VM_Version::supports_avx512dq()) {
|
if (VM_Version::supports_avx512dq()) {
|
||||||
@ -4871,8 +4920,9 @@ void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// AND packed integers
|
// logical operations packed integers
|
||||||
void Assembler::pand(XMMRegister dst, XMMRegister src) {
|
void Assembler::pand(XMMRegister dst, XMMRegister src) {
|
||||||
|
_instruction_uses_vl = true;
|
||||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||||
emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
|
emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
|
||||||
}
|
}
|
||||||
@ -4893,6 +4943,17 @@ void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_
|
|||||||
emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
|
emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PANDN, register-register form (opcode 0xDF, 66 prefix).
void Assembler::pandn(XMMRegister dst, XMMRegister src) {
  _instruction_uses_vl = true;
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (!VM_Version::supports_evex()) {
    emit_simd_arith(0xDF, dst, src, VEX_SIMD_66);
    return;
  }
  // EVEX-capable CPUs use the _q emitter variant.
  emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66);
}
|
||||||
|
|
||||||
void Assembler::por(XMMRegister dst, XMMRegister src) {
|
void Assembler::por(XMMRegister dst, XMMRegister src) {
|
||||||
_instruction_uses_vl = true;
|
_instruction_uses_vl = true;
|
||||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||||
|
@ -1679,10 +1679,14 @@ private:
|
|||||||
// SSE 4.1 extract
|
// SSE 4.1 extract
|
||||||
void pextrd(Register dst, XMMRegister src, int imm8);
|
void pextrd(Register dst, XMMRegister src, int imm8);
|
||||||
void pextrq(Register dst, XMMRegister src, int imm8);
|
void pextrq(Register dst, XMMRegister src, int imm8);
|
||||||
|
// SSE 2 extract
|
||||||
|
void pextrw(Register dst, XMMRegister src, int imm8);
|
||||||
|
|
||||||
// SSE 4.1 insert
|
// SSE 4.1 insert
|
||||||
void pinsrd(XMMRegister dst, Register src, int imm8);
|
void pinsrd(XMMRegister dst, Register src, int imm8);
|
||||||
void pinsrq(XMMRegister dst, Register src, int imm8);
|
void pinsrq(XMMRegister dst, Register src, int imm8);
|
||||||
|
// SSE 2 insert
|
||||||
|
void pinsrw(XMMRegister dst, Register src, int imm8);
|
||||||
|
|
||||||
// SSE4.1 packed move
|
// SSE4.1 packed move
|
||||||
void pmovzxbw(XMMRegister dst, XMMRegister src);
|
void pmovzxbw(XMMRegister dst, XMMRegister src);
|
||||||
@ -1933,6 +1937,7 @@ private:
|
|||||||
|
|
||||||
// Multiply Packed Floating-Point Values
|
// Multiply Packed Floating-Point Values
|
||||||
void mulpd(XMMRegister dst, XMMRegister src);
|
void mulpd(XMMRegister dst, XMMRegister src);
|
||||||
|
void mulpd(XMMRegister dst, Address src);
|
||||||
void mulps(XMMRegister dst, XMMRegister src);
|
void mulps(XMMRegister dst, XMMRegister src);
|
||||||
void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
@ -1959,6 +1964,9 @@ private:
|
|||||||
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||||
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||||
|
|
||||||
|
void unpckhpd(XMMRegister dst, XMMRegister src);
|
||||||
|
void unpcklpd(XMMRegister dst, XMMRegister src);
|
||||||
|
|
||||||
// Bitwise Logical XOR of Packed Floating-Point Values
|
// Bitwise Logical XOR of Packed Floating-Point Values
|
||||||
void xorpd(XMMRegister dst, XMMRegister src);
|
void xorpd(XMMRegister dst, XMMRegister src);
|
||||||
void xorps(XMMRegister dst, XMMRegister src);
|
void xorps(XMMRegister dst, XMMRegister src);
|
||||||
@ -2054,6 +2062,9 @@ private:
|
|||||||
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||||
|
|
||||||
|
// Andn packed integers
|
||||||
|
void pandn(XMMRegister dst, XMMRegister src);
|
||||||
|
|
||||||
// Or packed integers
|
// Or packed integers
|
||||||
void por(XMMRegister dst, XMMRegister src);
|
void por(XMMRegister dst, XMMRegister src);
|
||||||
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
|
@ -2457,9 +2457,6 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
|
|||||||
// Should consider not saving rbx, if not necessary
|
// Should consider not saving rbx, if not necessary
|
||||||
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
|
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
|
||||||
break;
|
break;
|
||||||
case lir_exp :
|
|
||||||
__ exp_with_fallback(op->as_Op2()->fpu_stack_size());
|
|
||||||
break;
|
|
||||||
case lir_pow :
|
case lir_pow :
|
||||||
__ pow_with_fallback(op->as_Op2()->fpu_stack_size());
|
__ pow_with_fallback(op->as_Op2()->fpu_stack_size());
|
||||||
break;
|
break;
|
||||||
|
@ -808,6 +808,12 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
|
|||||||
|
|
||||||
void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
||||||
assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type");
|
assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type");
|
||||||
|
|
||||||
|
if (x->id() == vmIntrinsics::_dexp) {
|
||||||
|
do_ExpIntrinsic(x);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
LIRItem value(x->argument_at(0), this);
|
LIRItem value(x->argument_at(0), this);
|
||||||
|
|
||||||
bool use_fpu = false;
|
bool use_fpu = false;
|
||||||
@ -818,7 +824,6 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
|||||||
case vmIntrinsics::_dtan:
|
case vmIntrinsics::_dtan:
|
||||||
case vmIntrinsics::_dlog:
|
case vmIntrinsics::_dlog:
|
||||||
case vmIntrinsics::_dlog10:
|
case vmIntrinsics::_dlog10:
|
||||||
case vmIntrinsics::_dexp:
|
|
||||||
case vmIntrinsics::_dpow:
|
case vmIntrinsics::_dpow:
|
||||||
use_fpu = true;
|
use_fpu = true;
|
||||||
}
|
}
|
||||||
@ -870,7 +875,6 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
|||||||
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
|
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
|
||||||
case vmIntrinsics::_dlog: __ log (calc_input, calc_result, tmp1); break;
|
case vmIntrinsics::_dlog: __ log (calc_input, calc_result, tmp1); break;
|
||||||
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
|
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
|
||||||
case vmIntrinsics::_dexp: __ exp (calc_input, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
|
|
||||||
case vmIntrinsics::_dpow: __ pow (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
|
case vmIntrinsics::_dpow: __ pow (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
|
||||||
default: ShouldNotReachHere();
|
default: ShouldNotReachHere();
|
||||||
}
|
}
|
||||||
@ -880,6 +884,32 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lowers Math.exp to a leaf runtime call: the single double argument is forced
// into the location given by the C calling convention, the exp routine is
// called, and the value in the result register is moved to the intrinsic's
// result operand.
void LIRGenerator::do_ExpIntrinsic(Intrinsic* x) {
  LIRItem value(x->argument_at(0), this);
  value.set_destroys_register();

  LIR_Opr calc_result = rlock_result(x);
  LIR_Opr result_reg = result_register_for(x->type());

  // One T_DOUBLE argument, laid out per the C calling convention.
  BasicTypeList signature(1);
  signature.append(T_DOUBLE);
  CallingConvention* cc = frame_map()->c_calling_convention(&signature);

  value.load_item_force(cc->at(0));

#ifndef _LP64
  // 32-bit: the result is read from fpu0. The stub is used only when SSE2 is
  // available; otherwise the call goes to SharedRuntime::dexp.
  result_reg = FrameMap::fpu0_double_opr;
  address entry = VM_Version::supports_sse2()
                    ? StubRoutines::dexp()
                    : CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
#else
  address entry = StubRoutines::dexp();
#endif
  __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args());
  __ move(result_reg, calc_result);
}
|
||||||
|
|
||||||
void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
|
void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
|
||||||
assert(x->number_of_arguments() == 5, "wrong type");
|
assert(x->number_of_arguments() == 5, "wrong type");
|
||||||
|
@ -814,8 +814,7 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
|
|||||||
|
|
||||||
case lir_tan:
|
case lir_tan:
|
||||||
case lir_sin:
|
case lir_sin:
|
||||||
case lir_cos:
|
case lir_cos: {
|
||||||
case lir_exp: {
|
|
||||||
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary
|
// tan, sin and cos need two temporary fpu stack slots, so there are two temporary
|
||||||
// registers (stored in right and temp of the operation).
|
// registers (stored in right and temp of the operation).
|
||||||
// the stack allocator must guarantee that the stack slots are really free,
|
// the stack allocator must guarantee that the stack slots are really free,
|
||||||
|
@ -151,10 +151,14 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
|
|||||||
__ pop_fTOS();
|
__ pop_fTOS();
|
||||||
break;
|
break;
|
||||||
case Interpreter::java_lang_math_exp:
|
case Interpreter::java_lang_math_exp:
|
||||||
__ exp_with_fallback(0);
|
__ subptr(rsp, 2*wordSize);
|
||||||
// Store to stack to convert 80bit precision back to 64bits
|
__ fstp_d(Address(rsp, 0));
|
||||||
__ push_fTOS();
|
if (VM_Version::supports_sse2()) {
|
||||||
__ pop_fTOS();
|
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
|
||||||
|
} else {
|
||||||
|
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
|
||||||
|
}
|
||||||
|
__ addptr(rsp, 2*wordSize);
|
||||||
break;
|
break;
|
||||||
default :
|
default :
|
||||||
ShouldNotReachHere();
|
ShouldNotReachHere();
|
||||||
|
@ -252,6 +252,9 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
|
|||||||
|
|
||||||
if (kind == Interpreter::java_lang_math_sqrt) {
|
if (kind == Interpreter::java_lang_math_sqrt) {
|
||||||
__ sqrtsd(xmm0, Address(rsp, wordSize));
|
__ sqrtsd(xmm0, Address(rsp, wordSize));
|
||||||
|
} else if (kind == Interpreter::java_lang_math_exp) {
|
||||||
|
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||||
|
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
|
||||||
} else {
|
} else {
|
||||||
__ fld_d(Address(rsp, wordSize));
|
__ fld_d(Address(rsp, wordSize));
|
||||||
switch (kind) {
|
switch (kind) {
|
||||||
@ -278,9 +281,6 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
|
|||||||
// empty stack slot)
|
// empty stack slot)
|
||||||
__ pow_with_fallback(0);
|
__ pow_with_fallback(0);
|
||||||
break;
|
break;
|
||||||
case Interpreter::java_lang_math_exp:
|
|
||||||
__ exp_with_fallback(0);
|
|
||||||
break;
|
|
||||||
default :
|
default :
|
||||||
ShouldNotReachHere();
|
ShouldNotReachHere();
|
||||||
}
|
}
|
||||||
|
@ -3033,6 +3033,15 @@ void MacroAssembler::fldcw(AddressLiteral src) {
|
|||||||
Assembler::fldcw(as_Address(src));
|
Assembler::fldcw(as_Address(src));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MULPD with an AddressLiteral source. When the literal is not directly
// reachable, its address is first loaded into rscratch1 (which is clobbered)
// and the operand is addressed through that register.
void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
  if (!reachable(src)) {
    lea(rscratch1, src);
    Assembler::mulpd(dst, Address(rscratch1, 0));
    return;
  }
  Assembler::mulpd(dst, as_Address(src));
}
|
||||||
|
|
||||||
void MacroAssembler::pow_exp_core_encoding() {
|
void MacroAssembler::pow_exp_core_encoding() {
|
||||||
// kills rax, rcx, rdx
|
// kills rax, rcx, rdx
|
||||||
subptr(rsp,sizeof(jdouble));
|
subptr(rsp,sizeof(jdouble));
|
||||||
@ -3105,19 +3114,7 @@ void MacroAssembler::fast_pow() {
|
|||||||
BLOCK_COMMENT("} fast_pow");
|
BLOCK_COMMENT("} fast_pow");
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroAssembler::fast_exp() {
|
void MacroAssembler::pow_or_exp(int num_fpu_regs_in_use) {
|
||||||
// computes exp(X) = 2^(X * log2(e))
|
|
||||||
// if fast computation is not possible, result is NaN. Requires
|
|
||||||
// fallback from user of this macro.
|
|
||||||
// increase precision for intermediate steps of the computation
|
|
||||||
increase_precision();
|
|
||||||
fldl2e(); // Stack: log2(e) X ...
|
|
||||||
fmulp(1); // Stack: (X*log2(e)) ...
|
|
||||||
pow_exp_core_encoding(); // Stack: exp(X) ...
|
|
||||||
restore_precision();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
|
|
||||||
// kills rax, rcx, rdx
|
// kills rax, rcx, rdx
|
||||||
// pow and exp needs 2 extra registers on the fpu stack.
|
// pow needs 2 extra registers on the fpu stack.
|
||||||
Label slow_case, done;
|
Label slow_case, done;
|
||||||
@ -3129,22 +3126,6 @@ void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
|
|||||||
Register tmp2 = rax;
|
Register tmp2 = rax;
|
||||||
Register tmp3 = rcx;
|
Register tmp3 = rcx;
|
||||||
|
|
||||||
if (is_exp) {
|
|
||||||
// Stack: X
|
|
||||||
fld_s(0); // duplicate argument for runtime call. Stack: X X
|
|
||||||
fast_exp(); // Stack: exp(X) X
|
|
||||||
fcmp(tmp, 0, false, false); // Stack: exp(X) X
|
|
||||||
// exp(X) not equal to itself: exp(X) is NaN go to slow case.
|
|
||||||
jcc(Assembler::parity, slow_case);
|
|
||||||
// get rid of duplicate argument. Stack: exp(X)
|
|
||||||
if (num_fpu_regs_in_use > 0) {
|
|
||||||
fxch();
|
|
||||||
fpop();
|
|
||||||
} else {
|
|
||||||
ffree(1);
|
|
||||||
}
|
|
||||||
jmp(done);
|
|
||||||
} else {
|
|
||||||
// Stack: X Y
|
// Stack: X Y
|
||||||
Label x_negative, y_not_2;
|
Label x_negative, y_not_2;
|
||||||
|
|
||||||
@ -3296,15 +3277,13 @@ void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
|
|||||||
|
|
||||||
fchs(); // Stack: -abs(X)^Y Y
|
fchs(); // Stack: -abs(X)^Y Y
|
||||||
jmp(done);
|
jmp(done);
|
||||||
}
|
|
||||||
|
|
||||||
// slow case: runtime call
|
// slow case: runtime call
|
||||||
bind(slow_case);
|
bind(slow_case);
|
||||||
|
|
||||||
fpop(); // pop incorrect result or int(Y)
|
fpop(); // pop incorrect result or int(Y)
|
||||||
|
|
||||||
fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
|
fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2, num_fpu_regs_in_use);
|
||||||
is_exp ? 1 : 2, num_fpu_regs_in_use);
|
|
||||||
|
|
||||||
// Come here with result in F-TOS
|
// Come here with result in F-TOS
|
||||||
bind(done);
|
bind(done);
|
||||||
|
@ -907,14 +907,14 @@ class MacroAssembler: public Assembler {
|
|||||||
// all corner cases and may result in NaN and require fallback to a
|
// all corner cases and may result in NaN and require fallback to a
|
||||||
// runtime call.
|
// runtime call.
|
||||||
void fast_pow();
|
void fast_pow();
|
||||||
void fast_exp();
|
void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||||
|
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||||
|
Register rax, Register rcx, Register rdx, Register tmp);
|
||||||
void increase_precision();
|
void increase_precision();
|
||||||
void restore_precision();
|
void restore_precision();
|
||||||
|
|
||||||
// computes exp(x). Fallback to runtime call included.
|
|
||||||
void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); }
|
|
||||||
// computes pow(x,y). Fallback to runtime call included.
|
// computes pow(x,y). Fallback to runtime call included.
|
||||||
void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); }
|
void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(num_fpu_regs_in_use); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
@ -925,7 +925,7 @@ private:
|
|||||||
void pow_exp_core_encoding();
|
void pow_exp_core_encoding();
|
||||||
|
|
||||||
// computes pow(x,y) or exp(x). Fallback to runtime call included.
|
// computes pow(x,y). Fallback to runtime call included.
|
||||||
void pow_or_exp(bool is_exp, int num_fpu_regs_in_use);
|
void pow_or_exp(int num_fpu_regs_in_use);
|
||||||
|
|
||||||
// these are private because users should be doing movflt/movdbl
|
// these are private because users should be doing movflt/movdbl
|
||||||
|
|
||||||
@ -971,6 +971,10 @@ public:
|
|||||||
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
|
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
|
||||||
void movsd(XMMRegister dst, AddressLiteral src);
|
void movsd(XMMRegister dst, AddressLiteral src);
|
||||||
|
|
||||||
|
// Register-register form: forwards unchanged to Assembler::mulpd.
void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
|
||||||
|
// Memory-operand form: forwards unchanged to Assembler::mulpd.
void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
|
||||||
|
void mulpd(XMMRegister dst, AddressLiteral src);
|
||||||
|
|
||||||
void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
|
void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
|
||||||
void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
|
void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
|
||||||
void mulsd(XMMRegister dst, AddressLiteral src);
|
void mulsd(XMMRegister dst, AddressLiteral src);
|
||||||
|
677
hotspot/src/cpu/x86/vm/macroAssembler_x86_libm.cpp
Normal file
677
hotspot/src/cpu/x86/vm/macroAssembler_x86_libm.cpp
Normal file
@ -0,0 +1,677 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, Intel Corporation.
|
||||||
|
* Intel Math Library (LIBM) Source Code
|
||||||
|
*
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
// ALGORITHM DESCRIPTION
|
||||||
|
// ---------------------
|
||||||
|
//
|
||||||
|
// Description:
|
||||||
|
// Let K = 64 (table size).
|
||||||
|
// e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
|
||||||
|
// where
|
||||||
|
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
|
||||||
|
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
|
||||||
|
// values of 2^(j/K) are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
|
||||||
|
//
|
||||||
|
// P(y) is a minimax polynomial approximation of exp(y)-1
|
||||||
|
// on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
|
||||||
|
//
|
||||||
|
// To avoid problems with arithmetic overflow and underflow,
|
||||||
|
// the value of 2^n is safely computed as 2^n1 * 2^n2, where n1 is in [-BIAS/2..BIAS/2]
|
||||||
|
// where BIAS is a value of exponent bias.
|
||||||
|
//
|
||||||
|
// Special cases:
|
||||||
|
// exp(NaN) = NaN
|
||||||
|
// exp(+INF) = +INF
|
||||||
|
// exp(-INF) = 0
|
||||||
|
// exp(x) = 1 for subnormals
|
||||||
|
// for finite argument, only exp(0)=1 is exact
|
||||||
|
// For IEEE double
|
||||||
|
// if x > 709.782712893383973096 then exp(x) overflow
|
||||||
|
// if x < -745.133219101941108420 then exp(x) underflow
|
||||||
|
//
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#include "precompiled.hpp"
|
||||||
|
#include "asm/assembler.hpp"
|
||||||
|
#include "asm/assembler.inline.hpp"
|
||||||
|
#include "macroAssembler_x86.hpp"
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define ALIGNED_(x) __declspec(align(x))
|
||||||
|
#else
|
||||||
|
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
|
|
||||||
|
ALIGNED_(16) juint _cv[] =
|
||||||
|
{
|
||||||
|
0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
|
||||||
|
0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
|
||||||
|
0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
|
||||||
|
0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
|
||||||
|
0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(16) juint _shifter[] =
|
||||||
|
{
|
||||||
|
0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(16) juint _mmask[] =
|
||||||
|
{
|
||||||
|
0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(16) juint _bias[] =
|
||||||
|
{
|
||||||
|
0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(16) juint _Tbl_addr[] =
|
||||||
|
{
|
||||||
|
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
|
||||||
|
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
|
||||||
|
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
|
||||||
|
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
|
||||||
|
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
|
||||||
|
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
|
||||||
|
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
|
||||||
|
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
|
||||||
|
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
|
||||||
|
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
|
||||||
|
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
|
||||||
|
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
|
||||||
|
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
|
||||||
|
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
|
||||||
|
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
|
||||||
|
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
|
||||||
|
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
|
||||||
|
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
|
||||||
|
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
|
||||||
|
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
|
||||||
|
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
|
||||||
|
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
|
||||||
|
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
|
||||||
|
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
|
||||||
|
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
|
||||||
|
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
|
||||||
|
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
|
||||||
|
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
|
||||||
|
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
|
||||||
|
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
|
||||||
|
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
|
||||||
|
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
|
||||||
|
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
|
||||||
|
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
|
||||||
|
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
|
||||||
|
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
|
||||||
|
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
|
||||||
|
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
|
||||||
|
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
|
||||||
|
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
|
||||||
|
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
|
||||||
|
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
|
||||||
|
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
|
||||||
|
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
|
||||||
|
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
|
||||||
|
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
|
||||||
|
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
|
||||||
|
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
|
||||||
|
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
|
||||||
|
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
|
||||||
|
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
|
||||||
|
0x000fa7c1UL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(16) juint _ALLONES[] =
|
||||||
|
{
|
||||||
|
0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(16) juint _ebias[] =
|
||||||
|
{
|
||||||
|
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(4) juint _XMAX[] =
|
||||||
|
{
|
||||||
|
0xffffffffUL, 0x7fefffffUL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(4) juint _XMIN[] =
|
||||||
|
{
|
||||||
|
0x00000000UL, 0x00100000UL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(4) juint _INF[] =
|
||||||
|
{
|
||||||
|
0x00000000UL, 0x7ff00000UL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(4) juint _ZERO[] =
|
||||||
|
{
|
||||||
|
0x00000000UL, 0x00000000UL
|
||||||
|
};
|
||||||
|
|
||||||
|
ALIGNED_(4) juint _ONE_val[] =
|
||||||
|
{
|
||||||
|
0x00000000UL, 0x3ff00000UL
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Registers:
|
||||||
|
// input: xmm0
|
||||||
|
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||||
|
// rax, rdx, rcx, tmp - r11
|
||||||
|
|
||||||
|
// Code generated by Intel C compiler for LIBM library
|
||||||
|
|
||||||
|
// Emits the 64-bit SSE2 code sequence for exp(x).
//   xmm0        - input value on entry; holds the result when B1_5 is reached
//   xmm1..xmm7  - scratch
//   eax/ecx/edx - scratch general registers
//   tmp         - scratch register used as the base of the lookup table
// The emitted code reserves 24 bytes of stack and releases them at B1_5:
//   [rsp+0]  status word written on the special-case paths (14 or 15); it is
//            not read back anywhere in this routine
//   [rsp+8]  copy of the input (the slow paths re-read its 32-bit halves)
//   [rsp+16] spill slot for the result on the L_2TAG_PACKET_6_0_2 path
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||||
|
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||||
|
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||||
|
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
|
||||||
|
Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
|
||||||
|
|
||||||
|
assert_different_registers(tmp, eax, ecx, edx);
|
||||||
|
// NOTE(review): nothing is emitted between this jmp and bind(start); the
// declarations below are C++ locals only, so the jump targets the next
// instruction.
jmp(start);
|
||||||
|
address cv = (address)_cv;
|
||||||
|
address Shifter = (address)_shifter;
|
||||||
|
address mmask = (address)_mmask;
|
||||||
|
address bias = (address)_bias;
|
||||||
|
address Tbl_addr = (address)_Tbl_addr;
|
||||||
|
address ALLONES = (address)_ALLONES;
|
||||||
|
address ebias = (address)_ebias;
|
||||||
|
address XMAX = (address)_XMAX;
|
||||||
|
address XMIN = (address)_XMIN;
|
||||||
|
address INF = (address)_INF;
|
||||||
|
address ZERO = (address)_ZERO;
|
||||||
|
address ONE_val = (address)_ONE_val;
|
||||||
|
|
||||||
|
bind(start);
|
||||||
|
subq(rsp, 24);
|
||||||
|
movsd(Address(rsp, 8), xmm0);
|
||||||
|
unpcklpd(xmm0, xmm0);
|
||||||
|
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
|
||||||
|
movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
|
||||||
|
movdqu(xmm2, ExternalAddress(16+cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
|
||||||
|
movdqu(xmm3, ExternalAddress(32+cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
|
||||||
|
// Range check on the top 16 bits of x with the sign masked off: if they are
// below 15504 or above 16527, one of the two differences is negative, the OR
// has its sign bit set, and the unsigned compare sends us to the slow path.
pextrw(eax, xmm0, 3);
|
||||||
|
andl(eax, 32767);
|
||||||
|
movl(edx, 16527);
|
||||||
|
subl(edx, eax);
|
||||||
|
subl(eax, 15504);
|
||||||
|
orl(edx, eax);
|
||||||
|
cmpl(edx, INT_MIN);
|
||||||
|
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||||
|
// Main path. Adding and then subtracting the Shifter constant presumably
// rounds x*cv[0] to an integer n (TODO confirm against the LIBM derivation).
mulpd(xmm1, xmm0);
|
||||||
|
addpd(xmm1, xmm6);
|
||||||
|
movapd(xmm7, xmm1);
|
||||||
|
subpd(xmm1, xmm6);
|
||||||
|
mulpd(xmm2, xmm1);
|
||||||
|
movdqu(xmm4, ExternalAddress(64+cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
|
||||||
|
mulpd(xmm3, xmm1);
|
||||||
|
movdqu(xmm5, ExternalAddress(80+cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
|
||||||
|
subpd(xmm0, xmm2);
|
||||||
|
// eax = low 32 bits of xmm7. Its low 6 bits, times 16, give the byte offset
// of a 16-byte entry in Tbl_addr (ecx); the remaining bits (eax >> 6) are
// kept in eax and edx.
movdl(eax, xmm7);
|
||||||
|
movl(ecx, eax);
|
||||||
|
andl(ecx, 63);
|
||||||
|
shll(ecx, 4);
|
||||||
|
sarl(eax, 6);
|
||||||
|
movl(edx, eax);
|
||||||
|
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
|
||||||
|
pand(xmm7, xmm6);
|
||||||
|
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
|
||||||
|
paddq(xmm7, xmm6);
|
||||||
|
psllq(xmm7, 46);
|
||||||
|
subpd(xmm0, xmm3);
|
||||||
|
lea(tmp, ExternalAddress(Tbl_addr));
|
||||||
|
movdqu(xmm2, Address(ecx,tmp));
|
||||||
|
mulpd(xmm4, xmm0);
|
||||||
|
movapd(xmm6, xmm0);
|
||||||
|
movapd(xmm1, xmm0);
|
||||||
|
mulpd(xmm6, xmm6);
|
||||||
|
mulpd(xmm0, xmm6);
|
||||||
|
addpd(xmm5, xmm4);
|
||||||
|
mulsd(xmm0, xmm6);
|
||||||
|
mulpd(xmm6, ExternalAddress(48+cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
|
||||||
|
addsd(xmm1, xmm2);
|
||||||
|
unpckhpd(xmm2, xmm2);
|
||||||
|
mulpd(xmm0, xmm5);
|
||||||
|
addsd(xmm1, xmm0);
|
||||||
|
por(xmm2, xmm7);
|
||||||
|
unpckhpd(xmm0, xmm0);
|
||||||
|
addsd(xmm0, xmm1);
|
||||||
|
addsd(xmm0, xmm6);
|
||||||
|
// Unsigned check that (eax >> 6) + 894 is at most 1916; otherwise the result
// is finished on the special scaling path at L_2TAG_PACKET_1_0_2.
addl(edx, 894);
|
||||||
|
cmpl(edx, 1916);
|
||||||
|
jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
|
||||||
|
mulsd(xmm0, xmm2);
|
||||||
|
addsd(xmm0, xmm2);
|
||||||
|
jmp (B1_5);
|
||||||
|
|
||||||
|
// Scaling path taken when the range check above fails.
bind(L_2TAG_PACKET_1_0_2);
|
||||||
|
xorpd(xmm3, xmm3);
|
||||||
|
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
|
||||||
|
movl(edx, -1022);
|
||||||
|
subl(edx, eax);
|
||||||
|
movdl(xmm5, edx);
|
||||||
|
psllq(xmm4, xmm5);
|
||||||
|
movl(ecx, eax);
|
||||||
|
sarl(eax, 1);
|
||||||
|
pinsrw(xmm3, eax, 3);
|
||||||
|
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
|
||||||
|
psllq(xmm3, 4);
|
||||||
|
psubd(xmm2, xmm3);
|
||||||
|
mulsd(xmm0, xmm2);
|
||||||
|
cmpl(edx, 52);
|
||||||
|
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
|
||||||
|
pand(xmm4, xmm2);
|
||||||
|
paddd(xmm3, xmm6);
|
||||||
|
subsd(xmm2, xmm4);
|
||||||
|
addsd(xmm0, xmm2);
|
||||||
|
cmpl(ecx, 1023);
|
||||||
|
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
|
||||||
|
pextrw(ecx, xmm0, 3);
|
||||||
|
andl(ecx, 32768);
|
||||||
|
orl(edx, ecx);
|
||||||
|
cmpl(edx, 0);
|
||||||
|
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
|
||||||
|
movapd(xmm6, xmm0);
|
||||||
|
addsd(xmm0, xmm4);
|
||||||
|
mulsd(xmm0, xmm3);
|
||||||
|
// 32752 == 0x7ff0 masks the exponent field of the result's top word; a zero
// field here routes to L_2TAG_PACKET_5_0_2.
pextrw(ecx, xmm0, 3);
|
||||||
|
andl(ecx, 32752);
|
||||||
|
cmpl(ecx, 0);
|
||||||
|
jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
|
||||||
|
jmp(B1_5);
|
||||||
|
|
||||||
|
bind(L_2TAG_PACKET_5_0_2);
|
||||||
|
mulsd(xmm6, xmm3);
|
||||||
|
mulsd(xmm4, xmm3);
|
||||||
|
movdqu(xmm0, xmm6);
|
||||||
|
pxor(xmm6, xmm4);
|
||||||
|
psrad(xmm6, 31);
|
||||||
|
pshufd(xmm6, xmm6, 85);
|
||||||
|
psllq(xmm0, 1);
|
||||||
|
psrlq(xmm0, 1);
|
||||||
|
pxor(xmm0, xmm6);
|
||||||
|
psrlq(xmm6, 63);
|
||||||
|
paddq(xmm0, xmm6);
|
||||||
|
paddq(xmm0, xmm4);
|
||||||
|
movl(Address(rsp,0), 15);
|
||||||
|
jmp(L_2TAG_PACKET_6_0_2);
|
||||||
|
|
||||||
|
bind(L_2TAG_PACKET_4_0_2);
|
||||||
|
addsd(xmm0, xmm4);
|
||||||
|
mulsd(xmm0, xmm3);
|
||||||
|
jmp(B1_5);
|
||||||
|
|
||||||
|
bind(L_2TAG_PACKET_3_0_2);
|
||||||
|
addsd(xmm0, xmm4);
|
||||||
|
mulsd(xmm0, xmm3);
|
||||||
|
// An all-ones exponent field (0x7ff0) in the result is recorded as status 14
// at L_2TAG_PACKET_7_0_2.
pextrw(ecx, xmm0, 3);
|
||||||
|
andl(ecx, 32752);
|
||||||
|
cmpl(ecx, 32752);
|
||||||
|
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
|
||||||
|
jmp(B1_5);
|
||||||
|
|
||||||
|
bind(L_2TAG_PACKET_2_0_2);
|
||||||
|
paddd(xmm3, xmm6);
|
||||||
|
addpd(xmm0, xmm2);
|
||||||
|
mulsd(xmm0, xmm3);
|
||||||
|
movl(Address(rsp,0), 15);
|
||||||
|
jmp(L_2TAG_PACKET_6_0_2);
|
||||||
|
|
||||||
|
// Reached from L_2TAG_PACKET_0_0_2 with eax = high word of |x| and
// eax >= 0x40900000 (the high word of 1024.0).
bind(L_2TAG_PACKET_8_0_2);
|
||||||
|
// 2146435072 == 0x7ff00000: infinities and NaNs go to L_2TAG_PACKET_9_0_2.
cmpl(eax, 2146435072);
|
||||||
|
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
|
||||||
|
// Reload the signed high word of x; unsigned >= INT_MIN means the sign bit is
// set, which takes the XMIN*XMIN path. Otherwise fall through to XMAX*XMAX.
movl(eax, Address(rsp,12));
|
||||||
|
cmpl(eax, INT_MIN);
|
||||||
|
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
|
||||||
|
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
|
||||||
|
mulsd(xmm0, xmm0);
|
||||||
|
|
||||||
|
bind(L_2TAG_PACKET_7_0_2);
|
||||||
|
movl(Address(rsp,0), 14);
|
||||||
|
jmp(L_2TAG_PACKET_6_0_2);
|
||||||
|
|
||||||
|
bind(L_2TAG_PACKET_10_0_2);
|
||||||
|
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
|
||||||
|
mulsd(xmm0, xmm0);
|
||||||
|
movl(Address(rsp,0), 15);
|
||||||
|
jmp(L_2TAG_PACKET_6_0_2);
|
||||||
|
|
||||||
|
// Exponent field is all ones. A non-zero mantissa (high word above 0x7ff00000
// or non-zero low word) goes to L_2TAG_PACKET_11_0_2; otherwise the signed
// high word selects INF (equal to 0x7ff00000) or ZERO.
bind(L_2TAG_PACKET_9_0_2);
|
||||||
|
movl(edx, Address(rsp,8));
|
||||||
|
cmpl(eax, 2146435072);
|
||||||
|
jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
|
||||||
|
cmpl(edx, 0);
|
||||||
|
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
|
||||||
|
movl(eax, Address(rsp,12));
|
||||||
|
cmpl(eax, 2146435072);
|
||||||
|
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
|
||||||
|
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
|
||||||
|
jmp(B1_5);
|
||||||
|
|
||||||
|
bind(L_2TAG_PACKET_12_0_2);
|
||||||
|
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
|
||||||
|
jmp(B1_5);
|
||||||
|
|
||||||
|
// Non-zero mantissa with an all-ones exponent: return x + x.
bind(L_2TAG_PACKET_11_0_2);
|
||||||
|
movsd(xmm0, Address(rsp, 8));
|
||||||
|
addsd(xmm0, xmm0);
|
||||||
|
jmp(B1_5);
|
||||||
|
|
||||||
|
// Slow-path entry from the first range check. 1083179008 == 0x40900000, the
// high word of 1024.0: larger magnitudes go to L_2TAG_PACKET_8_0_2, the
// remaining (small) inputs return x + 1.0.
bind(L_2TAG_PACKET_0_0_2);
|
||||||
|
movl(eax, Address(rsp, 12));
|
||||||
|
andl(eax, 2147483647);
|
||||||
|
cmpl(eax, 1083179008);
|
||||||
|
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
|
||||||
|
movsd(Address(rsp, 8), xmm0);
|
||||||
|
addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
|
||||||
|
jmp(B1_5);
|
||||||
|
|
||||||
|
// Special-case exit: the result is spilled to [rsp+16] and reloaded below.
bind(L_2TAG_PACKET_6_0_2);
|
||||||
|
movq(Address(rsp, 16), xmm0);
|
||||||
|
|
||||||
|
// B1_3 is bound here but no jump in this routine targets it.
bind(B1_3);
|
||||||
|
movq(xmm0, Address(rsp, 16));
|
||||||
|
|
||||||
|
// Common exit: release the 24 bytes reserved at start.
bind(B1_5);
|
||||||
|
addq(rsp, 24);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef _LP64
|
||||||
|
|
||||||
|
// Constant pool for the 32-bit fast_exp below, which addresses it through tmp
// at fixed byte offsets (each juint is 4 bytes):
//      0  0x00000000, 0xfff00000 (x2)  mask used with pandn
//     16  0xffffffc0, 0x00000000 (x2)  mask ANDed into xmm7
//     32  0x0000ffc0, 0x00000000 (x2)  bias added to xmm7
//     48  0x00000000, 0x43380000 (x2)  shifter constant
//     64..159  six 16-byte coefficient vectors (offsets 64, 80, 96, 112, 128, 144)
//    160..1183 64 lookup entries of 16 bytes, indexed by ecx
//   1184  1.0   1192  +infinity   1200  0.0
//   1208  0x7fefffffffffffff (largest finite double)
//   1216  0x0010000000000000 (smallest positive normal double)
ALIGNED_(16) juint _static_const_table[] =
|
||||||
|
{
|
||||||
|
0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
|
||||||
|
0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
|
||||||
|
0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
|
||||||
|
0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
|
||||||
|
0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
|
||||||
|
0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
|
||||||
|
0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
|
||||||
|
0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
|
||||||
|
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
|
||||||
|
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
|
||||||
|
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
|
||||||
|
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
|
||||||
|
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
|
||||||
|
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
|
||||||
|
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
|
||||||
|
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
|
||||||
|
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
|
||||||
|
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
|
||||||
|
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
|
||||||
|
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
|
||||||
|
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
|
||||||
|
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
|
||||||
|
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
|
||||||
|
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
|
||||||
|
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
|
||||||
|
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
|
||||||
|
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
|
||||||
|
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
|
||||||
|
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
|
||||||
|
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
|
||||||
|
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
|
||||||
|
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
|
||||||
|
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
|
||||||
|
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
|
||||||
|
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
|
||||||
|
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
|
||||||
|
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
|
||||||
|
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
|
||||||
|
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
|
||||||
|
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
|
||||||
|
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
|
||||||
|
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
|
||||||
|
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
|
||||||
|
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
|
||||||
|
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
|
||||||
|
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
|
||||||
|
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
|
||||||
|
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
|
||||||
|
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
|
||||||
|
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
|
||||||
|
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
|
||||||
|
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
|
||||||
|
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
|
||||||
|
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
|
||||||
|
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
|
||||||
|
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
|
||||||
|
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
|
||||||
|
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
|
||||||
|
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
|
||||||
|
0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
|
||||||
|
0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
|
||||||
|
0x00100000UL
|
||||||
|
};
|
||||||
|
|
||||||
|
// Registers:
|
||||||
|
// input: (rbp + 8)
|
||||||
|
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||||
|
// rax, rdx, rcx, rbx (tmp)
|
||||||
|
|
||||||
|
// Code generated by Intel C compiler for LIBM library
|
||||||
|
|
||||||
|
// Emits the 32-bit code sequence for exp(x).
//
//   input   - the double located 8 bytes above the stack pointer at entry
//             (read as Address(rsp, 128) after 120 bytes are reserved)
//   result  - left on the x87 stack (every exit path ends in fld_d)
//   xmm0..xmm7, eax, ecx, edx - scratch
//   tmp     - base of _static_const_table; its incoming value is saved at
//             [rsp+64] and restored on exit
//
// Stack use after the initial subl(rsp, 120):
//   [rsp+0], [rsp+8], [rsp+16], [rsp+48]  spill slots for SSE <-> x87 transfers
//   [rsp+24], [rsp+28]                    saved / modified x87 control word
//   [rsp+64]                              saved tmp
//
// NOTE: the 120 bytes reserved here are not released by this routine. The
// stub that calls it brackets the call with enter()/leave(); any other caller
// must restore rsp itself.
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                              XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                              Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label start;

  assert_different_registers(tmp, eax, ecx, edx);
  jmp(start);
  address static_const_table = (address)_static_const_table;

  bind(start);
  subl(rsp, 120);
  movl(Address(rsp, 64), tmp);
  lea(tmp, ExternalAddress(static_const_table));
  movdqu(xmm0, Address(rsp, 128));
  unpcklpd(xmm0, xmm0);
  movdqu(xmm1, Address(tmp, 64));          // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, Address(tmp, 48));          // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, Address(tmp, 80));          // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, Address(tmp, 96));          // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL

  // Range check on the top 16 bits of x with the sign masked off: if they are
  // below 15504 or above 16527, one of the two differences is negative and
  // the OR has its sign bit set.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  // Main path.
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);
  movdqu(xmm4, Address(tmp, 128));         // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, Address(tmp, 144));         // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  // eax = low 32 bits of xmm7; ecx = (eax & 63) * 16 is the byte offset of a
  // 16-byte table entry; eax and edx keep eax >> 6.
  movdl(eax, xmm7);
  movl(ecx, eax);
  andl(ecx, 63);
  shll(ecx, 4);
  sarl(eax, 6);
  movl(edx, eax);
  movdqu(xmm6, Address(tmp, 16));          // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, Address(tmp, 32));          // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);
  movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, Address(tmp, 112));          // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  por(xmm2, xmm7);
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  // Unsigned check that (eax >> 6) + 894 is at most 1916; otherwise finish on
  // the x87 scaling path.
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp(L_2TAG_PACKET_2_0_2);

  // Scaling path: save the x87 control word, set both precision-control bits
  // (768 == 0x300), do the final multiply/add on the x87 stack, then restore
  // the saved control word.
  bind(L_2TAG_PACKET_1_0_2);
  fnstcw(Address(rsp, 24));
  movzwl(edx, Address(rsp, 24));
  orl(edx, 768);
  movw(Address(rsp, 28), edx);
  fldcw(Address(rsp, 28));
  movl(edx, eax);
  sarl(eax, 1);
  subl(edx, eax);
  movdqu(xmm6, Address(tmp, 0));           // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
  pandn(xmm6, xmm2);
  addl(eax, 1023);
  movdl(xmm3, eax);
  psllq(xmm3, 52);
  por(xmm6, xmm3);
  addl(edx, 1023);
  movdl(xmm4, edx);
  psllq(xmm4, 52);
  movsd(Address(rsp, 8), xmm0);
  fld_d(Address(rsp, 8));
  movsd(Address(rsp, 16), xmm6);
  fld_d(Address(rsp, 16));
  fmula(1);
  faddp(1);
  movsd(Address(rsp, 8), xmm4);
  fld_d(Address(rsp, 8));
  fmulp(1);
  fstp_d(Address(rsp, 8));
  movsd(xmm0, Address(rsp, 8));
  fldcw(Address(rsp, 24));
  // 32752 == 0x7ff0 masks the exponent field of the result's top word:
  // all ones -> status 14, zero -> status 15, anything else -> normal exit.
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_2_0_2);
  // The instructions that used to follow this unconditional jump had no label
  // bound in front of them and could never execute; they are no longer emitted.

  bind(L_2TAG_PACKET_3_0_2);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  movl(edx, 15);

  // Special-case exit: push the computed value on the x87 stack. edx holds a
  // status value (14 or 15) that nothing in this routine reads afterwards.
  bind(L_2TAG_PACKET_5_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 128));
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_6_0_2);

  // Reached with eax = high word of |x|, eax >= 0x40900000 (high word of 1024.0).
  bind(L_2TAG_PACKET_7_0_2);
  cmpl(eax, 2146435072);                   // 0x7ff00000: infinity or NaN
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
  movl(eax, Address(rsp, 132));
  cmpl(eax, INT_MIN);
  // Unsigned test of the sign bit of x's high word. A signed greaterEqual
  // against INT_MIN is true for every value, which would send positive
  // out-of-range inputs down the XMIN*XMIN path as well.
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1208));         // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1216));         // 0x00000000UL, 0x00100000UL
  mulsd(xmm0, xmm0);
  movl(edx, 15);
  jmp(L_2TAG_PACKET_5_0_2);

  // Exponent field is all ones. A non-zero mantissa goes to
  // L_2TAG_PACKET_10_0_2; otherwise the signed high word selects +infinity
  // (equal to 0x7ff00000) or zero.
  bind(L_2TAG_PACKET_8_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
  movl(eax, Address(rsp, 132));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1192));         // 0x00000000UL, 0x7ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1200));         // 0x00000000UL, 0x00000000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // Non-zero mantissa with an all-ones exponent: return x + x.
  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, xmm0);
  jmp(L_2TAG_PACKET_2_0_2);

  // Slow-path entry from the first range check. 1083179008 == 0x40900000:
  // larger magnitudes go to L_2TAG_PACKET_7_0_2, the remaining (small) inputs
  // return x + 1.0.
  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 132));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, Address(tmp, 1184));         // 0x00000000UL, 0x3ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // Normal exit: move the SSE result onto the x87 stack.
  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 48), xmm0);
  fld_d(Address(rsp, 48));

  // Common tail: restore the caller's tmp.
  bind(L_2TAG_PACKET_6_0_2);
  movl(tmp, Address(rsp, 64));
}
|
||||||
|
|
||||||
|
#endif
|
@ -2134,14 +2134,6 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
__ trigfunc('t');
|
__ trigfunc('t');
|
||||||
__ ret(0);
|
__ ret(0);
|
||||||
}
|
}
|
||||||
{
|
|
||||||
StubCodeMark mark(this, "StubRoutines", "exp");
|
|
||||||
StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
|
|
||||||
|
|
||||||
__ fld_d(Address(rsp, 4));
|
|
||||||
__ exp_with_fallback(0);
|
|
||||||
__ ret(0);
|
|
||||||
}
|
|
||||||
{
|
{
|
||||||
StubCodeMark mark(this, "StubRoutines", "pow");
|
StubCodeMark mark(this, "StubRoutines", "pow");
|
||||||
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
|
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
|
||||||
@ -3048,6 +3040,32 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generates the 32-bit stub that wraps MacroAssembler::fast_exp and returns
// its entry address. The stub sets up a frame with enter()/leave() around the
// fast_exp code and returns with ret(0). rbx is passed as fast_exp's tmp
// register.
address generate_libmExp() {
  // Register the generated code under a name, as the other stubs in this
  // generator do.
  StubCodeMark mark(this, "StubRoutines", "libmExp");

  address start = __ pc();

  const XMMRegister x0 = xmm0;
  const XMMRegister x1 = xmm1;
  const XMMRegister x2 = xmm2;
  const XMMRegister x3 = xmm3;

  const XMMRegister x4 = xmm4;
  const XMMRegister x5 = xmm5;
  const XMMRegister x6 = xmm6;
  const XMMRegister x7 = xmm7;

  const Register tmp = rbx;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
||||||
|
|
||||||
|
|
||||||
// Safefetch stubs.
|
// Safefetch stubs.
|
||||||
void generate_safefetch(const char* name, int size, address* entry,
|
void generate_safefetch(const char* name, int size, address* entry,
|
||||||
address* fault_pc, address* continuation_pc) {
|
address* fault_pc, address* continuation_pc) {
|
||||||
@ -3268,6 +3286,9 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||||
}
|
}
|
||||||
|
if (VM_Version::supports_sse2()) {
|
||||||
|
StubRoutines::_dexp = generate_libmExp();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -3038,19 +3038,6 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
__ addq(rsp, 8);
|
__ addq(rsp, 8);
|
||||||
__ ret(0);
|
__ ret(0);
|
||||||
}
|
}
|
||||||
{
|
|
||||||
StubCodeMark mark(this, "StubRoutines", "exp");
|
|
||||||
StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
|
|
||||||
|
|
||||||
__ subq(rsp, 8);
|
|
||||||
__ movdbl(Address(rsp, 0), xmm0);
|
|
||||||
__ fld_d(Address(rsp, 0));
|
|
||||||
__ exp_with_fallback(0);
|
|
||||||
__ fstp_d(Address(rsp, 0));
|
|
||||||
__ movdbl(xmm0, Address(rsp, 0));
|
|
||||||
__ addq(rsp, 8);
|
|
||||||
__ ret(0);
|
|
||||||
}
|
|
||||||
{
|
{
|
||||||
StubCodeMark mark(this, "StubRoutines", "pow");
|
StubCodeMark mark(this, "StubRoutines", "pow");
|
||||||
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
|
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
|
||||||
@ -4180,6 +4167,44 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generates the 64-bit stub that wraps MacroAssembler::fast_exp and returns
// its entry address. The stub sets up a frame with enter()/leave() around the
// fast_exp code and returns with ret(0). r11 is passed as fast_exp's tmp
// register. On Windows x64, xmm6 and xmm7 are saved before and restored after
// the fast_exp code because fast_exp uses them as scratch.
address generate_libmExp() {
  // Register the generated code under a name, as the other stubs in this
  // generator do.
  StubCodeMark mark(this, "StubRoutines", "libmExp");

  address start = __ pc();

  const XMMRegister x0 = xmm0;
  const XMMRegister x1 = xmm1;
  const XMMRegister x2 = xmm2;
  const XMMRegister x3 = xmm3;

  const XMMRegister x4 = xmm4;
  const XMMRegister x5 = xmm5;
  const XMMRegister x6 = xmm6;
  const XMMRegister x7 = xmm7;

  const Register tmp = r11;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WIN64
  // save the xmm registers which must be preserved 6-7
  // NOTE(review): xmm_save() is defined outside this function; confirm that
  // the slots it addresses are valid for the frame set up by enter() here.
  __ movdqu(xmm_save(6), as_XMMRegister(6));
  __ movdqu(xmm_save(7), as_XMMRegister(7));
#endif
  __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);

#ifdef _WIN64
  // restore xmm regs belonging to calling function
  __ movdqu(as_XMMRegister(6), xmm_save(6));
  __ movdqu(as_XMMRegister(7), xmm_save(7));
#endif

  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
||||||
|
|
||||||
|
|
||||||
#undef __
|
#undef __
|
||||||
#define __ masm->
|
#define __ masm->
|
||||||
@ -4367,6 +4392,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||||
}
|
}
|
||||||
|
StubRoutines::_dexp = generate_libmExp();
|
||||||
}
|
}
|
||||||
|
|
||||||
void generate_all() {
|
void generate_all() {
|
||||||
|
@ -9911,35 +9911,6 @@ instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXR
|
|||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
|
||||||
instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
|
|
||||||
predicate (UseSSE<=1);
|
|
||||||
match(Set dpr1 (ExpD dpr1));
|
|
||||||
effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
|
|
||||||
format %{ "fast_exp $dpr1 -> $dpr1 // KILL $rax, $rcx, $rdx" %}
|
|
||||||
ins_encode %{
|
|
||||||
__ fast_exp();
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
|
|
||||||
predicate (UseSSE>=2);
|
|
||||||
match(Set dst (ExpD src));
|
|
||||||
effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
|
|
||||||
format %{ "fast_exp $dst -> $src // KILL $rax, $rcx, $rdx" %}
|
|
||||||
ins_encode %{
|
|
||||||
__ subptr(rsp, 8);
|
|
||||||
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
|
|
||||||
__ fld_d(Address(rsp, 0));
|
|
||||||
__ fast_exp();
|
|
||||||
__ fstp_d(Address(rsp, 0));
|
|
||||||
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
|
|
||||||
__ addptr(rsp, 8);
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
|
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
|
||||||
predicate (UseSSE<=1);
|
predicate (UseSSE<=1);
|
||||||
// The source Double operand on FPU stack
|
// The source Double operand on FPU stack
|
||||||
|
@ -9898,22 +9898,6 @@ instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rc
|
|||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
|
|
||||||
match(Set dst (ExpD src));
|
|
||||||
effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
|
|
||||||
format %{ "fast_exp $dst -> $src // KILL $rax, $rcx, $rdx" %}
|
|
||||||
ins_encode %{
|
|
||||||
__ subptr(rsp, 8);
|
|
||||||
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
|
|
||||||
__ fld_d(Address(rsp, 0));
|
|
||||||
__ fast_exp();
|
|
||||||
__ fstp_d(Address(rsp, 0));
|
|
||||||
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
|
|
||||||
__ addptr(rsp, 8);
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
//----------Arithmetic Conversion Instructions---------------------------------
|
//----------Arithmetic Conversion Instructions---------------------------------
|
||||||
|
|
||||||
instruct roundFloat_nop(regF dst)
|
instruct roundFloat_nop(regF dst)
|
||||||
|
@ -4006,7 +4006,6 @@ int MatchRule::is_expensive() const {
|
|||||||
strcmp(opType,"DivD")==0 ||
|
strcmp(opType,"DivD")==0 ||
|
||||||
strcmp(opType,"DivF")==0 ||
|
strcmp(opType,"DivF")==0 ||
|
||||||
strcmp(opType,"DivI")==0 ||
|
strcmp(opType,"DivI")==0 ||
|
||||||
strcmp(opType,"ExpD")==0 ||
|
|
||||||
strcmp(opType,"LogD")==0 ||
|
strcmp(opType,"LogD")==0 ||
|
||||||
strcmp(opType,"Log10D")==0 ||
|
strcmp(opType,"Log10D")==0 ||
|
||||||
strcmp(opType,"ModD")==0 ||
|
strcmp(opType,"ModD")==0 ||
|
||||||
|
@ -732,8 +732,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
|
|||||||
case lir_sin:
|
case lir_sin:
|
||||||
case lir_cos:
|
case lir_cos:
|
||||||
case lir_log:
|
case lir_log:
|
||||||
case lir_log10:
|
case lir_log10: {
|
||||||
case lir_exp: {
|
|
||||||
assert(op->as_Op2() != NULL, "must be");
|
assert(op->as_Op2() != NULL, "must be");
|
||||||
LIR_Op2* op2 = (LIR_Op2*)op;
|
LIR_Op2* op2 = (LIR_Op2*)op;
|
||||||
|
|
||||||
@ -743,9 +742,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
|
|||||||
// overlap with the input.
|
// overlap with the input.
|
||||||
assert(op2->_info == NULL, "not used");
|
assert(op2->_info == NULL, "not used");
|
||||||
assert(op2->_tmp5->is_illegal(), "not used");
|
assert(op2->_tmp5->is_illegal(), "not used");
|
||||||
assert(op2->_tmp2->is_valid() == (op->code() == lir_exp), "not used");
|
|
||||||
assert(op2->_tmp3->is_valid() == (op->code() == lir_exp), "not used");
|
|
||||||
assert(op2->_tmp4->is_valid() == (op->code() == lir_exp), "not used");
|
|
||||||
assert(op2->_opr1->is_valid(), "used");
|
assert(op2->_opr1->is_valid(), "used");
|
||||||
do_input(op2->_opr1); do_temp(op2->_opr1);
|
do_input(op2->_opr1); do_temp(op2->_opr1);
|
||||||
|
|
||||||
@ -1775,7 +1771,6 @@ const char * LIR_Op::name() const {
|
|||||||
case lir_tan: s = "tan"; break;
|
case lir_tan: s = "tan"; break;
|
||||||
case lir_log: s = "log"; break;
|
case lir_log: s = "log"; break;
|
||||||
case lir_log10: s = "log10"; break;
|
case lir_log10: s = "log10"; break;
|
||||||
case lir_exp: s = "exp"; break;
|
|
||||||
case lir_pow: s = "pow"; break;
|
case lir_pow: s = "pow"; break;
|
||||||
case lir_logic_and: s = "logic_and"; break;
|
case lir_logic_and: s = "logic_and"; break;
|
||||||
case lir_logic_or: s = "logic_or"; break;
|
case lir_logic_or: s = "logic_or"; break;
|
||||||
|
@ -961,7 +961,6 @@ enum LIR_Code {
|
|||||||
, lir_tan
|
, lir_tan
|
||||||
, lir_log
|
, lir_log
|
||||||
, lir_log10
|
, lir_log10
|
||||||
, lir_exp
|
|
||||||
, lir_pow
|
, lir_pow
|
||||||
, lir_logic_and
|
, lir_logic_and
|
||||||
, lir_logic_or
|
, lir_logic_or
|
||||||
@ -2199,7 +2198,6 @@ class LIR_List: public CompilationResourceObj {
|
|||||||
void sin (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_sin , from, tmp1, to, tmp2)); }
|
void sin (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_sin , from, tmp1, to, tmp2)); }
|
||||||
void cos (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_cos , from, tmp1, to, tmp2)); }
|
void cos (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_cos , from, tmp1, to, tmp2)); }
|
||||||
void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_tan , from, tmp1, to, tmp2)); }
|
void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_tan , from, tmp1, to, tmp2)); }
|
||||||
void exp (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_exp , from, tmp1, to, tmp2, tmp3, tmp4, tmp5)); }
|
|
||||||
void pow (LIR_Opr arg1, LIR_Opr arg2, LIR_Opr res, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_pow, arg1, arg2, res, tmp1, tmp2, tmp3, tmp4, tmp5)); }
|
void pow (LIR_Opr arg1, LIR_Opr arg2, LIR_Opr res, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_pow, arg1, arg2, res, tmp1, tmp2, tmp3, tmp4, tmp5)); }
|
||||||
|
|
||||||
void add (LIR_Opr left, LIR_Opr right, LIR_Opr res) { append(new LIR_Op2(lir_add, left, right, res)); }
|
void add (LIR_Opr left, LIR_Opr right, LIR_Opr res) { append(new LIR_Op2(lir_add, left, right, res)); }
|
||||||
|
@ -739,7 +739,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
|
|||||||
case lir_cos:
|
case lir_cos:
|
||||||
case lir_log:
|
case lir_log:
|
||||||
case lir_log10:
|
case lir_log10:
|
||||||
case lir_exp:
|
|
||||||
case lir_pow:
|
case lir_pow:
|
||||||
intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
|
intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
|
||||||
break;
|
break;
|
||||||
|
@ -244,6 +244,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
|
|||||||
void do_getClass(Intrinsic* x);
|
void do_getClass(Intrinsic* x);
|
||||||
void do_currentThread(Intrinsic* x);
|
void do_currentThread(Intrinsic* x);
|
||||||
void do_MathIntrinsic(Intrinsic* x);
|
void do_MathIntrinsic(Intrinsic* x);
|
||||||
|
void do_ExpIntrinsic(Intrinsic* x);
|
||||||
void do_ArrayCopy(Intrinsic* x);
|
void do_ArrayCopy(Intrinsic* x);
|
||||||
void do_CompareAndSwap(Intrinsic* x, ValueType* type);
|
void do_CompareAndSwap(Intrinsic* x, ValueType* type);
|
||||||
void do_NIOCheckIndex(Intrinsic* x);
|
void do_NIOCheckIndex(Intrinsic* x);
|
||||||
|
@ -6588,7 +6588,6 @@ void LinearScanStatistic::collect(LinearScan* allocator) {
|
|||||||
case lir_log10:
|
case lir_log10:
|
||||||
case lir_log:
|
case lir_log:
|
||||||
case lir_pow:
|
case lir_pow:
|
||||||
case lir_exp:
|
|
||||||
case lir_logic_and:
|
case lir_logic_and:
|
||||||
case lir_logic_or:
|
case lir_logic_or:
|
||||||
case lir_logic_xor:
|
case lir_logic_xor:
|
||||||
|
@ -317,6 +317,7 @@ const char* Runtime1::name_for_address(address entry) {
|
|||||||
FUNCTION_CASE(entry, TRACE_TIME_METHOD);
|
FUNCTION_CASE(entry, TRACE_TIME_METHOD);
|
||||||
#endif
|
#endif
|
||||||
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
|
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
|
||||||
|
FUNCTION_CASE(entry, StubRoutines::dexp());
|
||||||
|
|
||||||
#undef FUNCTION_CASE
|
#undef FUNCTION_CASE
|
||||||
|
|
||||||
|
@ -131,7 +131,6 @@ macro(DivModL)
|
|||||||
macro(EncodeISOArray)
|
macro(EncodeISOArray)
|
||||||
macro(EncodeP)
|
macro(EncodeP)
|
||||||
macro(EncodePKlass)
|
macro(EncodePKlass)
|
||||||
macro(ExpD)
|
|
||||||
macro(FastLock)
|
macro(FastLock)
|
||||||
macro(FastUnlock)
|
macro(FastUnlock)
|
||||||
macro(Goto)
|
macro(Goto)
|
||||||
|
@ -222,7 +222,6 @@ class LibraryCallKit : public GraphKit {
|
|||||||
bool inline_math_negateExactL();
|
bool inline_math_negateExactL();
|
||||||
bool inline_math_subtractExactI(bool is_decrement);
|
bool inline_math_subtractExactI(bool is_decrement);
|
||||||
bool inline_math_subtractExactL(bool is_decrement);
|
bool inline_math_subtractExactL(bool is_decrement);
|
||||||
bool inline_exp();
|
|
||||||
bool inline_pow();
|
bool inline_pow();
|
||||||
Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
|
Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
|
||||||
bool inline_min_max(vmIntrinsics::ID id);
|
bool inline_min_max(vmIntrinsics::ID id);
|
||||||
@ -1535,20 +1534,6 @@ Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeF
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------inline_exp-------------------------------------
|
|
||||||
// Inline exp instructions, if possible. The Intel hardware only misses
|
|
||||||
// really odd corner cases (+/- Infinity). Just uncommon-trap them.
|
|
||||||
bool LibraryCallKit::inline_exp() {
|
|
||||||
Node* arg = round_double_node(argument(0));
|
|
||||||
Node* n = _gvn.transform(new ExpDNode(C, control(), arg));
|
|
||||||
|
|
||||||
n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
|
|
||||||
set_result(n);
|
|
||||||
|
|
||||||
C->set_has_split_ifs(true); // Has chance for split-if optimization
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------inline_pow-------------------------------------
|
//------------------------------inline_pow-------------------------------------
|
||||||
// Inline power instructions, if possible.
|
// Inline power instructions, if possible.
|
||||||
bool LibraryCallKit::inline_pow() {
|
bool LibraryCallKit::inline_pow() {
|
||||||
@ -1776,7 +1761,8 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
|
|||||||
case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false;
|
case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false;
|
||||||
case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_math(id) : false;
|
case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_math(id) : false;
|
||||||
|
|
||||||
case vmIntrinsics::_dexp: return Matcher::has_match_rule(Op_ExpD) ? inline_exp() :
|
case vmIntrinsics::_dexp:
|
||||||
|
return (UseSSE >= 2) ? runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dexp(), "dexp") :
|
||||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dexp), "EXP");
|
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dexp), "EXP");
|
||||||
case vmIntrinsics::_dpow: return Matcher::has_match_rule(Op_PowD) ? inline_pow() :
|
case vmIntrinsics::_dpow: return Matcher::has_match_rule(Op_PowD) ? inline_pow() :
|
||||||
runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow), "POW");
|
runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow), "POW");
|
||||||
|
@ -1530,18 +1530,6 @@ const Type *Log10DNode::Value( PhaseTransform *phase ) const {
|
|||||||
return TypeD::make( StubRoutines::intrinsic_log10( d ) );
|
return TypeD::make( StubRoutines::intrinsic_log10( d ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
//=============================================================================
|
|
||||||
//------------------------------Value------------------------------------------
|
|
||||||
// Compute exp
|
|
||||||
const Type *ExpDNode::Value( PhaseTransform *phase ) const {
|
|
||||||
const Type *t1 = phase->type( in(1) );
|
|
||||||
if( t1 == Type::TOP ) return Type::TOP;
|
|
||||||
if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
|
|
||||||
double d = t1->getd();
|
|
||||||
return TypeD::make( StubRoutines::intrinsic_exp( d ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
//------------------------------Value------------------------------------------
|
//------------------------------Value------------------------------------------
|
||||||
// Compute pow
|
// Compute pow
|
||||||
|
@ -477,20 +477,6 @@ public:
|
|||||||
virtual const Type *Value( PhaseTransform *phase ) const;
|
virtual const Type *Value( PhaseTransform *phase ) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
//------------------------------ExpDNode---------------------------------------
|
|
||||||
// Exponentiate a double
|
|
||||||
class ExpDNode : public Node {
|
|
||||||
public:
|
|
||||||
ExpDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
|
|
||||||
init_flags(Flag_is_expensive);
|
|
||||||
C->add_expensive_node(this);
|
|
||||||
}
|
|
||||||
virtual int Opcode() const;
|
|
||||||
const Type *bottom_type() const { return Type::DOUBLE; }
|
|
||||||
virtual uint ideal_reg() const { return Op_RegD; }
|
|
||||||
virtual const Type *Value( PhaseTransform *phase ) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
//------------------------------LogDNode---------------------------------------
|
//------------------------------LogDNode---------------------------------------
|
||||||
// Log_e of a double
|
// Log_e of a double
|
||||||
class LogDNode : public Node {
|
class LogDNode : public Node {
|
||||||
|
@ -148,9 +148,10 @@ address StubRoutines::_mulAdd = NULL;
|
|||||||
address StubRoutines::_montgomeryMultiply = NULL;
|
address StubRoutines::_montgomeryMultiply = NULL;
|
||||||
address StubRoutines::_montgomerySquare = NULL;
|
address StubRoutines::_montgomerySquare = NULL;
|
||||||
|
|
||||||
|
address StubRoutines::_dexp = NULL;
|
||||||
|
|
||||||
double (* StubRoutines::_intrinsic_log )(double) = NULL;
|
double (* StubRoutines::_intrinsic_log )(double) = NULL;
|
||||||
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
|
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
|
||||||
double (* StubRoutines::_intrinsic_exp )(double) = NULL;
|
|
||||||
double (* StubRoutines::_intrinsic_pow )(double, double) = NULL;
|
double (* StubRoutines::_intrinsic_pow )(double, double) = NULL;
|
||||||
double (* StubRoutines::_intrinsic_sin )(double) = NULL;
|
double (* StubRoutines::_intrinsic_sin )(double) = NULL;
|
||||||
double (* StubRoutines::_intrinsic_cos )(double) = NULL;
|
double (* StubRoutines::_intrinsic_cos )(double) = NULL;
|
||||||
|
@ -207,6 +207,8 @@ class StubRoutines: AllStatic {
|
|||||||
static address _montgomeryMultiply;
|
static address _montgomeryMultiply;
|
||||||
static address _montgomerySquare;
|
static address _montgomerySquare;
|
||||||
|
|
||||||
|
static address _dexp;
|
||||||
|
|
||||||
// These are versions of the java.lang.Math methods which perform
|
// These are versions of the java.lang.Math methods which perform
|
||||||
// the same operations as the intrinsic version. They are used for
|
// the same operations as the intrinsic version. They are used for
|
||||||
// constant folding in the compiler to ensure equivalence. If the
|
// constant folding in the compiler to ensure equivalence. If the
|
||||||
@ -215,7 +217,6 @@ class StubRoutines: AllStatic {
|
|||||||
// SharedRuntime.
|
// SharedRuntime.
|
||||||
static double (*_intrinsic_log)(double);
|
static double (*_intrinsic_log)(double);
|
||||||
static double (*_intrinsic_log10)(double);
|
static double (*_intrinsic_log10)(double);
|
||||||
static double (*_intrinsic_exp)(double);
|
|
||||||
static double (*_intrinsic_pow)(double, double);
|
static double (*_intrinsic_pow)(double, double);
|
||||||
static double (*_intrinsic_sin)(double);
|
static double (*_intrinsic_sin)(double);
|
||||||
static double (*_intrinsic_cos)(double);
|
static double (*_intrinsic_cos)(double);
|
||||||
@ -375,6 +376,8 @@ class StubRoutines: AllStatic {
|
|||||||
static address montgomeryMultiply() { return _montgomeryMultiply; }
|
static address montgomeryMultiply() { return _montgomeryMultiply; }
|
||||||
static address montgomerySquare() { return _montgomerySquare; }
|
static address montgomerySquare() { return _montgomerySquare; }
|
||||||
|
|
||||||
|
static address dexp() {return _dexp; }
|
||||||
|
|
||||||
static address select_fill_function(BasicType t, bool aligned, const char* &name);
|
static address select_fill_function(BasicType t, bool aligned, const char* &name);
|
||||||
|
|
||||||
static address zero_aligned_words() { return _zero_aligned_words; }
|
static address zero_aligned_words() { return _zero_aligned_words; }
|
||||||
@ -387,10 +390,6 @@ class StubRoutines: AllStatic {
|
|||||||
assert(_intrinsic_log != NULL, "must be defined");
|
assert(_intrinsic_log != NULL, "must be defined");
|
||||||
return _intrinsic_log10(d);
|
return _intrinsic_log10(d);
|
||||||
}
|
}
|
||||||
static double intrinsic_exp(double d) {
|
|
||||||
assert(_intrinsic_exp != NULL, "must be defined");
|
|
||||||
return _intrinsic_exp(d);
|
|
||||||
}
|
|
||||||
static double intrinsic_pow(double d, double d2) {
|
static double intrinsic_pow(double d, double d2) {
|
||||||
assert(_intrinsic_pow != NULL, "must be defined");
|
assert(_intrinsic_pow != NULL, "must be defined");
|
||||||
return _intrinsic_pow(d, d2);
|
return _intrinsic_pow(d, d2);
|
||||||
|
@ -837,6 +837,7 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
|||||||
static_field(StubRoutines, _multiplyToLen, address) \
|
static_field(StubRoutines, _multiplyToLen, address) \
|
||||||
static_field(StubRoutines, _squareToLen, address) \
|
static_field(StubRoutines, _squareToLen, address) \
|
||||||
static_field(StubRoutines, _mulAdd, address) \
|
static_field(StubRoutines, _mulAdd, address) \
|
||||||
|
static_field(StubRoutines, _dexp, address) \
|
||||||
\
|
\
|
||||||
/*****************/ \
|
/*****************/ \
|
||||||
/* SharedRuntime */ \
|
/* SharedRuntime */ \
|
||||||
@ -1992,7 +1993,6 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
|||||||
declare_c2_type(TanDNode, Node) \
|
declare_c2_type(TanDNode, Node) \
|
||||||
declare_c2_type(AtanDNode, Node) \
|
declare_c2_type(AtanDNode, Node) \
|
||||||
declare_c2_type(SqrtDNode, Node) \
|
declare_c2_type(SqrtDNode, Node) \
|
||||||
declare_c2_type(ExpDNode, Node) \
|
|
||||||
declare_c2_type(LogDNode, Node) \
|
declare_c2_type(LogDNode, Node) \
|
||||||
declare_c2_type(Log10DNode, Node) \
|
declare_c2_type(Log10DNode, Node) \
|
||||||
declare_c2_type(PowDNode, Node) \
|
declare_c2_type(PowDNode, Node) \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user