8292638: x86: Improve scratch register handling in VM stubs

Co-authored-by: Aleksey Shipilev <shade@openjdk.org>
Reviewed-by: kvn, shade
This commit is contained in:
Vladimir Ivanov 2022-08-23 19:32:51 +00:00
parent d24b7b7026
commit f3be6731d3
14 changed files with 371 additions and 507 deletions

View File

@ -12165,81 +12165,93 @@ void Assembler::set_byte_if_not_zero(Register dst) {
#else // LP64
// 64bit only pieces of the assembler
// Emits the three-byte sequence 0x0F 0x95 <modrm> for `dst`, preceded by
// whatever prefix prefix_and_encode() decides the register needs.
// Per the Intel SDM, 0F 95 is SETNE/SETNZ r/m8, i.e. "set byte if not zero".
void Assembler::set_byte_if_not_zero(Register dst) {
  // NOTE(review): the `true` argument presumably marks a byte-register
  // operand -- confirm against prefix_and_encode().
  const int reg_enc = prefix_and_encode(dst->encoding(), true);
  // 0xC0 selects the register-direct ModRM form; the low bits carry the register.
  const int modrm = 0xC0 | reg_enc;
  emit_int24(0x0F, (unsigned char)0x95, modrm);
}
// 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// addressing; it cannot be used by instructions that want an immediate value.
bool Assembler::reachable(AddressLiteral adr) {
int64_t disp;
relocInfo::relocType relocType = adr.reloc();
// Tells whether `target` can always be addressed rip-relatively from code
// located in the code cache, judging only by the relocation type and by
// whether the target itself lives inside the code cache.
//
//   target      address the instruction wants to reference
//   reloc_type  relocation kind attached to that address
//
// Returns true when reachability is guaranteed, false otherwise (including
// every relocation type not listed below).
static bool is_always_reachable(address target, relocInfo::relocType reloc_type) {
  switch (reloc_type) {
    // Internal words are rip-relative and easily reachable.
    case relocInfo::internal_word_type:
    // Call sites and stubs are rip-relative within the code cache and easily
    // reachable until we get huge code caches (at which point IC code is
    // going to have issues as well).
    case relocInfo::virtual_call_type:
    case relocInfo::opt_virtual_call_type:
    case relocInfo::static_call_type:
    case relocInfo::static_stub_type:
      return true;

    // Runtime calls and external words (the poll types are really
    // external_word but need special relocs to identify them) are guaranteed
    // reachable only when the target sits inside the code cache.
    case relocInfo::runtime_call_type:
    case relocInfo::external_word_type:
    case relocInfo::poll_return_type:
    case relocInfo::poll_type:
      return CodeCache::contains(target);

    default:
      return false;
  }
}
// Determine whether an address is reachable in rip-relative addressing mode from the code cache.
static bool is_reachable(address target, relocInfo::relocType reloc_type) {
if (is_always_reachable(target, reloc_type)) {
return true;
}
switch (reloc_type) {
// None will force a 64bit literal to the code stream. Likely a placeholder
// for something that will be patched later and we need to be certain it will
// always be reachable.
if (relocType == relocInfo::none) {
case relocInfo::none: {
return false;
}
if (relocType == relocInfo::internal_word_type) {
// This should be rip relative and easily reachable.
return true;
}
if (relocType == relocInfo::virtual_call_type ||
relocType == relocInfo::opt_virtual_call_type ||
relocType == relocInfo::static_call_type ||
relocType == relocInfo::static_stub_type ) {
// This should be rip relative within the code cache and easily
// reachable until we get huge code caches. (At which point
// ic code is going to have issues).
return true;
}
if (relocType != relocInfo::external_word_type &&
relocType != relocInfo::poll_return_type && // these are really external_word but need special
relocType != relocInfo::poll_type && // relocs to identify them
relocType != relocInfo::runtime_call_type ) {
return false;
}
// Stress the correction code
case relocInfo::runtime_call_type:
case relocInfo::external_word_type:
case relocInfo::poll_return_type: // these are really external_word but need special
case relocInfo::poll_type: { // relocs to identify them
assert(!CodeCache::contains(target), "always reachable");
if (ForceUnreachable) {
// Must be runtimecall reloc, see if it is in the codecache
// Flipping stuff in the codecache to be unreachable causes issues
// with things like inline caches where the additional instructions
// are not handled.
if (CodeCache::find_blob(adr._target) == NULL) {
return false;
}
return false; // stress the correction code
}
// For external_word_type/runtime_call_type if it is reachable from where we
// are now (possibly a temp buffer) and where we might end up
// anywhere in the codeCache then we are always reachable.
// This would have to change if we ever save/restore shared code
// to be more pessimistic.
disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
if (!is_simm32(disp)) return false;
disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
if (!is_simm32(disp)) return false;
disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
// Because rip relative is a disp + address_of_next_instruction and we
// don't know the value of address_of_next_instruction we apply a fudge factor
// to make sure we will be ok no matter the size of the instruction we get placed into.
// We don't have to fudge the checks above here because they are already worst case.
// 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
// + 4 because better safe than sorry.
const int fudge = 12 + 4;
if (disp < 0) {
disp -= fudge;
} else {
disp += fudge;
// anywhere in the code cache then we are always reachable.
// This would have to change if we ever save/restore shared code to be more pessimistic.
// Code buffer has to be allocated in the code cache, so check against
// code cache boundaries cover that case.
//
// In rip-relative addressing mode, an effective address is formed by adding displacement
// to the 64-bit RIP of the next instruction which is not known yet. Considering target address
// is guaranteed to be outside of the code cache, checking against code cache boundaries is enough
// to account for that.
return Assembler::is_simm32(target - CodeCache::low_bound()) &&
Assembler::is_simm32(target - CodeCache::high_bound());
}
return is_simm32(disp);
default: {
return false;
}
}
}
// Reports whether the literal address `adr` can be used with rip-relative
// addressing from the code being emitted. The decision is delegated to
// is_reachable(), which looks only at the target and its relocation type.
bool Assembler::reachable(AddressLiteral adr) {
  // The current emission point is required to be inside the code cache.
  assert(CodeCache::contains(pc()), "required");
  const address dest = adr.target();
  const relocInfo::relocType rtype = adr.reloc();
  return is_reachable(dest, rtype);
}
// Reports whether the literal address `adr` is guaranteed to be usable with
// rip-relative addressing. The decision is delegated to
// is_always_reachable(), which looks only at the target and its relocation type.
bool Assembler::always_reachable(AddressLiteral adr) {
  // The current emission point is required to be inside the code cache.
  assert(CodeCache::contains(pc()), "required");
  const address dest = adr.target();
  const relocInfo::relocType rtype = adr.reloc();
  return is_always_reachable(dest, rtype);
}
void Assembler::emit_data64(jlong data,

View File

@ -802,16 +802,18 @@ private:
void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
protected:
#ifdef ASSERT
#ifdef ASSERT
void check_relocation(RelocationHolder const& rspec, int format);
#endif
#endif
void emit_data(jint data, relocInfo::relocType rtype, int format);
void emit_data(jint data, RelocationHolder const& rspec, int format);
void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
bool reachable(AddressLiteral adr) NOT_LP64({ return true;});
bool always_reachable(AddressLiteral adr) NOT_LP64( { return true; } );
bool reachable(AddressLiteral adr) NOT_LP64( { return true; } );
// These are all easily abused and hence protected

View File

@ -520,14 +520,15 @@ void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
}
void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
void MacroAssembler::cmp64(Register src1, AddressLiteral src2, Register rscratch) {
assert(!src2.is_lval(), "should use cmpptr");
assert(rscratch != noreg || always_reachable(src2), "missing");
if (reachable(src2)) {
cmpq(src1, as_Address(src2));
} else {
lea(rscratch1, src2);
Assembler::cmpq(src1, Address(rscratch1, 0));
lea(rscratch, src2);
Assembler::cmpq(src1, Address(rscratch, 0));
}
}
@ -1122,30 +1123,36 @@ void MacroAssembler::addptr(Address dst, Register src) {
LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}
void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::addsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::addsd(dst, Address(rscratch1, 0));
lea(rscratch, src);
Assembler::addsd(dst, Address(rscratch, 0));
}
}
void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::addss(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
addss(dst, as_Address(src));
} else {
lea(rscratch1, src);
addss(dst, Address(rscratch1, 0));
lea(rscratch, src);
addss(dst, Address(rscratch, 0));
}
}
void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::addpd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::addpd(dst, Address(rscratch1, 0));
lea(rscratch, src);
Assembler::addpd(dst, Address(rscratch, 0));
}
}
@ -2124,12 +2131,13 @@ void MacroAssembler::empty_FPU_stack() {
}
#endif // !LP64
void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::mulpd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::mulpd(dst, Address(rscratch1, 0));
lea(rscratch, src);
Assembler::mulpd(dst, Address(rscratch, 0));
}
}
@ -2469,21 +2477,23 @@ void MacroAssembler::movbyte(ArrayAddress dst, int src) {
movb(as_Address(dst), src);
}
void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
movdl(dst, as_Address(src));
} else {
lea(rscratch1, src);
movdl(dst, Address(rscratch1, 0));
lea(rscratch, src);
movdl(dst, Address(rscratch, 0));
}
}
void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::movq(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
movq(dst, as_Address(src));
} else {
lea(rscratch1, src);
movq(dst, Address(rscratch1, 0));
lea(rscratch, src);
movq(dst, Address(rscratch, 0));
}
}
@ -2683,16 +2693,20 @@ void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral s
}
void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
int vector_len, Register scratch_reg) {
int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
lea(rscratch, src);
Assembler::evmovdquq(dst, mask, Address(rscratch, 0), merge, vector_len);
}
}
void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::evmovdquq(dst, as_Address(src), vector_len);
} else {
@ -2710,12 +2724,14 @@ void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
}
}
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::movsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::movsd(dst, Address(rscratch1, 0));
lea(rscratch, src);
Assembler::movsd(dst, Address(rscratch, 0));
}
}
@ -2746,12 +2762,14 @@ void MacroAssembler::vmovddup(XMMRegister dst, AddressLiteral src, int vector_le
}
}
void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::mulsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::mulsd(dst, Address(rscratch1, 0));
lea(rscratch, src);
Assembler::mulsd(dst, Address(rscratch, 0));
}
}
@ -3246,6 +3264,8 @@ void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src
void MacroAssembler::vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
assert(UseAVX > 0, "requires some form of AVX");
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::vpaddd(dst, nds, as_Address(src), vector_len);
} else {

View File

@ -762,7 +762,7 @@ public:
void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
// cmp64 to avoid hiding cmpq
void cmp64(Register src1, AddressLiteral src);
void cmp64(Register src1, AddressLiteral src, Register rscratch = rscratch1);
void cmpxchgptr(Register reg, Address adr);
@ -1055,7 +1055,7 @@ public:
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register r11);
Register rax, Register rcx, Register rdx, Register r11, Register tmp);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
@ -1067,12 +1067,14 @@ public:
void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1,
Register tmp2, Register tmp3, Register tmp4);
Register rax, Register rcx, Register rdx, Register r8,
Register r9, Register r10, Register r11, Register tmp);
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1,
Register tmp2, Register tmp3, Register tmp4);
Register rax, Register rcx, Register rdx, Register r8,
Register r9, Register r10, Register r11, Register tmp);
#else
private:
// Initialized in macroAssembler_x86_constants.cpp
@ -1135,15 +1137,15 @@ public:
void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
void addsd(XMMRegister dst, AddressLiteral src);
void addsd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
void addss(XMMRegister dst, AddressLiteral src);
void addss(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); }
void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
void addpd(XMMRegister dst, AddressLiteral src);
void addpd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
using Assembler::vbroadcastsd;
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
@ -1249,34 +1251,34 @@ public:
}
void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
if (dst->encoding() != src->encoding() || mask != k0) {
Assembler::evmovdquq(dst, mask, src, merge, vector_len);
}
}
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
// Move Aligned Double Quadword
void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
void movdqa(XMMRegister dst, AddressLiteral src);
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, AddressLiteral src);
void movsd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
void mulpd(XMMRegister dst, AddressLiteral src);
void mulpd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, AddressLiteral src);
void mulsd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
@ -1369,7 +1371,7 @@ public:
void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);
void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
@ -1888,8 +1890,8 @@ public:
// they will be hidden by the following overriding declaration.
using Assembler::movdl;
using Assembler::movq;
void movdl(XMMRegister dst, AddressLiteral src);
void movq(XMMRegister dst, AddressLiteral src);
void movdl(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void movq (XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
// Can push value or effective address
void pushptr(AddressLiteral src);

View File

@ -178,15 +178,14 @@
// The 64 bit code is at most SSE2 compliant
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ecx, Register edx,
Register r8, Register r9, Register r10, Register r11) {
Register eax, Register ecx, Register edx, Register r8,
Register r9, Register r10, Register r11, Register tmp) {
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_4, start;
assert_different_registers(r8, r9, r10, r11, eax, ecx, edx);
assert_different_registers(eax, ecx, edx, r8, r9, r10, r11, tmp);
bind(start);
push(rbx);
@ -195,33 +194,33 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(B1_2);
movl(eax, Address(rsp, 12));
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
movq(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x40245f30UL
andl(eax, 2147418112);
subl(eax, 808452096);
cmpl(eax, 281346048);
jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
mulsd(xmm1, xmm0);
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
movdqu(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2), tmp /*rscratch*/); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
movq(xmm3, ExternalAddress(P_1), tmp /*rscratch*/); //0x54400000UL, 0x3fb921fbUL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
addq(rdx, 1865232);
movdqu(xmm4, xmm0);
andq(rdx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
movdqu(xmm5, ExternalAddress(SC_4), tmp /*rscratch*/); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable));
shlq(rdx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
mulsd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
@ -229,7 +228,7 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
movdqu(xmm6, ExternalAddress(SC_2), tmp /*rscratch*/); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
@ -245,9 +244,9 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
addpd(xmm5, ExternalAddress(SC_3), tmp /*rscratch*/); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
addpd(xmm6, ExternalAddress(SC_1), tmp /*rscratch*/); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
@ -275,7 +274,7 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
pextrw(eax, xmm0, 3);
andl(eax, 32767);
pinsrw(xmm0, eax, 3);
movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
movq(xmm1, ExternalAddress(ONE), tmp /*rscratch*/); // 0x00000000UL, 0x3ff00000UL
subsd(xmm1, xmm0);
movdqu(xmm0, xmm1);
jmp(B1_4);
@ -423,8 +422,8 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
orl(edx, rsi);
xorl(edx, rbx);
pinsrw(xmm4, edx, 3);
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm6, ExternalAddress(PI_4 + 8)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm2, ExternalAddress(PI_4), tmp /*rscratch*/); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm6, ExternalAddress(PI_4 + 8), tmp /*rscratch*/); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
xorpd(xmm5, xmm5);
subl(edx, 1008);
pinsrw(xmm5, edx, 3);
@ -448,17 +447,17 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm6, xmm2);
bind(L_2TAG_PACKET_11_0_1);
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
movq(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x40245f30UL
mulsd(xmm1, xmm0);
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
movq(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2siq(rdx, xmm1);
cvtsi2sdq(xmm1, rdx);
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
movq(xmm3, ExternalAddress(P_1), tmp /*rscratch*/); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2), tmp /*rscratch*/); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
shll(eax, 3);
@ -466,13 +465,13 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movdqu(xmm4, xmm0);
addl(edx, eax);
andl(edx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
movdqu(xmm5, ExternalAddress(SC_4), tmp /*rscratch*/); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable));
shll(edx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
mulsd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
@ -491,15 +490,15 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
subsd(xmm1, xmm6);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
movdqu(xmm6, ExternalAddress(SC_2), tmp /*rscratch*/); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
addpd(xmm5, ExternalAddress(SC_3), tmp /*rscratch*/); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
addpd(xmm6, ExternalAddress(SC_1), tmp /*rscratch*/); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
@ -602,7 +601,7 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(L_2TAG_PACKET_2_0_1);
movsd(xmm0, Address(rsp, 8));
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL
mulsd(xmm0, ExternalAddress(NEG_ZERO), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
movq(Address(rsp, 0), xmm0);
bind(L_2TAG_PACKET_13_0_1);

View File

@ -200,10 +200,10 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subq(rsp, 24);
movsd(Address(rsp, 8), xmm0);
unpcklpd(xmm0, xmm0);
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm2, ExternalAddress(cv + 16)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, ExternalAddress(cv + 32)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
movdqu(xmm6, ExternalAddress(SHIFTER)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
movdqu(xmm1, ExternalAddress(cv), tmp /*rscratch*/); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm2, ExternalAddress(cv + 16), tmp /*rscratch*/); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, ExternalAddress(cv + 32), tmp /*rscratch*/); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
movdqu(xmm6, ExternalAddress(SHIFTER), tmp /*rscratch*/); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
pextrw(eax, xmm0, 3);
andl(eax, 32767);
movl(edx, 16527);
@ -217,9 +217,9 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movapd(xmm7, xmm1);
subpd(xmm1, xmm6);
mulpd(xmm2, xmm1);
movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
movdqu(xmm4, ExternalAddress(cv + 64), tmp /*rscratch*/); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1);
movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
movdqu(xmm5, ExternalAddress(cv + 80), tmp /*rscratch*/); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2);
movdl(eax, xmm7);
movl(ecx, eax);
@ -227,9 +227,9 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
shll(ecx, 4);
sarl(eax, 6);
movl(edx, eax);
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
movdqu(xmm6, ExternalAddress(mmask), tmp /*rscratch*/); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6);
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
movdqu(xmm6, ExternalAddress(bias), tmp /*rscratch*/); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6);
psllq(xmm7, 46);
subpd(xmm0, xmm3);
@ -242,7 +242,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulpd(xmm0, xmm6);
addpd(xmm5, xmm4);
mulsd(xmm0, xmm6);
mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
mulpd(xmm6, ExternalAddress(cv + 48), tmp /*rscratch*/); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5);
@ -260,7 +260,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(L_2TAG_PACKET_1_0_2);
xorpd(xmm3, xmm3);
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
movdqu(xmm4, ExternalAddress(ALLONES), tmp /*rscratch*/); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
movl(edx, -1022);
subl(edx, eax);
movdl(xmm5, edx);
@ -268,7 +268,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movl(ecx, eax);
sarl(eax, 1);
pinsrw(xmm3, eax, 3);
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
movdqu(xmm6, ExternalAddress(ebias), tmp /*rscratch*/); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
psllq(xmm3, 4);
psubd(xmm2, xmm3);
mulsd(xmm0, xmm2);
@ -337,7 +337,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movl(eax, Address(rsp, 12));
cmpl(eax, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
movsd(xmm0, ExternalAddress(XMAX), tmp /*rscratch*/); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0);
bind(L_2TAG_PACKET_7_0_2);
@ -345,7 +345,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
movsd(xmm0, ExternalAddress(XMIN), tmp /*rscratch*/); // 0x00000000UL, 0x00100000UL
mulsd(xmm0, xmm0);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
@ -359,11 +359,11 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movl(eax, Address(rsp, 12));
cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
movsd(xmm0, ExternalAddress(INF), tmp /*rscratch*/); // 0x00000000UL, 0x7ff00000UL
jmp(B1_5);
bind(L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
movsd(xmm0, ExternalAddress(ZERO), tmp /*rscratch*/); // 0x00000000UL, 0x00000000UL
jmp(B1_5);
bind(L_2TAG_PACKET_11_0_2);
@ -377,7 +377,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
movsd(Address(rsp, 8), xmm0);
addsd(xmm0, ExternalAddress(ONE)); // 0x00000000UL, 0x3ff00000UL
addsd(xmm0, ExternalAddress(ONE), tmp /*rscratch*/); // 0x00000000UL, 0x3ff00000UL
jmp(B1_5);
bind(L_2TAG_PACKET_6_0_2);

View File

@ -234,15 +234,15 @@ void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulsd(xmm1, xmm0);
movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL
movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
movq(xmm6, ExternalAddress(log2), tmp1 /*rscratch*/); // 0xfefa3800UL, 0x3fa62e42UL
movdqu(xmm3, ExternalAddress(coeff), tmp1 /*rscratch*/); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp2, edx));
movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
movdqu(xmm4, ExternalAddress(coeff + 16), tmp1 /*rscratch*/); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
addsd(xmm1, xmm5);
movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
movdqu(xmm2, ExternalAddress(coeff + 32), tmp1 /*rscratch*/); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
mulsd(xmm6, xmm7);
if (VM_Version::supports_sse3()) {
movddup(xmm5, xmm1);
@ -251,7 +251,7 @@ void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movdqu(xmm5, xmm1);
movlhps(xmm5, xmm5);
}
mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL
mulsd(xmm7, ExternalAddress(log2 + 8), tmp1 /*rscratch*/); // 0x93c76730UL, 0x3ceef357UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);

View File

@ -191,13 +191,13 @@ ATTRIBUTE_ALIGNED(16) juint _coeff_log10[] =
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, tmp - r11
// rax, rdx, rcx, r11, tmp
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ecx, Register edx, Register r11) {
Register eax, Register ecx, Register edx, Register r11, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_5;
@ -225,12 +225,12 @@ void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister
movdqu(xmm1, xmm0);
movl(edx, 32768);
movdl(xmm4, edx);
movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
movdqu(xmm5, ExternalAddress(HIGHSIGMASK), tmp /*rscratch*/); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 16352);
psrlq(xmm0, 27);
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
movdqu(xmm2, ExternalAddress(LOG10_E), tmp /*rscratch*/); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
@ -255,22 +255,22 @@ void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister
cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0);
mulsd(xmm1, xmm0);
movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
movq(xmm6, ExternalAddress(log2), tmp /*rscratch*/); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
movdqu(xmm3, ExternalAddress(coeff), tmp /*rscratch*/); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504));
movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
movdqu(xmm4, ExternalAddress(coeff + 16), tmp /*rscratch*/); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
addsd(xmm1, xmm5);
movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
movdqu(xmm2, ExternalAddress(coeff + 32), tmp /*rscratch*/); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL
mulsd(xmm7, ExternalAddress(log2 + 8), tmp /*rscratch*/); //0x1f12b358UL, 0x3cdfef31UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL
movq(xmm6, ExternalAddress(LOG10_E + 8), tmp /*rscratch*/); //0xbf2e4108UL, 0x3f5a7a6cUL
mulpd(xmm5, xmm5);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
@ -333,7 +333,7 @@ void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister
por(xmm0, xmm2);
movl(ecx, 18416);
psrlq(xmm0, 27);
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
movdqu(xmm2, ExternalAddress(LOG10_E), tmp /*rscratch*/); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);

View File

@ -827,17 +827,17 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
// Special case: pow(x, 2.0) => x * x
movdq(tmp1, xmm1);
cmp64(tmp1, ExternalAddress(DOUBLE2));
cmp64(tmp1, ExternalAddress(DOUBLE2), tmp2 /*rscratch*/);
jccb(Assembler::notEqual, B1_2);
mulsd(xmm0, xmm0);
jmp(B1_5);
// Special case: pow(x, 0.5) => sqrt(x)
bind(B1_2);
cmp64(tmp1, ExternalAddress(DOUBLE0DOT5));
cmp64(tmp1, ExternalAddress(DOUBLE0DOT5), tmp2 /*rscratch*/);
jccb(Assembler::notEqual, L_POW); // For pow(x, y), check whether y == 0.5
movdq(tmp2, xmm0);
cmp64(tmp2, ExternalAddress(DOUBLE0));
cmp64(tmp2, ExternalAddress(DOUBLE0), tmp3 /*rscratch*/);
jccb(Assembler::less, L_POW); // pow(x, 0.5) => sqrt(x) only for x >= 0.0 or x is +inf/NaN
sqrtsd(xmm0, xmm0);
jmp(B1_5);
@ -861,9 +861,9 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addl(ecx, edx);
xorl(ecx, edx);
por(xmm0, xmm2);
movdqu(xmm6, ExternalAddress(HIGHSIGMASK)); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
movdqu(xmm6, ExternalAddress(HIGHSIGMASK), tmp2 /*rscratch*/); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
psrlq(xmm0, 27);
movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
movq(xmm2, ExternalAddress(LOG2_E), tmp2 /*rscratch*/); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
psrld(xmm0, 2);
addl(ecx, 16);
bsrl(ecx, ecx);
@ -903,10 +903,10 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(L_2TAG_PACKET_4_0_2);
mulsd(xmm3, xmm0);
movdqu(xmm1, ExternalAddress(coeff)); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL
movdqu(xmm1, ExternalAddress(coeff), tmp2 /*rscratch*/); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL
lea(tmp4, ExternalAddress(L_tbl));
subsd(xmm5, xmm2);
movdqu(xmm4, ExternalAddress(16 + coeff)); //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL
movdqu(xmm4, ExternalAddress(coeff + 16), tmp2 /*rscratch*/); //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL
movl(ecx, eax);
sarl(eax, 31);
addl(ecx, eax);
@ -914,12 +914,12 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addl(eax, 1);
bsrl(eax, eax);
unpcklpd(xmm5, xmm3);
movdqu(xmm6, ExternalAddress(32 + coeff)); //0x518775e3UL, 0x3f9004f2UL, 0xac8349bbUL, 0x3fa76c9bUL
movdqu(xmm6, ExternalAddress(coeff + 32), tmp2 /*rscratch*/); //0x518775e3UL, 0x3f9004f2UL, 0xac8349bbUL, 0x3fa76c9bUL
addsd(xmm3, xmm5);
andl(edx, 16760832);
shrl(edx, 10);
addpd(xmm5, Address(tmp4, edx, Address::times_1, -3648));
movdqu(xmm0, ExternalAddress(48 + coeff)); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL
movdqu(xmm0, ExternalAddress(coeff + 48), tmp2 /*rscratch*/); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL
pshufd(xmm2, xmm3, 68);
mulsd(xmm3, xmm3);
mulpd(xmm1, xmm2);
@ -932,7 +932,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movq(xmm1, Address(rsp, 16));
movw(ecx, Address(rsp, 22));
pshufd(xmm7, xmm5, 238);
movq(xmm4, ExternalAddress(HIGHMASK_Y)); //0x00000000UL, 0xfffffff8UL, 0x00000000UL, 0xffffffffUL
movq(xmm4, ExternalAddress(HIGHMASK_Y), tmp2 /*rscratch*/); //0x00000000UL, 0xfffffff8UL, 0x00000000UL, 0xffffffffUL
mulpd(xmm6, xmm2);
pshufd(xmm3, xmm3, 68);
mulpd(xmm0, xmm2);
@ -967,8 +967,8 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movdqu(xmm5, Address(tmp4, edx, Address::times_8, 0));
addsd(xmm4, xmm1);
mulsd(xmm2, xmm0);
movdqu(xmm7, ExternalAddress(e_coeff)); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
movdqu(xmm3, ExternalAddress(16 + e_coeff)); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
movdqu(xmm7, ExternalAddress(e_coeff), tmp2 /*rscratch*/); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
movdqu(xmm3, ExternalAddress(e_coeff + 16), tmp2 /*rscratch*/); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
shll(ecx, 12);
xorl(ecx, tmp1);
andl(rcx, -1048576);
@ -1027,11 +1027,11 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
por(xmm0, xmm2);
movl(ecx, 18416);
psrlq(xmm0, 27);
movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
movq(xmm2, ExternalAddress(LOG2_E), tmp2 /*rscratch*/); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm3, 12);
movdqu(xmm6, ExternalAddress(HIGHSIGMASK)); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
movdqu(xmm6, ExternalAddress(HIGHSIGMASK), tmp2 /*rscratch*/); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
psrlq(xmm3, 12);
mulss(xmm0, xmm7);
movl(edx, -1024);
@ -1073,11 +1073,11 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
por(xmm0, xmm2);
movl(ecx, 18416);
psrlq(xmm0, 27);
movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
movq(xmm2, ExternalAddress(LOG2_E), tmp2 /*rscratch*/); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm3, 12);
movdqu(xmm6, ExternalAddress(HIGHSIGMASK)); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
movdqu(xmm6, ExternalAddress(HIGHSIGMASK), tmp2 /*rscratch*/); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
psrlq(xmm3, 12);
mulss(xmm0, xmm7);
movl(edx, -1024);
@ -1104,7 +1104,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
cmpl(eax, 752);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_12_0_2);
addsd(xmm0, xmm7);
movq(xmm2, ExternalAddress(HALFMASK)); //0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL
movq(xmm2, ExternalAddress(HALFMASK), tmp2 /*rscratch*/); //0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL
addpd(xmm3, xmm0);
xorpd(xmm6, xmm6);
movl(eax, 17080);
@ -1132,8 +1132,8 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subsd(xmm6, xmm7);
lea(tmp4, ExternalAddress(T_exp));
addsd(xmm2, xmm1);
movdqu(xmm7, ExternalAddress(e_coeff)); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
movdqu(xmm3, ExternalAddress(16 + e_coeff)); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
movdqu(xmm7, ExternalAddress(e_coeff + 0), tmp2 /*rscratch*/); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
movdqu(xmm3, ExternalAddress(e_coeff + 16), tmp2 /*rscratch*/); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
subsd(xmm4, xmm6);
pextrw(edx, xmm6, 3);
movl(ecx, eax);
@ -1148,7 +1148,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
shll(ecx, 20);
xorl(ecx, tmp1);
movdl(xmm6, ecx);
movq(xmm1, ExternalAddress(32 + e_coeff)); //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL
movq(xmm1, ExternalAddress(e_coeff + 32), tmp2 /*rscratch*/); //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL
andl(edx, 32767);
cmpl(edx, 16529);
jcc(Assembler::above, L_2TAG_PACKET_12_0_2);
@ -1405,7 +1405,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
xorpd(xmm1, xmm1);
movl(edx, 30704);
pinsrw(xmm1, edx, 3);
movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
movq(xmm2, ExternalAddress(LOG2_E), tmp2 /*rscratch*/); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
movq(xmm4, Address(rsp, 8));
pextrw(eax, xmm4, 3);
movl(edx, 8192);
@ -1438,7 +1438,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
xorpd(xmm1, xmm1);
movl(edx, 30704);
pinsrw(xmm1, edx, 3);
movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
movq(xmm2, ExternalAddress(LOG2_E), tmp2 /*rscratch*/); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
movq(xmm4, Address(rsp, 8));
pextrw(eax, xmm4, 3);
movl(edx, 8192);
@ -1665,15 +1665,15 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0);
lea(r11, ExternalAddress(L_tbl));
movq(xmm4, ExternalAddress(coeff_h)); //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL
movq(xmm4, ExternalAddress(coeff_h), tmp2 /*rscratch*/); //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL
mulsd(xmm3, xmm0);
movq(xmm6, ExternalAddress(coeff_h)); //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL
movq(xmm6, ExternalAddress(coeff_h), tmp2 /*rscratch*/); //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL
subsd(xmm5, xmm2);
movq(xmm1, ExternalAddress(8 + coeff_h)); //0x00000000UL, 0xbf5dabe1UL
movq(xmm1, ExternalAddress(coeff_h + 8), tmp2 /*rscratch*/); //0x00000000UL, 0xbf5dabe1UL
pshufd(xmm2, xmm3, 68);
unpcklpd(xmm5, xmm3);
addsd(xmm3, xmm5);
movq(xmm0, ExternalAddress(8 + coeff_h)); //0x00000000UL, 0xbf5dabe1UL
movq(xmm0, ExternalAddress(coeff_h + 8), tmp2 /*rscratch*/); //0x00000000UL, 0xbf5dabe1UL
andl(edx, 16760832);
shrl(edx, 10);
addpd(xmm7, Address(tmp4, edx, Address::times_1, -3648));
@ -1698,13 +1698,13 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movdqu(xmm5, xmm7);
addsd(xmm7, xmm2);
addsd(xmm4, xmm0);
movdqu(xmm0, ExternalAddress(coeff)); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL
movdqu(xmm0, ExternalAddress(coeff), tmp2 /*rscratch*/); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL
subsd(xmm5, xmm7);
addsd(xmm6, xmm4);
movdqu(xmm4, xmm7);
addsd(xmm5, xmm2);
addsd(xmm7, xmm1);
movdqu(xmm2, ExternalAddress(64 + coeff)); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL
movdqu(xmm2, ExternalAddress(coeff + 64), tmp2 /*rscratch*/); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL
subsd(xmm4, xmm7);
addsd(xmm6, xmm5);
addsd(xmm4, xmm1);
@ -1713,11 +1713,11 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm7, xmm5);
subsd(xmm1, xmm7);
addsd(xmm1, xmm5);
movdqu(xmm5, ExternalAddress(80 + coeff)); //0x9f95985aUL, 0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL
movdqu(xmm5, ExternalAddress(coeff + 80), tmp2 /*rscratch*/); //0x9f95985aUL, 0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL
pshufd(xmm3, xmm3, 68);
addsd(xmm6, xmm4);
addsd(xmm6, xmm1);
movdqu(xmm1, ExternalAddress(32 + coeff)); //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL
movdqu(xmm1, ExternalAddress(coeff + 32), tmp2 /*rscratch*/); //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL
mulpd(xmm0, xmm3);
mulpd(xmm2, xmm3);
pshufd(xmm4, xmm3, 68);
@ -1725,7 +1725,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addpd(xmm0, xmm1);
addpd(xmm5, xmm2);
mulsd(xmm4, xmm3);
movq(xmm2, ExternalAddress(HIGHMASK_LOG_X)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xfffff800UL
movq(xmm2, ExternalAddress(HIGHMASK_LOG_X), tmp2 /*rscratch*/); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xfffff800UL
mulpd(xmm3, xmm3);
movq(xmm1, Address(rsp, 16));
movw(ecx, Address(rsp, 22));
@ -1733,7 +1733,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
pextrw(eax, xmm7, 3);
mulpd(xmm5, xmm4);
mulpd(xmm0, xmm3);
movq(xmm4, ExternalAddress(8 + HIGHMASK_Y)); //0x00000000UL, 0xffffffffUL
movq(xmm4, ExternalAddress(HIGHMASK_Y + 8), tmp2 /*rscratch*/); //0x00000000UL, 0xffffffffUL
pand(xmm2, xmm7);
addsd(xmm5, xmm6);
subsd(xmm7, xmm2);
@ -1761,12 +1761,12 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulsd(xmm3, xmm7);
addsd(xmm6, xmm4);
addsd(xmm1, xmm3);
movdqu(xmm7, ExternalAddress(e_coeff)); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
movdqu(xmm7, ExternalAddress(e_coeff), tmp2 /*rscratch*/); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
movdl(edx, xmm6);
subsd(xmm6, xmm5);
lea(tmp4, ExternalAddress(T_exp));
movdqu(xmm3, ExternalAddress(16 + e_coeff)); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
movq(xmm2, ExternalAddress(32 + e_coeff)); //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL
movdqu(xmm3, ExternalAddress(e_coeff + 16), tmp2 /*rscratch*/); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
movq(xmm2, ExternalAddress(e_coeff + 32), tmp2 /*rscratch*/); //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL
subsd(xmm4, xmm6);
movl(ecx, edx);
andl(edx, 255);

View File

@ -819,9 +819,9 @@ enum {
movl(h, Address(CTX, 4*7));
pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask;
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr +0)); //[PSHUFFLE_BYTE_FLIP_MASK wrt rip]
vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); //[_SHUF_00BA wrt rip]
vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); //[_SHUF_DC00 wrt rip]
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); // [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); // [_SHUF_00BA wrt rip]
vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); // [_SHUF_DC00 wrt rip]
movl(g, Address(CTX, 4*6));
@ -982,9 +982,9 @@ bind(only_one_block);
pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask;
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //[PSHUFFLE_BYTE_FLIP_MASK wrt rip]
vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); //[_SHUF_00BA wrt rip]
vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); //[_SHUF_DC00 wrt rip]
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); // [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); // [_SHUF_00BA wrt rip]
vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); // [_SHUF_DC00 wrt rip]
movl(g, Address(CTX, 4*6)); // 0x1f83d9ab
@ -1374,7 +1374,7 @@ void MacroAssembler::sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegiste
movq(h, Address(CTX, 8 * 7));
pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask_sha512;
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //PSHUFFLE_BYTE_FLIP_MASK wrt rip
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); // PSHUFFLE_BYTE_FLIP_MASK wrt rip
vmovdqu(YMM_MASK_LO, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));
movq(g, Address(CTX, 8 * 6));

View File

@ -185,14 +185,14 @@ ATTRIBUTE_ALIGNED(8) juint _ALL_ONES[] =
void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ebx, Register ecx, Register edx, Register tmp1) {
Register eax, Register ebx, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
Label L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1;
Label L_2TAG_PACKET_12_0_1, B1_4;
assert_different_registers(tmp1, eax, ebx, ecx, edx);
assert_different_registers(tmp, eax, ebx, ecx, edx);
address ALL_ONES = (address)_ALL_ONES;
@ -200,30 +200,29 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subq(rsp, 16);
movsd(Address(rsp, 8), xmm0);
movl(eax, Address(rsp, 12));
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
movq(xmm2, ExternalAddress(SHIFTER)); //0x00000000UL, 0x43380000UL
movq(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x40245f30UL
movq(xmm2, ExternalAddress(SHIFTER), tmp /*rscratch*/); //0x00000000UL, 0x43380000UL
andl(eax, 2147418112);
subl(eax, 808452096);
cmpl(eax, 281346048);
jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
mulsd(xmm1, xmm0);
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
movdqu(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
movdqu(xmm6, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
movdqu(xmm6, ExternalAddress(P_2), tmp /*rscratch*/); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
mov64(r8, 0x3fb921fb54400000);
movdq(xmm3, r8);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
movdqu(xmm5, ExternalAddress(SC_4), tmp /*rscratch*/); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
pshufd(xmm4, xmm0, 68);
mulsd(xmm3, xmm1);
if (VM_Version::supports_sse3()) {
movddup(xmm1, xmm1);
}
else {
} else {
movlhps(xmm1, xmm1);
}
andl(edx, 63);
@ -231,14 +230,13 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
lea(rax, ExternalAddress(Ctable));
addq(rax, rdx);
mulpd(xmm6, xmm1);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
mulsd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
subsd(xmm0, xmm3);
if (VM_Version::supports_sse3()) {
movddup(xmm3, xmm4);
}
else {
} else {
movdqu(xmm3, xmm4);
movlhps(xmm3, xmm3);
}
@ -252,7 +250,7 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm6);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
movdqu(xmm6, ExternalAddress(SC_2), tmp /*rscratch*/); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
subsd(xmm1, xmm3);
movq(xmm3, Address(rax, 24));
addsd(xmm2, xmm3);
@ -262,9 +260,9 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
addpd(xmm5, ExternalAddress(SC_3), tmp /*rscratch*/); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
addpd(xmm6, ExternalAddress(SC_1), tmp /*rscratch*/); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
@ -293,14 +291,14 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
shrl(eax, 20);
cmpl(eax, 3325);
jcc(Assembler::notEqual, L_2TAG_PACKET_2_0_1);
mulsd(xmm0, ExternalAddress(ALL_ONES)); //0xffffffffUL, 0x3fefffffUL
mulsd(xmm0, ExternalAddress(ALL_ONES), tmp /*rscratch*/); //0xffffffffUL, 0x3fefffffUL
jmp(B1_4);
bind(L_2TAG_PACKET_2_0_1);
movq(xmm3, ExternalAddress(TWO_POW_55)); //0x00000000UL, 0x43600000UL
movq(xmm3, ExternalAddress(TWO_POW_55), tmp /*rscratch*/); //0x00000000UL, 0x43600000UL
mulsd(xmm3, xmm0);
subsd(xmm3, xmm0);
mulsd(xmm3, ExternalAddress(TWO_POW_M55)); //0x00000000UL, 0x3c800000UL
mulsd(xmm3, ExternalAddress(TWO_POW_M55), tmp /*rscratch*/); //0x00000000UL, 0x3c800000UL
jmp(B1_4);
bind(L_2TAG_PACKET_1_0_1);
@ -447,8 +445,8 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
orl(edx, rsi);
xorl(edx, ebx);
pinsrw(xmm4, edx, 3);
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm2, ExternalAddress(PI_4), tmp /*rscratch*/); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm6, ExternalAddress(PI_4 + 8), tmp /*rscratch*/); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
xorpd(xmm5, xmm5);
subl(edx, 1008);
pinsrw(xmm5, edx, 3);
@ -472,17 +470,17 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm6, xmm2);
bind(L_2TAG_PACKET_12_0_1);
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
movq(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x40245f30UL
mulsd(xmm1, xmm0);
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
movq(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
movq(xmm3, ExternalAddress(P_1), tmp /*rscratch*/); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2), tmp /*rscratch*/); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
shll(eax, 3);
@ -490,13 +488,13 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movdqu(xmm4, xmm0);
addl(edx, eax);
andl(edx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm5, ExternalAddress(SC_4), tmp /*rscratch*/); //0x54400000UL, 0x3fb921fbUL
lea(rax, ExternalAddress(Ctable));
shll(edx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
mulsd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
@ -515,15 +513,15 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
subsd(xmm1, xmm6);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
movdqu(xmm6, ExternalAddress(SC_2), tmp /*rscratch*/); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
addpd(xmm5, ExternalAddress(SC_3), tmp /*rscratch*/); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
addpd(xmm6, ExternalAddress(SC_1), tmp /*rscratch*/); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
@ -627,7 +625,7 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(L_2TAG_PACKET_3_0_1);
movq(xmm0, Address(rsp, 8));
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL
mulsd(xmm0, ExternalAddress(NEG_ZERO), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
movq(Address(rsp, 0), xmm0);
bind(L_2TAG_PACKET_14_0_1);

View File

@ -462,14 +462,14 @@ ATTRIBUTE_ALIGNED(8) juint _QQ_2_tan[] =
void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ecx, Register edx, Register r8, Register r9,
Register r10, Register r11) {
Register r10, Register r11, Register tmp) {
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1, B1_2, B1_4;
assert_different_registers(eax, ecx, edx, r8, r9, r10, r11);
assert_different_registers(eax, ecx, edx, r8, r9, r10, r11, tmp);
address MUL16 = (address)_MUL16;
address sign_mask = (address)_sign_mask_tan;
@ -497,12 +497,12 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subl(eax, 16314);
cmpl(eax, 270);
jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movdqu(xmm6, ExternalAddress(MUL16)); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL
movdqu(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movdqu(xmm6, ExternalAddress(MUL16), tmp /*rscratch*/); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL
unpcklpd(xmm0, xmm0);
movdqu(xmm4, ExternalAddress(sign_mask)); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL
movdqu(xmm4, ExternalAddress(sign_mask), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL
andpd(xmm4, xmm0);
movdqu(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL
movdqu(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL
mulpd(xmm1, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
@ -512,10 +512,10 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
cvttpd2dq(xmm1, xmm1);
cvtdq2pd(xmm1, xmm1);
mulpd(xmm1, xmm6);
movdqu(xmm3, ExternalAddress(P_1)); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL
movq(xmm5, ExternalAddress(QQ_2)); //0x676733afUL, 0x3d32e7b9UL
movdqu(xmm3, ExternalAddress(P_1), tmp /*rscratch*/); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL
movq(xmm5, ExternalAddress(QQ_2), tmp /*rscratch*/); //0x676733afUL, 0x3d32e7b9UL
addq(rdx, 469248);
movdqu(xmm4, ExternalAddress(P_2)); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL
movdqu(xmm4, ExternalAddress(P_2), tmp /*rscratch*/); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL
mulpd(xmm3, xmm1);
andq(rdx, 31);
mulsd(xmm5, xmm1);
@ -523,17 +523,17 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulpd(xmm4, xmm1);
shlq(rcx, 1);
subpd(xmm0, xmm3);
mulpd(xmm1, ExternalAddress(P_3)); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL
mulpd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL
addq(rdx, rcx);
shlq(rcx, 2);
addq(rdx, rcx);
addsd(xmm5, xmm0);
movdqu(xmm2, xmm0);
subpd(xmm0, xmm4);
movq(xmm6, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
movq(xmm6, ExternalAddress(ONE), tmp /*rscratch*/); //0x00000000UL, 0x3ff00000UL
shlq(rdx, 4);
lea(rax, ExternalAddress(Ctable));
andpd(xmm5, ExternalAddress(MASK_35)); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL
andpd(xmm5, ExternalAddress(MASK_35), tmp /*rscratch*/); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL
movdqu(xmm3, xmm0);
addq(rax, rdx);
subpd(xmm2, xmm0);
@ -586,7 +586,7 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm7, Address(rax, 136));
addsd(xmm7, xmm1);
addsd(xmm0, xmm7);
movq(xmm7, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
movq(xmm7, ExternalAddress(ONE), tmp /*rscratch*/); //0x00000000UL, 0x3ff00000UL
mulsd(xmm4, xmm6);
movq(xmm2, Address(rax, 168));
andpd(xmm2, xmm6);
@ -616,26 +616,26 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
jcc(Assembler::below, L_2TAG_PACKET_3_0_1);
movdqu(xmm2, xmm0);
movdqu(xmm3, xmm0);
movq(xmm1, ExternalAddress(Q_11)); //0xb8fe4d77UL, 0x3f82609aUL
movq(xmm1, ExternalAddress(Q_11), tmp /*rscratch*/); //0xb8fe4d77UL, 0x3f82609aUL
mulsd(xmm2, xmm0);
mulsd(xmm3, xmm2);
mulsd(xmm1, xmm2);
addsd(xmm1, ExternalAddress(Q_9)); //0xbf847a43UL, 0x3f9664a0UL
addsd(xmm1, ExternalAddress(Q_9), tmp /*rscratch*/); //0xbf847a43UL, 0x3f9664a0UL
mulsd(xmm1, xmm2);
addsd(xmm1, ExternalAddress(Q_7)); //0x52c4c8abUL, 0x3faba1baUL
addsd(xmm1, ExternalAddress(Q_7), tmp /*rscratch*/); //0x52c4c8abUL, 0x3faba1baUL
mulsd(xmm1, xmm2);
addsd(xmm1, ExternalAddress(Q_5)); //0x11092746UL, 0x3fc11111UL
addsd(xmm1, ExternalAddress(Q_5), tmp /*rscratch*/); //0x11092746UL, 0x3fc11111UL
mulsd(xmm1, xmm2);
addsd(xmm1, ExternalAddress(Q_3)); //0x55555612UL, 0x3fd55555UL
addsd(xmm1, ExternalAddress(Q_3), tmp /*rscratch*/); //0x55555612UL, 0x3fd55555UL
mulsd(xmm1, xmm3);
addsd(xmm0, xmm1);
jmp(B1_4);
bind(L_2TAG_PACKET_3_0_1);
movq(xmm3, ExternalAddress(TWO_POW_55)); //0x00000000UL, 0x43600000UL
movq(xmm3, ExternalAddress(TWO_POW_55), tmp /*rscratch*/); //0x00000000UL, 0x43600000UL
mulsd(xmm3, xmm0);
addsd(xmm0, xmm3);
mulsd(xmm0, ExternalAddress(TWO_POW_M55)); //0x00000000UL, 0x3c800000UL
mulsd(xmm0, ExternalAddress(TWO_POW_M55), tmp /*rscratch*/); //0x00000000UL, 0x3c800000UL
jmp(B1_4);
bind(L_2TAG_PACKET_2_0_1);
@ -786,8 +786,8 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
orl(edx, rsi);
xorl(edx, rbx);
pinsrw(xmm4, edx, 3);
movq(xmm2, ExternalAddress(PI_4)); //0x00000000UL, 0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL
movq(xmm7, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL
movq(xmm2, ExternalAddress(PI_4), tmp /*rscratch*/); //0x00000000UL, 0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL
movq(xmm7, ExternalAddress(PI_4 + 8), tmp /*rscratch*/); //0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL
xorpd(xmm5, xmm5);
subl(edx, 1008);
pinsrw(xmm5, edx, 3);
@ -809,14 +809,14 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm0, xmm7);
subsd(xmm2, xmm0);
addsd(xmm7, xmm2);
movdqu(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL
movdqu(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL
if (VM_Version::supports_sse3()) {
movddup(xmm0, xmm0);
}
else {
movlhps(xmm0, xmm0);
}
movdqu(xmm4, ExternalAddress(sign_mask)); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL
movdqu(xmm4, ExternalAddress(sign_mask), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL
andpd(xmm4, xmm0);
mulpd(xmm1, xmm0);
if (VM_Version::supports_sse3()) {
@ -825,8 +825,8 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
else {
movlhps(xmm7, xmm7);
}
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movdqu(xmm6, ExternalAddress(MUL16)); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL
movdqu(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movdqu(xmm6, ExternalAddress(MUL16), tmp /*rscratch*/); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL
por(xmm5, xmm4);
addpd(xmm1, xmm5);
movdqu(xmm5, xmm1);
@ -835,11 +835,11 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
cvttpd2dq(xmm1, xmm1);
cvtdq2pd(xmm1, xmm1);
mulpd(xmm1, xmm6);
movdqu(xmm3, ExternalAddress(P_1)); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL
movq(xmm5, ExternalAddress(QQ_2)); //0x676733afUL, 0x3d32e7b9UL
movdqu(xmm3, ExternalAddress(P_1), tmp /*rscratch*/); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL
movq(xmm5, ExternalAddress(QQ_2), tmp /*rscratch*/); //0x676733afUL, 0x3d32e7b9UL
shll(eax, 4);
addl(edx, 469248);
movdqu(xmm4, ExternalAddress(P_2)); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL
movdqu(xmm4, ExternalAddress(P_2), tmp /*rscratch*/); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL
mulpd(xmm3, xmm1);
addl(edx, eax);
andl(edx, 31);
@ -848,17 +848,17 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulpd(xmm4, xmm1);
shll(ecx, 1);
subpd(xmm0, xmm3);
mulpd(xmm1, ExternalAddress(P_3)); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL
mulpd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL
addl(edx, ecx);
shll(ecx, 2);
addl(edx, ecx);
addsd(xmm5, xmm0);
movdqu(xmm2, xmm0);
subpd(xmm0, xmm4);
movq(xmm6, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
movq(xmm6, ExternalAddress(ONE), tmp /*rscratch*/); //0x00000000UL, 0x3ff00000UL
shll(edx, 4);
lea(rax, ExternalAddress(Ctable));
andpd(xmm5, ExternalAddress(MASK_35)); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL
andpd(xmm5, ExternalAddress(MASK_35), tmp /*rscratch*/); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL
movdqu(xmm3, xmm0);
addq(rax, rdx);
subpd(xmm2, xmm0);
@ -912,7 +912,7 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm7, Address(rax, 136));
addsd(xmm7, xmm1);
addsd(xmm0, xmm7);
movq(xmm7, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
movq(xmm7, ExternalAddress(ONE), tmp /*rscratch*/); //0x00000000UL, 0x3ff00000UL
mulsd(xmm4, xmm6);
movq(xmm2, Address(rax, 168));
andpd(xmm2, xmm6);
@ -1009,7 +1009,7 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(L_2TAG_PACKET_4_0_1);
movq(xmm0, Address(rsp, 8));
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL
mulsd(xmm0, ExternalAddress(NEG_ZERO), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
movq(Address(rsp, 0), xmm0);
bind(L_2TAG_PACKET_14_0_1);

View File

@ -3545,21 +3545,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ fast_exp(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -3572,21 +3561,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ fast_log(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -3599,21 +3577,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ fast_log10(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -3626,21 +3593,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ fast_pow(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -3665,11 +3621,8 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
BLOCK_COMMENT("Entry:");
__ libm_sincos_huge(x0, x1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
__ libm_sincos_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
return start;
@ -3680,19 +3633,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rdx);
__ fast_sin(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rbx, rdx);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -3705,21 +3649,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ fast_cos(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -3732,11 +3665,8 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
BLOCK_COMMENT("Entry:");
__ libm_tancot_huge(x0, x1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
__ libm_tancot_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
return start;
@ -3747,21 +3677,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ fast_tan(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);

View File

@ -7263,22 +7263,11 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ fast_exp(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r11);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -7292,23 +7281,11 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r11;
const Register tmp2 = r8;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
__ fast_log(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r11, r8);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -7322,22 +7299,11 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ fast_log10(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r11, r8);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -7351,25 +7317,11 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
const Register tmp2 = r9;
const Register tmp3 = r10;
const Register tmp4 = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
__ fast_pow(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r8, r9, r10, r11);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -7383,18 +7335,6 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
@ -7402,8 +7342,8 @@ address generate_avx_ghash_processBlocks() {
__ push(rsi);
__ push(rdi);
#endif
__ fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rcx, rdx, tmp1);
__ fast_sin(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rbx, rcx, rdx, r8);
#ifdef _WIN64
__ pop(rdi);
__ pop(rsi);
@ -7421,21 +7361,6 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
const Register tmp2 = r9;
const Register tmp3 = r10;
const Register tmp4 = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
@ -7443,7 +7368,8 @@ address generate_avx_ghash_processBlocks() {
__ push(rsi);
__ push(rdi);
#endif
__ fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
__ fast_cos(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r8, r9, r10, r11, rbx);
#ifdef _WIN64
__ pop(rdi);
@ -7462,21 +7388,6 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
const Register tmp2 = r9;
const Register tmp3 = r10;
const Register tmp4 = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
@ -7484,7 +7395,8 @@ address generate_avx_ghash_processBlocks() {
__ push(rsi);
__ push(rdi);
#endif
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
__ fast_tan(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r8, r9, r10, r11, rbx);
#ifdef _WIN64
__ pop(rdi);