8292638: x86: Improve scratch register handling in VM stubs

Co-authored-by: Aleksey Shipilev <shade@openjdk.org>
Reviewed-by: kvn, shade
This commit is contained in:
Vladimir Ivanov 2022-08-23 19:32:51 +00:00
parent d24b7b7026
commit f3be6731d3
14 changed files with 371 additions and 507 deletions

View File

@ -12165,81 +12165,93 @@ void Assembler::set_byte_if_not_zero(Register dst) {
#else // LP64 #else // LP64
// 64bit only pieces of the assembler
void Assembler::set_byte_if_not_zero(Register dst) { void Assembler::set_byte_if_not_zero(Register dst) {
int enc = prefix_and_encode(dst->encoding(), true); int enc = prefix_and_encode(dst->encoding(), true);
emit_int24(0x0F, (unsigned char)0x95, (0xC0 | enc)); emit_int24(0x0F, (unsigned char)0x95, (0xC0 | enc));
} }
// 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative // This should only be used by 64bit instructions that can use rip-relative
// it cannot be used by instructions that want an immediate value. // it cannot be used by instructions that want an immediate value.
bool Assembler::reachable(AddressLiteral adr) { // Determine whether an address is always reachable in rip-relative addressing mode
int64_t disp; // when accessed from the code cache.
relocInfo::relocType relocType = adr.reloc(); static bool is_always_reachable(address target, relocInfo::relocType reloc_type) {
switch (reloc_type) {
// None will force a 64bit literal to the code stream. Likely a placeholder // This should be rip-relative and easily reachable.
// for something that will be patched later and we need to be certain it will case relocInfo::internal_word_type: {
// always be reachable. return true;
if (relocType == relocInfo::none) { }
return false; // This should be rip-relative within the code cache and easily
}
if (relocType == relocInfo::internal_word_type) {
// This should be rip relative and easily reachable.
return true;
}
if (relocType == relocInfo::virtual_call_type ||
relocType == relocInfo::opt_virtual_call_type ||
relocType == relocInfo::static_call_type ||
relocType == relocInfo::static_stub_type ) {
// This should be rip relative within the code cache and easily
// reachable until we get huge code caches. (At which point // reachable until we get huge code caches. (At which point
// ic code is going to have issues). // IC code is going to have issues).
return true; case relocInfo::virtual_call_type:
} case relocInfo::opt_virtual_call_type:
if (relocType != relocInfo::external_word_type && case relocInfo::static_call_type:
relocType != relocInfo::poll_return_type && // these are really external_word but need special case relocInfo::static_stub_type: {
relocType != relocInfo::poll_type && // relocs to identify them return true;
relocType != relocInfo::runtime_call_type ) { }
return false; case relocInfo::runtime_call_type:
} case relocInfo::external_word_type:
case relocInfo::poll_return_type: // these are really external_word but need special
// Stress the correction code case relocInfo::poll_type: { // relocs to identify them
if (ForceUnreachable) { return CodeCache::contains(target);
// Must be runtimecall reloc, see if it is in the codecache }
// Flipping stuff in the codecache to be unreachable causes issues default: {
// with things like inline caches where the additional instructions
// are not handled.
if (CodeCache::find_blob(adr._target) == NULL) {
return false; return false;
} }
} }
// For external_word_type/runtime_call_type if it is reachable from where we }
// are now (possibly a temp buffer) and where we might end up
// anywhere in the codeCache then we are always reachable.
// This would have to change if we ever save/restore shared code
// to be more pessimistic.
disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
if (!is_simm32(disp)) return false;
disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
if (!is_simm32(disp)) return false;
disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int)); // Determine whether an address is reachable in rip-relative addressing mode from the code cache.
static bool is_reachable(address target, relocInfo::relocType reloc_type) {
// Because rip relative is a disp + address_of_next_instruction and we if (is_always_reachable(target, reloc_type)) {
// don't know the value of address_of_next_instruction we apply a fudge factor return true;
// to make sure we will be ok no matter the size of the instruction we get placed into.
// We don't have to fudge the checks above here because they are already worst case.
// 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
// + 4 because better safe than sorry.
const int fudge = 12 + 4;
if (disp < 0) {
disp -= fudge;
} else {
disp += fudge;
} }
return is_simm32(disp); switch (reloc_type) {
// None will force a 64bit literal to the code stream. Likely a placeholder
// for something that will be patched later and we need to be certain it will
// always be reachable.
case relocInfo::none: {
return false;
}
case relocInfo::runtime_call_type:
case relocInfo::external_word_type:
case relocInfo::poll_return_type: // these are really external_word but need special
case relocInfo::poll_type: { // relocs to identify them
assert(!CodeCache::contains(target), "always reachable");
if (ForceUnreachable) {
return false; // stress the correction code
}
// For external_word_type/runtime_call_type if it is reachable from where we
// are now (possibly a temp buffer) and where we might end up
// anywhere in the code cache then we are always reachable.
// This would have to change if we ever save/restore shared code to be more pessimistic.
// Code buffer has to be allocated in the code cache, so checking against
// code cache boundaries covers that case.
//
// In rip-relative addressing mode, an effective address is formed by adding displacement
// to the 64-bit RIP of the next instruction which is not known yet. Considering target address
// is guaranteed to be outside of the code cache, checking against code cache boundaries is enough
// to account for that.
return Assembler::is_simm32(target - CodeCache::low_bound()) &&
Assembler::is_simm32(target - CodeCache::high_bound());
}
default: {
return false;
}
}
}
bool Assembler::reachable(AddressLiteral adr) {
assert(CodeCache::contains(pc()), "required");
return is_reachable(adr.target(), adr.reloc());
}
bool Assembler::always_reachable(AddressLiteral adr) {
assert(CodeCache::contains(pc()), "required");
return is_always_reachable(adr.target(), adr.reloc());
} }
void Assembler::emit_data64(jlong data, void Assembler::emit_data64(jlong data,

View File

@ -802,16 +802,18 @@ private:
void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32); void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
protected: protected:
#ifdef ASSERT #ifdef ASSERT
void check_relocation(RelocationHolder const& rspec, int format); void check_relocation(RelocationHolder const& rspec, int format);
#endif #endif
void emit_data(jint data, relocInfo::relocType rtype, int format); void emit_data(jint data, relocInfo::relocType rtype, int format);
void emit_data(jint data, RelocationHolder const& rspec, int format); void emit_data(jint data, RelocationHolder const& rspec, int format);
void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0); void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
bool reachable(AddressLiteral adr) NOT_LP64({ return true;}); bool always_reachable(AddressLiteral adr) NOT_LP64( { return true; } );
bool reachable(AddressLiteral adr) NOT_LP64( { return true; } );
// These are all easily abused and hence protected // These are all easily abused and hence protected

View File

@ -520,14 +520,15 @@ void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
} }
void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { void MacroAssembler::cmp64(Register src1, AddressLiteral src2, Register rscratch) {
assert(!src2.is_lval(), "should use cmpptr"); assert(!src2.is_lval(), "should use cmpptr");
assert(rscratch != noreg || always_reachable(src2), "missing");
if (reachable(src2)) { if (reachable(src2)) {
cmpq(src1, as_Address(src2)); cmpq(src1, as_Address(src2));
} else { } else {
lea(rscratch1, src2); lea(rscratch, src2);
Assembler::cmpq(src1, Address(rscratch1, 0)); Assembler::cmpq(src1, Address(rscratch, 0));
} }
} }
@ -1122,30 +1123,36 @@ void MacroAssembler::addptr(Address dst, Register src) {
LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
} }
void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
Assembler::addsd(dst, as_Address(src)); Assembler::addsd(dst, as_Address(src));
} else { } else {
lea(rscratch1, src); lea(rscratch, src);
Assembler::addsd(dst, Address(rscratch1, 0)); Assembler::addsd(dst, Address(rscratch, 0));
} }
} }
void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { void MacroAssembler::addss(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
addss(dst, as_Address(src)); addss(dst, as_Address(src));
} else { } else {
lea(rscratch1, src); lea(rscratch, src);
addss(dst, Address(rscratch1, 0)); addss(dst, Address(rscratch, 0));
} }
} }
void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src) { void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
Assembler::addpd(dst, as_Address(src)); Assembler::addpd(dst, as_Address(src));
} else { } else {
lea(rscratch1, src); lea(rscratch, src);
Assembler::addpd(dst, Address(rscratch1, 0)); Assembler::addpd(dst, Address(rscratch, 0));
} }
} }
@ -2124,12 +2131,13 @@ void MacroAssembler::empty_FPU_stack() {
} }
#endif // !LP64 #endif // !LP64
void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) { void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
Assembler::mulpd(dst, as_Address(src)); Assembler::mulpd(dst, as_Address(src));
} else { } else {
lea(rscratch1, src); lea(rscratch, src);
Assembler::mulpd(dst, Address(rscratch1, 0)); Assembler::mulpd(dst, Address(rscratch, 0));
} }
} }
@ -2469,21 +2477,23 @@ void MacroAssembler::movbyte(ArrayAddress dst, int src) {
movb(as_Address(dst), src); movb(as_Address(dst), src);
} }
void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
movdl(dst, as_Address(src)); movdl(dst, as_Address(src));
} else { } else {
lea(rscratch1, src); lea(rscratch, src);
movdl(dst, Address(rscratch1, 0)); movdl(dst, Address(rscratch, 0));
} }
} }
void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { void MacroAssembler::movq(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
movq(dst, as_Address(src)); movq(dst, as_Address(src));
} else { } else {
lea(rscratch1, src); lea(rscratch, src);
movq(dst, Address(rscratch1, 0)); movq(dst, Address(rscratch, 0));
} }
} }
@ -2683,16 +2693,20 @@ void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral s
} }
void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
int vector_len, Register scratch_reg) { int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len); Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
} else { } else {
lea(scratch_reg, src); lea(rscratch, src);
Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len); Assembler::evmovdquq(dst, mask, Address(rscratch, 0), merge, vector_len);
} }
} }
void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) { void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
Assembler::evmovdquq(dst, as_Address(src), vector_len); Assembler::evmovdquq(dst, as_Address(src), vector_len);
} else { } else {
@ -2710,12 +2724,14 @@ void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
} }
} }
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
Assembler::movsd(dst, as_Address(src)); Assembler::movsd(dst, as_Address(src));
} else { } else {
lea(rscratch1, src); lea(rscratch, src);
Assembler::movsd(dst, Address(rscratch1, 0)); Assembler::movsd(dst, Address(rscratch, 0));
} }
} }
@ -2746,12 +2762,14 @@ void MacroAssembler::vmovddup(XMMRegister dst, AddressLiteral src, int vector_le
} }
} }
void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
Assembler::mulsd(dst, as_Address(src)); Assembler::mulsd(dst, as_Address(src));
} else { } else {
lea(rscratch1, src); lea(rscratch, src);
Assembler::mulsd(dst, Address(rscratch1, 0)); Assembler::mulsd(dst, Address(rscratch, 0));
} }
} }
@ -3246,6 +3264,8 @@ void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src
void MacroAssembler::vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) { void MacroAssembler::vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
assert(UseAVX > 0, "requires some form of AVX"); assert(UseAVX > 0, "requires some form of AVX");
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) { if (reachable(src)) {
Assembler::vpaddd(dst, nds, as_Address(src), vector_len); Assembler::vpaddd(dst, nds, as_Address(src), vector_len);
} else { } else {

View File

@ -762,7 +762,7 @@ public:
void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
// cmp64 to avoid hiding cmpq // cmp64 to avoid hiding cmpq
void cmp64(Register src1, AddressLiteral src); void cmp64(Register src1, AddressLiteral src, Register rscratch = rscratch1);
void cmpxchgptr(Register reg, Address adr); void cmpxchgptr(Register reg, Address adr);
@ -1055,7 +1055,7 @@ public:
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register r11); Register rax, Register rcx, Register rdx, Register r11, Register tmp);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
@ -1067,12 +1067,14 @@ public:
void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1, Register rax, Register rcx, Register rdx, Register r8,
Register tmp2, Register tmp3, Register tmp4); Register r9, Register r10, Register r11, Register tmp);
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1, Register rax, Register rcx, Register rdx, Register r8,
Register tmp2, Register tmp3, Register tmp4); Register r9, Register r10, Register r11, Register tmp);
#else #else
private: private:
// Initialized in macroAssembler_x86_constants.cpp // Initialized in macroAssembler_x86_constants.cpp
@ -1133,17 +1135,17 @@ private:
public: public:
void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
void addsd(XMMRegister dst, AddressLiteral src); void addsd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
void addss(XMMRegister dst, AddressLiteral src); void addss(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); } void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); }
void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); } void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
void addpd(XMMRegister dst, AddressLiteral src); void addpd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
using Assembler::vbroadcastsd; using Assembler::vbroadcastsd;
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1); void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
@ -1247,36 +1249,36 @@ public:
Assembler::evmovdquq(dst, src, vector_len); Assembler::evmovdquq(dst, src, vector_len);
} }
} }
void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch); void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
if (dst->encoding() != src->encoding() || mask != k0) { if (dst->encoding() != src->encoding() || mask != k0) {
Assembler::evmovdquq(dst, mask, src, merge, vector_len); Assembler::evmovdquq(dst, mask, src, merge, vector_len);
} }
} }
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); } void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); } void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
// Move Aligned Double Quadword // Move Aligned Double Quadword
void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); } void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
void movdqa(XMMRegister dst, AddressLiteral src); void movdqa(XMMRegister dst, AddressLiteral src);
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, AddressLiteral src); void movsd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); } void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); } void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
void mulpd(XMMRegister dst, AddressLiteral src); void mulpd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, AddressLiteral src); void mulsd(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
@ -1367,9 +1369,9 @@ public:
void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); } void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); } void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch); void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
@ -1888,8 +1890,8 @@ public:
// they will be hidden by the following overriding declaration. // they will be hidden by the following overriding declaration.
using Assembler::movdl; using Assembler::movdl;
using Assembler::movq; using Assembler::movq;
void movdl(XMMRegister dst, AddressLiteral src); void movdl(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
void movq(XMMRegister dst, AddressLiteral src); void movq (XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
// Can push value or effective address // Can push value or effective address
void pushptr(AddressLiteral src); void pushptr(AddressLiteral src);

View File

@ -178,15 +178,14 @@
// The 64 bit code is at most SSE2 compliant // The 64 bit code is at most SSE2 compliant
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ecx, Register edx, Register eax, Register ecx, Register edx, Register r8,
Register r8, Register r9, Register r10, Register r11) { Register r9, Register r10, Register r11, Register tmp) {
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_4, start; Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_4, start;
assert_different_registers(r8, r9, r10, r11, eax, ecx, edx); assert_different_registers(eax, ecx, edx, r8, r9, r10, r11, tmp);
bind(start); bind(start);
push(rbx); push(rbx);
@ -195,33 +194,33 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(B1_2); bind(B1_2);
movl(eax, Address(rsp, 12)); movl(eax, Address(rsp, 12));
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL movq(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x40245f30UL
andl(eax, 2147418112); andl(eax, 2147418112);
subl(eax, 808452096); subl(eax, 808452096);
cmpl(eax, 281346048); cmpl(eax, 281346048);
jcc(Assembler::above, L_2TAG_PACKET_0_0_1); jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
mulsd(xmm1, xmm0); mulsd(xmm1, xmm0);
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL movdqu(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL movq(xmm4, ExternalAddress(SIGN_MASK), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0); pand(xmm4, xmm0);
por(xmm5, xmm4); por(xmm5, xmm4);
addpd(xmm1, xmm5); addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1); cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx); cvtsi2sdl(xmm1, edx);
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL movdqu(xmm2, ExternalAddress(P_2), tmp /*rscratch*/); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL movq(xmm3, ExternalAddress(P_1), tmp /*rscratch*/); //0x54400000UL, 0x3fb921fbUL
mulsd(xmm3, xmm1); mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1); unpcklpd(xmm1, xmm1);
addq(rdx, 1865232); addq(rdx, 1865232);
movdqu(xmm4, xmm0); movdqu(xmm4, xmm0);
andq(rdx, 63); andq(rdx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL movdqu(xmm5, ExternalAddress(SC_4), tmp /*rscratch*/); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable)); lea(rax, ExternalAddress(Ctable));
shlq(rdx, 5); shlq(rdx, 5);
addq(rax, rdx); addq(rax, rdx);
mulpd(xmm2, xmm1); mulpd(xmm2, xmm1);
subsd(xmm0, xmm3); subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL mulsd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3); subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8)); movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0); unpcklpd(xmm0, xmm0);
@ -229,7 +228,7 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subsd(xmm4, xmm2); subsd(xmm4, xmm2);
mulpd(xmm5, xmm0); mulpd(xmm5, xmm0);
subpd(xmm0, xmm2); subpd(xmm0, xmm2);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL movdqu(xmm6, ExternalAddress(SC_2), tmp /*rscratch*/); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm7, xmm4); mulsd(xmm7, xmm4);
subsd(xmm3, xmm4); subsd(xmm3, xmm4);
mulpd(xmm5, xmm0); mulpd(xmm5, xmm0);
@ -245,9 +244,9 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulsd(xmm3, xmm4); mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0); mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0); mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL addpd(xmm5, ExternalAddress(SC_3), tmp /*rscratch*/); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0)); mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL addpd(xmm6, ExternalAddress(SC_1), tmp /*rscratch*/); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0); mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3); movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8)); addsd(xmm3, Address(rax, 8));
@ -275,7 +274,7 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
pextrw(eax, xmm0, 3); pextrw(eax, xmm0, 3);
andl(eax, 32767); andl(eax, 32767);
pinsrw(xmm0, eax, 3); pinsrw(xmm0, eax, 3);
movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL movq(xmm1, ExternalAddress(ONE), tmp /*rscratch*/); // 0x00000000UL, 0x3ff00000UL
subsd(xmm1, xmm0); subsd(xmm1, xmm0);
movdqu(xmm0, xmm1); movdqu(xmm0, xmm1);
jmp(B1_4); jmp(B1_4);
@ -423,8 +422,8 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
orl(edx, rsi); orl(edx, rsi);
xorl(edx, rbx); xorl(edx, rbx);
pinsrw(xmm4, edx, 3); pinsrw(xmm4, edx, 3);
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL movq(xmm2, ExternalAddress(PI_4), tmp /*rscratch*/); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm6, ExternalAddress(PI_4 + 8)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL movq(xmm6, ExternalAddress(PI_4 + 8), tmp /*rscratch*/); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
xorpd(xmm5, xmm5); xorpd(xmm5, xmm5);
subl(edx, 1008); subl(edx, 1008);
pinsrw(xmm5, edx, 3); pinsrw(xmm5, edx, 3);
@ -448,17 +447,17 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm6, xmm2); addsd(xmm6, xmm2);
bind(L_2TAG_PACKET_11_0_1); bind(L_2TAG_PACKET_11_0_1);
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL movq(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x40245f30UL
mulsd(xmm1, xmm0); mulsd(xmm1, xmm0);
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL movq(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL movq(xmm4, ExternalAddress(SIGN_MASK), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0); pand(xmm4, xmm0);
por(xmm5, xmm4); por(xmm5, xmm4);
addpd(xmm1, xmm5); addpd(xmm1, xmm5);
cvttsd2siq(rdx, xmm1); cvttsd2siq(rdx, xmm1);
cvtsi2sdq(xmm1, rdx); cvtsi2sdq(xmm1, rdx);
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL movq(xmm3, ExternalAddress(P_1), tmp /*rscratch*/); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL movdqu(xmm2, ExternalAddress(P_2), tmp /*rscratch*/); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
mulsd(xmm3, xmm1); mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1); unpcklpd(xmm1, xmm1);
shll(eax, 3); shll(eax, 3);
@ -466,13 +465,13 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movdqu(xmm4, xmm0); movdqu(xmm4, xmm0);
addl(edx, eax); addl(edx, eax);
andl(edx, 63); andl(edx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL movdqu(xmm5, ExternalAddress(SC_4), tmp /*rscratch*/); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable)); lea(rax, ExternalAddress(Ctable));
shll(edx, 5); shll(edx, 5);
addq(rax, rdx); addq(rax, rdx);
mulpd(xmm2, xmm1); mulpd(xmm2, xmm1);
subsd(xmm0, xmm3); subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL mulsd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3); subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8)); movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0); unpcklpd(xmm0, xmm0);
@ -491,15 +490,15 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm2, xmm3); addsd(xmm2, xmm3);
subsd(xmm7, xmm2); subsd(xmm7, xmm2);
subsd(xmm1, xmm6); subsd(xmm1, xmm6);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL movdqu(xmm6, ExternalAddress(SC_2), tmp /*rscratch*/); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm2, xmm4); mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0); mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4); mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0); mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0); mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL addpd(xmm5, ExternalAddress(SC_3), tmp /*rscratch*/); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0)); mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL addpd(xmm6, ExternalAddress(SC_1), tmp /*rscratch*/); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0); mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3); movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8)); addsd(xmm3, Address(rax, 8));
@ -602,7 +601,7 @@ void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(L_2TAG_PACKET_2_0_1); bind(L_2TAG_PACKET_2_0_1);
movsd(xmm0, Address(rsp, 8)); movsd(xmm0, Address(rsp, 8));
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL mulsd(xmm0, ExternalAddress(NEG_ZERO), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
movq(Address(rsp, 0), xmm0); movq(Address(rsp, 0), xmm0);
bind(L_2TAG_PACKET_13_0_1); bind(L_2TAG_PACKET_13_0_1);

View File

@ -200,10 +200,10 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subq(rsp, 24); subq(rsp, 24);
movsd(Address(rsp, 8), xmm0); movsd(Address(rsp, 8), xmm0);
unpcklpd(xmm0, xmm0); unpcklpd(xmm0, xmm0);
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL movdqu(xmm1, ExternalAddress(cv), tmp /*rscratch*/); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm2, ExternalAddress(cv + 16)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL movdqu(xmm2, ExternalAddress(cv + 16), tmp /*rscratch*/); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, ExternalAddress(cv + 32)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL movdqu(xmm3, ExternalAddress(cv + 32), tmp /*rscratch*/); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
movdqu(xmm6, ExternalAddress(SHIFTER)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL movdqu(xmm6, ExternalAddress(SHIFTER), tmp /*rscratch*/); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
pextrw(eax, xmm0, 3); pextrw(eax, xmm0, 3);
andl(eax, 32767); andl(eax, 32767);
movl(edx, 16527); movl(edx, 16527);
@ -217,9 +217,9 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movapd(xmm7, xmm1); movapd(xmm7, xmm1);
subpd(xmm1, xmm6); subpd(xmm1, xmm6);
mulpd(xmm2, xmm1); mulpd(xmm2, xmm1);
movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL movdqu(xmm4, ExternalAddress(cv + 64), tmp /*rscratch*/); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1); mulpd(xmm3, xmm1);
movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL movdqu(xmm5, ExternalAddress(cv + 80), tmp /*rscratch*/); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2); subpd(xmm0, xmm2);
movdl(eax, xmm7); movdl(eax, xmm7);
movl(ecx, eax); movl(ecx, eax);
@ -227,9 +227,9 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
shll(ecx, 4); shll(ecx, 4);
sarl(eax, 6); sarl(eax, 6);
movl(edx, eax); movl(edx, eax);
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL movdqu(xmm6, ExternalAddress(mmask), tmp /*rscratch*/); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6); pand(xmm7, xmm6);
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL movdqu(xmm6, ExternalAddress(bias), tmp /*rscratch*/); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6); paddq(xmm7, xmm6);
psllq(xmm7, 46); psllq(xmm7, 46);
subpd(xmm0, xmm3); subpd(xmm0, xmm3);
@ -242,7 +242,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulpd(xmm0, xmm6); mulpd(xmm0, xmm6);
addpd(xmm5, xmm4); addpd(xmm5, xmm4);
mulsd(xmm0, xmm6); mulsd(xmm0, xmm6);
mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL mulpd(xmm6, ExternalAddress(cv + 48), tmp /*rscratch*/); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
addsd(xmm1, xmm2); addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2); unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5); mulpd(xmm0, xmm5);
@ -260,7 +260,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(L_2TAG_PACKET_1_0_2); bind(L_2TAG_PACKET_1_0_2);
xorpd(xmm3, xmm3); xorpd(xmm3, xmm3);
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL movdqu(xmm4, ExternalAddress(ALLONES), tmp /*rscratch*/); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
movl(edx, -1022); movl(edx, -1022);
subl(edx, eax); subl(edx, eax);
movdl(xmm5, edx); movdl(xmm5, edx);
@ -268,7 +268,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movl(ecx, eax); movl(ecx, eax);
sarl(eax, 1); sarl(eax, 1);
pinsrw(xmm3, eax, 3); pinsrw(xmm3, eax, 3);
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL movdqu(xmm6, ExternalAddress(ebias), tmp /*rscratch*/); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
psllq(xmm3, 4); psllq(xmm3, 4);
psubd(xmm2, xmm3); psubd(xmm2, xmm3);
mulsd(xmm0, xmm2); mulsd(xmm0, xmm2);
@ -337,7 +337,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movl(eax, Address(rsp, 12)); movl(eax, Address(rsp, 12));
cmpl(eax, INT_MIN); cmpl(eax, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2); jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL movsd(xmm0, ExternalAddress(XMAX), tmp /*rscratch*/); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0); mulsd(xmm0, xmm0);
bind(L_2TAG_PACKET_7_0_2); bind(L_2TAG_PACKET_7_0_2);
@ -345,7 +345,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
jmp(L_2TAG_PACKET_6_0_2); jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_10_0_2); bind(L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL movsd(xmm0, ExternalAddress(XMIN), tmp /*rscratch*/); // 0x00000000UL, 0x00100000UL
mulsd(xmm0, xmm0); mulsd(xmm0, xmm0);
movl(Address(rsp, 0), 15); movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2); jmp(L_2TAG_PACKET_6_0_2);
@ -359,11 +359,11 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movl(eax, Address(rsp, 12)); movl(eax, Address(rsp, 12));
cmpl(eax, 2146435072); cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2); jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL movsd(xmm0, ExternalAddress(INF), tmp /*rscratch*/); // 0x00000000UL, 0x7ff00000UL
jmp(B1_5); jmp(B1_5);
bind(L_2TAG_PACKET_12_0_2); bind(L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL movsd(xmm0, ExternalAddress(ZERO), tmp /*rscratch*/); // 0x00000000UL, 0x00000000UL
jmp(B1_5); jmp(B1_5);
bind(L_2TAG_PACKET_11_0_2); bind(L_2TAG_PACKET_11_0_2);
@ -377,7 +377,7 @@ void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
cmpl(eax, 1083179008); cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2); jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
movsd(Address(rsp, 8), xmm0); movsd(Address(rsp, 8), xmm0);
addsd(xmm0, ExternalAddress(ONE)); // 0x00000000UL, 0x3ff00000UL addsd(xmm0, ExternalAddress(ONE), tmp /*rscratch*/); // 0x00000000UL, 0x3ff00000UL
jmp(B1_5); jmp(B1_5);
bind(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_6_0_2);

View File

@ -234,15 +234,15 @@ void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subl(eax, ecx); subl(eax, ecx);
cvtsi2sdl(xmm7, eax); cvtsi2sdl(xmm7, eax);
mulsd(xmm1, xmm0); mulsd(xmm1, xmm0);
movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL movq(xmm6, ExternalAddress(log2), tmp1 /*rscratch*/); // 0xfefa3800UL, 0x3fa62e42UL
movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL movdqu(xmm3, ExternalAddress(coeff), tmp1 /*rscratch*/); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
subsd(xmm5, xmm2); subsd(xmm5, xmm2);
andl(edx, 16711680); andl(edx, 16711680);
shrl(edx, 12); shrl(edx, 12);
movdqu(xmm0, Address(tmp2, edx)); movdqu(xmm0, Address(tmp2, edx));
movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL movdqu(xmm4, ExternalAddress(coeff + 16), tmp1 /*rscratch*/); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
addsd(xmm1, xmm5); addsd(xmm1, xmm5);
movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL movdqu(xmm2, ExternalAddress(coeff + 32), tmp1 /*rscratch*/); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
mulsd(xmm6, xmm7); mulsd(xmm6, xmm7);
if (VM_Version::supports_sse3()) { if (VM_Version::supports_sse3()) {
movddup(xmm5, xmm1); movddup(xmm5, xmm1);
@ -251,7 +251,7 @@ void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movdqu(xmm5, xmm1); movdqu(xmm5, xmm1);
movlhps(xmm5, xmm5); movlhps(xmm5, xmm5);
} }
mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL mulsd(xmm7, ExternalAddress(log2 + 8), tmp1 /*rscratch*/); // 0x93c76730UL, 0x3ceef357UL
mulsd(xmm3, xmm1); mulsd(xmm3, xmm1);
addsd(xmm0, xmm6); addsd(xmm0, xmm6);
mulpd(xmm4, xmm5); mulpd(xmm4, xmm5);

View File

@ -191,13 +191,13 @@ ATTRIBUTE_ALIGNED(16) juint _coeff_log10[] =
// Registers: // Registers:
// input: xmm0 // input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, tmp - r11 // rax, rdx, rcx, r11, tmp
// Code generated by Intel C compiler for LIBM library // Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ecx, Register edx, Register r11) { Register eax, Register ecx, Register edx, Register r11, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_5; Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_5;
@ -225,12 +225,12 @@ void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister
movdqu(xmm1, xmm0); movdqu(xmm1, xmm0);
movl(edx, 32768); movl(edx, 32768);
movdl(xmm4, edx); movdl(xmm4, edx);
movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL movdqu(xmm5, ExternalAddress(HIGHSIGMASK), tmp /*rscratch*/); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
pextrw(eax, xmm0, 3); pextrw(eax, xmm0, 3);
por(xmm0, xmm2); por(xmm0, xmm2);
movl(ecx, 16352); movl(ecx, 16352);
psrlq(xmm0, 27); psrlq(xmm0, 27);
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL movdqu(xmm2, ExternalAddress(LOG10_E), tmp /*rscratch*/); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
psrld(xmm0, 2); psrld(xmm0, 2);
rcpps(xmm0, xmm0); rcpps(xmm0, xmm0);
psllq(xmm1, 12); psllq(xmm1, 12);
@ -255,22 +255,22 @@ void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister
cvtsi2sdl(xmm7, eax); cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0); mulpd(xmm5, xmm0);
mulsd(xmm1, xmm0); mulsd(xmm1, xmm0);
movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL movq(xmm6, ExternalAddress(log2), tmp /*rscratch*/); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL movdqu(xmm3, ExternalAddress(coeff), tmp /*rscratch*/); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
subsd(xmm5, xmm2); subsd(xmm5, xmm2);
andl(edx, 16711680); andl(edx, 16711680);
shrl(edx, 12); shrl(edx, 12);
movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504)); movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504));
movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL movdqu(xmm4, ExternalAddress(coeff + 16), tmp /*rscratch*/); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
addsd(xmm1, xmm5); addsd(xmm1, xmm5);
movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL movdqu(xmm2, ExternalAddress(coeff + 32), tmp /*rscratch*/); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
mulsd(xmm6, xmm7); mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68); pshufd(xmm5, xmm1, 68);
mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL mulsd(xmm7, ExternalAddress(log2 + 8), tmp /*rscratch*/); //0x1f12b358UL, 0x3cdfef31UL
mulsd(xmm3, xmm1); mulsd(xmm3, xmm1);
addsd(xmm0, xmm6); addsd(xmm0, xmm6);
mulpd(xmm4, xmm5); mulpd(xmm4, xmm5);
movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL movq(xmm6, ExternalAddress(LOG10_E + 8), tmp /*rscratch*/); //0xbf2e4108UL, 0x3f5a7a6cUL
mulpd(xmm5, xmm5); mulpd(xmm5, xmm5);
addpd(xmm4, xmm2); addpd(xmm4, xmm2);
mulpd(xmm3, xmm5); mulpd(xmm3, xmm5);
@ -333,7 +333,7 @@ void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister
por(xmm0, xmm2); por(xmm0, xmm2);
movl(ecx, 18416); movl(ecx, 18416);
psrlq(xmm0, 27); psrlq(xmm0, 27);
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL movdqu(xmm2, ExternalAddress(LOG10_E), tmp /*rscratch*/); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
psrld(xmm0, 2); psrld(xmm0, 2);
rcpps(xmm0, xmm0); rcpps(xmm0, xmm0);
psllq(xmm1, 12); psllq(xmm1, 12);

View File

@ -827,17 +827,17 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
// Special case: pow(x, 2.0) => x * x // Special case: pow(x, 2.0) => x * x
movdq(tmp1, xmm1); movdq(tmp1, xmm1);
cmp64(tmp1, ExternalAddress(DOUBLE2)); cmp64(tmp1, ExternalAddress(DOUBLE2), tmp2 /*rscratch*/);
jccb(Assembler::notEqual, B1_2); jccb(Assembler::notEqual, B1_2);
mulsd(xmm0, xmm0); mulsd(xmm0, xmm0);
jmp(B1_5); jmp(B1_5);
// Special case: pow(x, 0.5) => sqrt(x) // Special case: pow(x, 0.5) => sqrt(x)
bind(B1_2); bind(B1_2);
cmp64(tmp1, ExternalAddress(DOUBLE0DOT5)); cmp64(tmp1, ExternalAddress(DOUBLE0DOT5), tmp2 /*rscratch*/);
jccb(Assembler::notEqual, L_POW); // For pow(x, y), check whether y == 0.5 jccb(Assembler::notEqual, L_POW); // For pow(x, y), check whether y == 0.5
movdq(tmp2, xmm0); movdq(tmp2, xmm0);
cmp64(tmp2, ExternalAddress(DOUBLE0)); cmp64(tmp2, ExternalAddress(DOUBLE0), tmp3 /*rscratch*/);
jccb(Assembler::less, L_POW); // pow(x, 0.5) => sqrt(x) only for x >= 0.0 or x is +inf/NaN jccb(Assembler::less, L_POW); // pow(x, 0.5) => sqrt(x) only for x >= 0.0 or x is +inf/NaN
sqrtsd(xmm0, xmm0); sqrtsd(xmm0, xmm0);
jmp(B1_5); jmp(B1_5);
@ -861,9 +861,9 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addl(ecx, edx); addl(ecx, edx);
xorl(ecx, edx); xorl(ecx, edx);
por(xmm0, xmm2); por(xmm0, xmm2);
movdqu(xmm6, ExternalAddress(HIGHSIGMASK)); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL movdqu(xmm6, ExternalAddress(HIGHSIGMASK), tmp2 /*rscratch*/); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
psrlq(xmm0, 27); psrlq(xmm0, 27);
movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL movq(xmm2, ExternalAddress(LOG2_E), tmp2 /*rscratch*/); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
psrld(xmm0, 2); psrld(xmm0, 2);
addl(ecx, 16); addl(ecx, 16);
bsrl(ecx, ecx); bsrl(ecx, ecx);
@ -903,10 +903,10 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(L_2TAG_PACKET_4_0_2); bind(L_2TAG_PACKET_4_0_2);
mulsd(xmm3, xmm0); mulsd(xmm3, xmm0);
movdqu(xmm1, ExternalAddress(coeff)); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL movdqu(xmm1, ExternalAddress(coeff), tmp2 /*rscratch*/); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL
lea(tmp4, ExternalAddress(L_tbl)); lea(tmp4, ExternalAddress(L_tbl));
subsd(xmm5, xmm2); subsd(xmm5, xmm2);
movdqu(xmm4, ExternalAddress(16 + coeff)); //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL movdqu(xmm4, ExternalAddress(coeff + 16), tmp2 /*rscratch*/); //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL
movl(ecx, eax); movl(ecx, eax);
sarl(eax, 31); sarl(eax, 31);
addl(ecx, eax); addl(ecx, eax);
@ -914,12 +914,12 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addl(eax, 1); addl(eax, 1);
bsrl(eax, eax); bsrl(eax, eax);
unpcklpd(xmm5, xmm3); unpcklpd(xmm5, xmm3);
movdqu(xmm6, ExternalAddress(32 + coeff)); //0x518775e3UL, 0x3f9004f2UL, 0xac8349bbUL, 0x3fa76c9bUL movdqu(xmm6, ExternalAddress(coeff + 32), tmp2 /*rscratch*/); //0x518775e3UL, 0x3f9004f2UL, 0xac8349bbUL, 0x3fa76c9bUL
addsd(xmm3, xmm5); addsd(xmm3, xmm5);
andl(edx, 16760832); andl(edx, 16760832);
shrl(edx, 10); shrl(edx, 10);
addpd(xmm5, Address(tmp4, edx, Address::times_1, -3648)); addpd(xmm5, Address(tmp4, edx, Address::times_1, -3648));
movdqu(xmm0, ExternalAddress(48 + coeff)); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL movdqu(xmm0, ExternalAddress(coeff + 48), tmp2 /*rscratch*/); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL
pshufd(xmm2, xmm3, 68); pshufd(xmm2, xmm3, 68);
mulsd(xmm3, xmm3); mulsd(xmm3, xmm3);
mulpd(xmm1, xmm2); mulpd(xmm1, xmm2);
@ -932,7 +932,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movq(xmm1, Address(rsp, 16)); movq(xmm1, Address(rsp, 16));
movw(ecx, Address(rsp, 22)); movw(ecx, Address(rsp, 22));
pshufd(xmm7, xmm5, 238); pshufd(xmm7, xmm5, 238);
movq(xmm4, ExternalAddress(HIGHMASK_Y)); //0x00000000UL, 0xfffffff8UL, 0x00000000UL, 0xffffffffUL movq(xmm4, ExternalAddress(HIGHMASK_Y), tmp2 /*rscratch*/); //0x00000000UL, 0xfffffff8UL, 0x00000000UL, 0xffffffffUL
mulpd(xmm6, xmm2); mulpd(xmm6, xmm2);
pshufd(xmm3, xmm3, 68); pshufd(xmm3, xmm3, 68);
mulpd(xmm0, xmm2); mulpd(xmm0, xmm2);
@ -967,8 +967,8 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movdqu(xmm5, Address(tmp4, edx, Address::times_8, 0)); movdqu(xmm5, Address(tmp4, edx, Address::times_8, 0));
addsd(xmm4, xmm1); addsd(xmm4, xmm1);
mulsd(xmm2, xmm0); mulsd(xmm2, xmm0);
movdqu(xmm7, ExternalAddress(e_coeff)); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL movdqu(xmm7, ExternalAddress(e_coeff), tmp2 /*rscratch*/); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
movdqu(xmm3, ExternalAddress(16 + e_coeff)); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL movdqu(xmm3, ExternalAddress(e_coeff + 16), tmp2 /*rscratch*/); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
shll(ecx, 12); shll(ecx, 12);
xorl(ecx, tmp1); xorl(ecx, tmp1);
andl(rcx, -1048576); andl(rcx, -1048576);
@ -1027,11 +1027,11 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
por(xmm0, xmm2); por(xmm0, xmm2);
movl(ecx, 18416); movl(ecx, 18416);
psrlq(xmm0, 27); psrlq(xmm0, 27);
movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL movq(xmm2, ExternalAddress(LOG2_E), tmp2 /*rscratch*/); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
psrld(xmm0, 2); psrld(xmm0, 2);
rcpps(xmm0, xmm0); rcpps(xmm0, xmm0);
psllq(xmm3, 12); psllq(xmm3, 12);
movdqu(xmm6, ExternalAddress(HIGHSIGMASK)); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL movdqu(xmm6, ExternalAddress(HIGHSIGMASK), tmp2 /*rscratch*/); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
psrlq(xmm3, 12); psrlq(xmm3, 12);
mulss(xmm0, xmm7); mulss(xmm0, xmm7);
movl(edx, -1024); movl(edx, -1024);
@ -1073,11 +1073,11 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
por(xmm0, xmm2); por(xmm0, xmm2);
movl(ecx, 18416); movl(ecx, 18416);
psrlq(xmm0, 27); psrlq(xmm0, 27);
movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL movq(xmm2, ExternalAddress(LOG2_E), tmp2 /*rscratch*/); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
psrld(xmm0, 2); psrld(xmm0, 2);
rcpps(xmm0, xmm0); rcpps(xmm0, xmm0);
psllq(xmm3, 12); psllq(xmm3, 12);
movdqu(xmm6, ExternalAddress(HIGHSIGMASK)); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL movdqu(xmm6, ExternalAddress(HIGHSIGMASK), tmp2 /*rscratch*/); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
psrlq(xmm3, 12); psrlq(xmm3, 12);
mulss(xmm0, xmm7); mulss(xmm0, xmm7);
movl(edx, -1024); movl(edx, -1024);
@ -1104,7 +1104,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
cmpl(eax, 752); cmpl(eax, 752);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_12_0_2); jcc(Assembler::aboveEqual, L_2TAG_PACKET_12_0_2);
addsd(xmm0, xmm7); addsd(xmm0, xmm7);
movq(xmm2, ExternalAddress(HALFMASK)); //0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL movq(xmm2, ExternalAddress(HALFMASK), tmp2 /*rscratch*/); //0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL
addpd(xmm3, xmm0); addpd(xmm3, xmm0);
xorpd(xmm6, xmm6); xorpd(xmm6, xmm6);
movl(eax, 17080); movl(eax, 17080);
@ -1132,8 +1132,8 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subsd(xmm6, xmm7); subsd(xmm6, xmm7);
lea(tmp4, ExternalAddress(T_exp)); lea(tmp4, ExternalAddress(T_exp));
addsd(xmm2, xmm1); addsd(xmm2, xmm1);
movdqu(xmm7, ExternalAddress(e_coeff)); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL movdqu(xmm7, ExternalAddress(e_coeff + 0), tmp2 /*rscratch*/); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
movdqu(xmm3, ExternalAddress(16 + e_coeff)); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL movdqu(xmm3, ExternalAddress(e_coeff + 16), tmp2 /*rscratch*/); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
subsd(xmm4, xmm6); subsd(xmm4, xmm6);
pextrw(edx, xmm6, 3); pextrw(edx, xmm6, 3);
movl(ecx, eax); movl(ecx, eax);
@ -1148,7 +1148,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
shll(ecx, 20); shll(ecx, 20);
xorl(ecx, tmp1); xorl(ecx, tmp1);
movdl(xmm6, ecx); movdl(xmm6, ecx);
movq(xmm1, ExternalAddress(32 + e_coeff)); //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL movq(xmm1, ExternalAddress(e_coeff + 32), tmp2 /*rscratch*/); //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL
andl(edx, 32767); andl(edx, 32767);
cmpl(edx, 16529); cmpl(edx, 16529);
jcc(Assembler::above, L_2TAG_PACKET_12_0_2); jcc(Assembler::above, L_2TAG_PACKET_12_0_2);
@ -1405,7 +1405,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
xorpd(xmm1, xmm1); xorpd(xmm1, xmm1);
movl(edx, 30704); movl(edx, 30704);
pinsrw(xmm1, edx, 3); pinsrw(xmm1, edx, 3);
movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL movq(xmm2, ExternalAddress(LOG2_E), tmp2 /*rscratch*/); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
movq(xmm4, Address(rsp, 8)); movq(xmm4, Address(rsp, 8));
pextrw(eax, xmm4, 3); pextrw(eax, xmm4, 3);
movl(edx, 8192); movl(edx, 8192);
@ -1438,7 +1438,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
xorpd(xmm1, xmm1); xorpd(xmm1, xmm1);
movl(edx, 30704); movl(edx, 30704);
pinsrw(xmm1, edx, 3); pinsrw(xmm1, edx, 3);
movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL movq(xmm2, ExternalAddress(LOG2_E), tmp2 /*rscratch*/); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
movq(xmm4, Address(rsp, 8)); movq(xmm4, Address(rsp, 8));
pextrw(eax, xmm4, 3); pextrw(eax, xmm4, 3);
movl(edx, 8192); movl(edx, 8192);
@ -1665,15 +1665,15 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
cvtsi2sdl(xmm7, eax); cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0); mulpd(xmm5, xmm0);
lea(r11, ExternalAddress(L_tbl)); lea(r11, ExternalAddress(L_tbl));
movq(xmm4, ExternalAddress(coeff_h)); //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL movq(xmm4, ExternalAddress(coeff_h), tmp2 /*rscratch*/); //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL
mulsd(xmm3, xmm0); mulsd(xmm3, xmm0);
movq(xmm6, ExternalAddress(coeff_h)); //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL movq(xmm6, ExternalAddress(coeff_h), tmp2 /*rscratch*/); //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL
subsd(xmm5, xmm2); subsd(xmm5, xmm2);
movq(xmm1, ExternalAddress(8 + coeff_h)); //0x00000000UL, 0xbf5dabe1UL movq(xmm1, ExternalAddress(coeff_h + 8), tmp2 /*rscratch*/); //0x00000000UL, 0xbf5dabe1UL
pshufd(xmm2, xmm3, 68); pshufd(xmm2, xmm3, 68);
unpcklpd(xmm5, xmm3); unpcklpd(xmm5, xmm3);
addsd(xmm3, xmm5); addsd(xmm3, xmm5);
movq(xmm0, ExternalAddress(8 + coeff_h)); //0x00000000UL, 0xbf5dabe1UL movq(xmm0, ExternalAddress(coeff_h + 8), tmp2 /*rscratch*/); //0x00000000UL, 0xbf5dabe1UL
andl(edx, 16760832); andl(edx, 16760832);
shrl(edx, 10); shrl(edx, 10);
addpd(xmm7, Address(tmp4, edx, Address::times_1, -3648)); addpd(xmm7, Address(tmp4, edx, Address::times_1, -3648));
@ -1698,13 +1698,13 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movdqu(xmm5, xmm7); movdqu(xmm5, xmm7);
addsd(xmm7, xmm2); addsd(xmm7, xmm2);
addsd(xmm4, xmm0); addsd(xmm4, xmm0);
movdqu(xmm0, ExternalAddress(coeff)); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL movdqu(xmm0, ExternalAddress(coeff), tmp2 /*rscratch*/); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL
subsd(xmm5, xmm7); subsd(xmm5, xmm7);
addsd(xmm6, xmm4); addsd(xmm6, xmm4);
movdqu(xmm4, xmm7); movdqu(xmm4, xmm7);
addsd(xmm5, xmm2); addsd(xmm5, xmm2);
addsd(xmm7, xmm1); addsd(xmm7, xmm1);
movdqu(xmm2, ExternalAddress(64 + coeff)); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL movdqu(xmm2, ExternalAddress(coeff + 64), tmp2 /*rscratch*/); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL
subsd(xmm4, xmm7); subsd(xmm4, xmm7);
addsd(xmm6, xmm5); addsd(xmm6, xmm5);
addsd(xmm4, xmm1); addsd(xmm4, xmm1);
@ -1713,11 +1713,11 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm7, xmm5); addsd(xmm7, xmm5);
subsd(xmm1, xmm7); subsd(xmm1, xmm7);
addsd(xmm1, xmm5); addsd(xmm1, xmm5);
movdqu(xmm5, ExternalAddress(80 + coeff)); //0x9f95985aUL, 0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL movdqu(xmm5, ExternalAddress(coeff + 80), tmp2 /*rscratch*/); //0x9f95985aUL, 0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL
pshufd(xmm3, xmm3, 68); pshufd(xmm3, xmm3, 68);
addsd(xmm6, xmm4); addsd(xmm6, xmm4);
addsd(xmm6, xmm1); addsd(xmm6, xmm1);
movdqu(xmm1, ExternalAddress(32 + coeff)); //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL movdqu(xmm1, ExternalAddress(coeff + 32), tmp2 /*rscratch*/); //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL
mulpd(xmm0, xmm3); mulpd(xmm0, xmm3);
mulpd(xmm2, xmm3); mulpd(xmm2, xmm3);
pshufd(xmm4, xmm3, 68); pshufd(xmm4, xmm3, 68);
@ -1725,7 +1725,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addpd(xmm0, xmm1); addpd(xmm0, xmm1);
addpd(xmm5, xmm2); addpd(xmm5, xmm2);
mulsd(xmm4, xmm3); mulsd(xmm4, xmm3);
movq(xmm2, ExternalAddress(HIGHMASK_LOG_X)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xfffff800UL movq(xmm2, ExternalAddress(HIGHMASK_LOG_X), tmp2 /*rscratch*/); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xfffff800UL
mulpd(xmm3, xmm3); mulpd(xmm3, xmm3);
movq(xmm1, Address(rsp, 16)); movq(xmm1, Address(rsp, 16));
movw(ecx, Address(rsp, 22)); movw(ecx, Address(rsp, 22));
@ -1733,7 +1733,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
pextrw(eax, xmm7, 3); pextrw(eax, xmm7, 3);
mulpd(xmm5, xmm4); mulpd(xmm5, xmm4);
mulpd(xmm0, xmm3); mulpd(xmm0, xmm3);
movq(xmm4, ExternalAddress(8 + HIGHMASK_Y)); //0x00000000UL, 0xffffffffUL movq(xmm4, ExternalAddress(HIGHMASK_Y + 8), tmp2 /*rscratch*/); //0x00000000UL, 0xffffffffUL
pand(xmm2, xmm7); pand(xmm2, xmm7);
addsd(xmm5, xmm6); addsd(xmm5, xmm6);
subsd(xmm7, xmm2); subsd(xmm7, xmm2);
@ -1761,12 +1761,12 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulsd(xmm3, xmm7); mulsd(xmm3, xmm7);
addsd(xmm6, xmm4); addsd(xmm6, xmm4);
addsd(xmm1, xmm3); addsd(xmm1, xmm3);
movdqu(xmm7, ExternalAddress(e_coeff)); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL movdqu(xmm7, ExternalAddress(e_coeff), tmp2 /*rscratch*/); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
movdl(edx, xmm6); movdl(edx, xmm6);
subsd(xmm6, xmm5); subsd(xmm6, xmm5);
lea(tmp4, ExternalAddress(T_exp)); lea(tmp4, ExternalAddress(T_exp));
movdqu(xmm3, ExternalAddress(16 + e_coeff)); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL movdqu(xmm3, ExternalAddress(e_coeff + 16), tmp2 /*rscratch*/); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
movq(xmm2, ExternalAddress(32 + e_coeff)); //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL movq(xmm2, ExternalAddress(e_coeff + 32), tmp2 /*rscratch*/); //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL
subsd(xmm4, xmm6); subsd(xmm4, xmm6);
movl(ecx, edx); movl(ecx, edx);
andl(edx, 255); andl(edx, 255);

View File

@ -819,9 +819,9 @@ enum {
movl(h, Address(CTX, 4*7)); movl(h, Address(CTX, 4*7));
pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask; pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask;
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr +0)); //[PSHUFFLE_BYTE_FLIP_MASK wrt rip] vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); // [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); //[_SHUF_00BA wrt rip] vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); // [_SHUF_00BA wrt rip]
vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); //[_SHUF_DC00 wrt rip] vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); // [_SHUF_DC00 wrt rip]
movl(g, Address(CTX, 4*6)); movl(g, Address(CTX, 4*6));
@ -982,9 +982,9 @@ bind(only_one_block);
pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask; pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask;
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //[PSHUFFLE_BYTE_FLIP_MASK wrt rip] vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); // [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); //[_SHUF_00BA wrt rip] vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); // [_SHUF_00BA wrt rip]
vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); //[_SHUF_DC00 wrt rip] vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); // [_SHUF_DC00 wrt rip]
movl(g, Address(CTX, 4*6)); // 0x1f83d9ab movl(g, Address(CTX, 4*6)); // 0x1f83d9ab
@ -1374,8 +1374,8 @@ void MacroAssembler::sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegiste
movq(h, Address(CTX, 8 * 7)); movq(h, Address(CTX, 8 * 7));
pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask_sha512; pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask_sha512;
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //PSHUFFLE_BYTE_FLIP_MASK wrt rip vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); // PSHUFFLE_BYTE_FLIP_MASK wrt rip
vmovdqu(YMM_MASK_LO, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); vmovdqu(YMM_MASK_LO, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));
movq(g, Address(CTX, 8 * 6)); movq(g, Address(CTX, 8 * 6));

View File

@ -185,14 +185,14 @@ ATTRIBUTE_ALIGNED(8) juint _ALL_ONES[] =
void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ebx, Register ecx, Register edx, Register tmp1) { Register eax, Register ebx, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
Label L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1; Label L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1;
Label L_2TAG_PACKET_12_0_1, B1_4; Label L_2TAG_PACKET_12_0_1, B1_4;
assert_different_registers(tmp1, eax, ebx, ecx, edx); assert_different_registers(tmp, eax, ebx, ecx, edx);
address ALL_ONES = (address)_ALL_ONES; address ALL_ONES = (address)_ALL_ONES;
@ -200,30 +200,29 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subq(rsp, 16); subq(rsp, 16);
movsd(Address(rsp, 8), xmm0); movsd(Address(rsp, 8), xmm0);
movl(eax, Address(rsp, 12)); movl(eax, Address(rsp, 12));
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL movq(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x40245f30UL
movq(xmm2, ExternalAddress(SHIFTER)); //0x00000000UL, 0x43380000UL movq(xmm2, ExternalAddress(SHIFTER), tmp /*rscratch*/); //0x00000000UL, 0x43380000UL
andl(eax, 2147418112); andl(eax, 2147418112);
subl(eax, 808452096); subl(eax, 808452096);
cmpl(eax, 281346048); cmpl(eax, 281346048);
jcc(Assembler::above, L_2TAG_PACKET_0_0_1); jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
mulsd(xmm1, xmm0); mulsd(xmm1, xmm0);
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL movdqu(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL movq(xmm4, ExternalAddress(SIGN_MASK), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0); pand(xmm4, xmm0);
por(xmm5, xmm4); por(xmm5, xmm4);
addpd(xmm1, xmm5); addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1); cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx); cvtsi2sdl(xmm1, edx);
movdqu(xmm6, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL movdqu(xmm6, ExternalAddress(P_2), tmp /*rscratch*/); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
mov64(r8, 0x3fb921fb54400000); mov64(r8, 0x3fb921fb54400000);
movdq(xmm3, r8); movdq(xmm3, r8);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL movdqu(xmm5, ExternalAddress(SC_4), tmp /*rscratch*/); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
pshufd(xmm4, xmm0, 68); pshufd(xmm4, xmm0, 68);
mulsd(xmm3, xmm1); mulsd(xmm3, xmm1);
if (VM_Version::supports_sse3()) { if (VM_Version::supports_sse3()) {
movddup(xmm1, xmm1); movddup(xmm1, xmm1);
} } else {
else {
movlhps(xmm1, xmm1); movlhps(xmm1, xmm1);
} }
andl(edx, 63); andl(edx, 63);
@ -231,14 +230,13 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
lea(rax, ExternalAddress(Ctable)); lea(rax, ExternalAddress(Ctable));
addq(rax, rdx); addq(rax, rdx);
mulpd(xmm6, xmm1); mulpd(xmm6, xmm1);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL mulsd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3); subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8)); movq(xmm7, Address(rax, 8));
subsd(xmm0, xmm3); subsd(xmm0, xmm3);
if (VM_Version::supports_sse3()) { if (VM_Version::supports_sse3()) {
movddup(xmm3, xmm4); movddup(xmm3, xmm4);
} } else {
else {
movdqu(xmm3, xmm4); movdqu(xmm3, xmm4);
movlhps(xmm3, xmm3); movlhps(xmm3, xmm3);
} }
@ -252,7 +250,7 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulpd(xmm5, xmm0); mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0); mulpd(xmm0, xmm0);
subsd(xmm3, xmm6); subsd(xmm3, xmm6);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL movdqu(xmm6, ExternalAddress(SC_2), tmp /*rscratch*/); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
subsd(xmm1, xmm3); subsd(xmm1, xmm3);
movq(xmm3, Address(rax, 24)); movq(xmm3, Address(rax, 24));
addsd(xmm2, xmm3); addsd(xmm2, xmm3);
@ -262,9 +260,9 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulsd(xmm3, xmm4); mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0); mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0); mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL addpd(xmm5, ExternalAddress(SC_3), tmp /*rscratch*/); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0)); mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL addpd(xmm6, ExternalAddress(SC_1), tmp /*rscratch*/); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0); mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3); movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8)); addsd(xmm3, Address(rax, 8));
@ -293,14 +291,14 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
shrl(eax, 20); shrl(eax, 20);
cmpl(eax, 3325); cmpl(eax, 3325);
jcc(Assembler::notEqual, L_2TAG_PACKET_2_0_1); jcc(Assembler::notEqual, L_2TAG_PACKET_2_0_1);
mulsd(xmm0, ExternalAddress(ALL_ONES)); //0xffffffffUL, 0x3fefffffUL mulsd(xmm0, ExternalAddress(ALL_ONES), tmp /*rscratch*/); //0xffffffffUL, 0x3fefffffUL
jmp(B1_4); jmp(B1_4);
bind(L_2TAG_PACKET_2_0_1); bind(L_2TAG_PACKET_2_0_1);
movq(xmm3, ExternalAddress(TWO_POW_55)); //0x00000000UL, 0x43600000UL movq(xmm3, ExternalAddress(TWO_POW_55), tmp /*rscratch*/); //0x00000000UL, 0x43600000UL
mulsd(xmm3, xmm0); mulsd(xmm3, xmm0);
subsd(xmm3, xmm0); subsd(xmm3, xmm0);
mulsd(xmm3, ExternalAddress(TWO_POW_M55)); //0x00000000UL, 0x3c800000UL mulsd(xmm3, ExternalAddress(TWO_POW_M55), tmp /*rscratch*/); //0x00000000UL, 0x3c800000UL
jmp(B1_4); jmp(B1_4);
bind(L_2TAG_PACKET_1_0_1); bind(L_2TAG_PACKET_1_0_1);
@ -447,8 +445,8 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
orl(edx, rsi); orl(edx, rsi);
xorl(edx, ebx); xorl(edx, ebx);
pinsrw(xmm4, edx, 3); pinsrw(xmm4, edx, 3);
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL movq(xmm2, ExternalAddress(PI_4), tmp /*rscratch*/); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL movq(xmm6, ExternalAddress(PI_4 + 8), tmp /*rscratch*/); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
xorpd(xmm5, xmm5); xorpd(xmm5, xmm5);
subl(edx, 1008); subl(edx, 1008);
pinsrw(xmm5, edx, 3); pinsrw(xmm5, edx, 3);
@ -472,17 +470,17 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm6, xmm2); addsd(xmm6, xmm2);
bind(L_2TAG_PACKET_12_0_1); bind(L_2TAG_PACKET_12_0_1);
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL movq(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x40245f30UL
mulsd(xmm1, xmm0); mulsd(xmm1, xmm0);
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL movq(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL movq(xmm4, ExternalAddress(SIGN_MASK), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0); pand(xmm4, xmm0);
por(xmm5, xmm4); por(xmm5, xmm4);
addpd(xmm1, xmm5); addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1); cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx); cvtsi2sdl(xmm1, edx);
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL movq(xmm3, ExternalAddress(P_1), tmp /*rscratch*/); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL movdqu(xmm2, ExternalAddress(P_2), tmp /*rscratch*/); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
mulsd(xmm3, xmm1); mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1); unpcklpd(xmm1, xmm1);
shll(eax, 3); shll(eax, 3);
@ -490,13 +488,13 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
movdqu(xmm4, xmm0); movdqu(xmm4, xmm0);
addl(edx, eax); addl(edx, eax);
andl(edx, 63); andl(edx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0x54400000UL, 0x3fb921fbUL movdqu(xmm5, ExternalAddress(SC_4), tmp /*rscratch*/); //0x54400000UL, 0x3fb921fbUL
lea(rax, ExternalAddress(Ctable)); lea(rax, ExternalAddress(Ctable));
shll(edx, 5); shll(edx, 5);
addq(rax, rdx); addq(rax, rdx);
mulpd(xmm2, xmm1); mulpd(xmm2, xmm1);
subsd(xmm0, xmm3); subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL mulsd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3); subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8)); movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0); unpcklpd(xmm0, xmm0);
@ -515,15 +513,15 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm2, xmm3); addsd(xmm2, xmm3);
subsd(xmm7, xmm2); subsd(xmm7, xmm2);
subsd(xmm1, xmm6); subsd(xmm1, xmm6);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL movdqu(xmm6, ExternalAddress(SC_2), tmp /*rscratch*/); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm2, xmm4); mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0); mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4); mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0); mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0); mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL addpd(xmm5, ExternalAddress(SC_3), tmp /*rscratch*/); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0)); mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL addpd(xmm6, ExternalAddress(SC_1), tmp /*rscratch*/); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0); mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3); movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8)); addsd(xmm3, Address(rax, 8));
@ -627,7 +625,7 @@ void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(L_2TAG_PACKET_3_0_1); bind(L_2TAG_PACKET_3_0_1);
movq(xmm0, Address(rsp, 8)); movq(xmm0, Address(rsp, 8));
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL mulsd(xmm0, ExternalAddress(NEG_ZERO), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
movq(Address(rsp, 0), xmm0); movq(Address(rsp, 0), xmm0);
bind(L_2TAG_PACKET_14_0_1); bind(L_2TAG_PACKET_14_0_1);
@ -1018,15 +1016,15 @@ void MacroAssembler::libm_reduce_pi04l(Register eax, Register ecx, Register edx,
assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp);
address zero_none = (address)_zero_none; address zero_none = (address)_zero_none;
address _4onpi_d = (address)__4onpi_d; address _4onpi_d = (address)__4onpi_d;
address TWO_32H = (address)_TWO_32H; address TWO_32H = (address)_TWO_32H;
address pi04_3d = (address)_pi04_3d; address pi04_3d = (address)_pi04_3d;
address pi04_5d = (address)_pi04_5d; address pi04_5d = (address)_pi04_5d;
address SCALE = (address)_SCALE; address SCALE = (address)_SCALE;
address zeros = (address)_zeros; address zeros = (address)_zeros;
address pi04_2d = (address)_pi04_2d; address pi04_2d = (address)_pi04_2d;
address TWO_12H = (address)_TWO_12H; address TWO_12H = (address)_TWO_12H;
address _4onpi_31l = (address)__4onpi_31l; address _4onpi_31l = (address)__4onpi_31l;
bind(B1_1); bind(B1_1);

View File

@ -462,14 +462,14 @@ ATTRIBUTE_ALIGNED(8) juint _QQ_2_tan[] =
void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ecx, Register edx, Register r8, Register r9, Register eax, Register ecx, Register edx, Register r8, Register r9,
Register r10, Register r11) { Register r10, Register r11, Register tmp) {
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1, B1_2, B1_4; Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1, B1_2, B1_4;
assert_different_registers(eax, ecx, edx, r8, r9, r10, r11); assert_different_registers(eax, ecx, edx, r8, r9, r10, r11, tmp);
address MUL16 = (address)_MUL16; address MUL16 = (address)_MUL16;
address sign_mask = (address)_sign_mask_tan; address sign_mask = (address)_sign_mask_tan;
@ -497,12 +497,12 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
subl(eax, 16314); subl(eax, 16314);
cmpl(eax, 270); cmpl(eax, 270);
jcc(Assembler::above, L_2TAG_PACKET_0_0_1); jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL movdqu(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movdqu(xmm6, ExternalAddress(MUL16)); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL movdqu(xmm6, ExternalAddress(MUL16), tmp /*rscratch*/); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL
unpcklpd(xmm0, xmm0); unpcklpd(xmm0, xmm0);
movdqu(xmm4, ExternalAddress(sign_mask)); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL movdqu(xmm4, ExternalAddress(sign_mask), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL
andpd(xmm4, xmm0); andpd(xmm4, xmm0);
movdqu(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL movdqu(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL
mulpd(xmm1, xmm0); mulpd(xmm1, xmm0);
por(xmm5, xmm4); por(xmm5, xmm4);
addpd(xmm1, xmm5); addpd(xmm1, xmm5);
@ -512,10 +512,10 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
cvttpd2dq(xmm1, xmm1); cvttpd2dq(xmm1, xmm1);
cvtdq2pd(xmm1, xmm1); cvtdq2pd(xmm1, xmm1);
mulpd(xmm1, xmm6); mulpd(xmm1, xmm6);
movdqu(xmm3, ExternalAddress(P_1)); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL movdqu(xmm3, ExternalAddress(P_1), tmp /*rscratch*/); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL
movq(xmm5, ExternalAddress(QQ_2)); //0x676733afUL, 0x3d32e7b9UL movq(xmm5, ExternalAddress(QQ_2), tmp /*rscratch*/); //0x676733afUL, 0x3d32e7b9UL
addq(rdx, 469248); addq(rdx, 469248);
movdqu(xmm4, ExternalAddress(P_2)); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL movdqu(xmm4, ExternalAddress(P_2), tmp /*rscratch*/); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL
mulpd(xmm3, xmm1); mulpd(xmm3, xmm1);
andq(rdx, 31); andq(rdx, 31);
mulsd(xmm5, xmm1); mulsd(xmm5, xmm1);
@ -523,17 +523,17 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulpd(xmm4, xmm1); mulpd(xmm4, xmm1);
shlq(rcx, 1); shlq(rcx, 1);
subpd(xmm0, xmm3); subpd(xmm0, xmm3);
mulpd(xmm1, ExternalAddress(P_3)); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL mulpd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL
addq(rdx, rcx); addq(rdx, rcx);
shlq(rcx, 2); shlq(rcx, 2);
addq(rdx, rcx); addq(rdx, rcx);
addsd(xmm5, xmm0); addsd(xmm5, xmm0);
movdqu(xmm2, xmm0); movdqu(xmm2, xmm0);
subpd(xmm0, xmm4); subpd(xmm0, xmm4);
movq(xmm6, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL movq(xmm6, ExternalAddress(ONE), tmp /*rscratch*/); //0x00000000UL, 0x3ff00000UL
shlq(rdx, 4); shlq(rdx, 4);
lea(rax, ExternalAddress(Ctable)); lea(rax, ExternalAddress(Ctable));
andpd(xmm5, ExternalAddress(MASK_35)); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL andpd(xmm5, ExternalAddress(MASK_35), tmp /*rscratch*/); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL
movdqu(xmm3, xmm0); movdqu(xmm3, xmm0);
addq(rax, rdx); addq(rax, rdx);
subpd(xmm2, xmm0); subpd(xmm2, xmm0);
@ -586,7 +586,7 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm7, Address(rax, 136)); addsd(xmm7, Address(rax, 136));
addsd(xmm7, xmm1); addsd(xmm7, xmm1);
addsd(xmm0, xmm7); addsd(xmm0, xmm7);
movq(xmm7, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL movq(xmm7, ExternalAddress(ONE), tmp /*rscratch*/); //0x00000000UL, 0x3ff00000UL
mulsd(xmm4, xmm6); mulsd(xmm4, xmm6);
movq(xmm2, Address(rax, 168)); movq(xmm2, Address(rax, 168));
andpd(xmm2, xmm6); andpd(xmm2, xmm6);
@ -616,26 +616,26 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
jcc(Assembler::below, L_2TAG_PACKET_3_0_1); jcc(Assembler::below, L_2TAG_PACKET_3_0_1);
movdqu(xmm2, xmm0); movdqu(xmm2, xmm0);
movdqu(xmm3, xmm0); movdqu(xmm3, xmm0);
movq(xmm1, ExternalAddress(Q_11)); //0xb8fe4d77UL, 0x3f82609aUL movq(xmm1, ExternalAddress(Q_11), tmp /*rscratch*/); //0xb8fe4d77UL, 0x3f82609aUL
mulsd(xmm2, xmm0); mulsd(xmm2, xmm0);
mulsd(xmm3, xmm2); mulsd(xmm3, xmm2);
mulsd(xmm1, xmm2); mulsd(xmm1, xmm2);
addsd(xmm1, ExternalAddress(Q_9)); //0xbf847a43UL, 0x3f9664a0UL addsd(xmm1, ExternalAddress(Q_9), tmp /*rscratch*/); //0xbf847a43UL, 0x3f9664a0UL
mulsd(xmm1, xmm2); mulsd(xmm1, xmm2);
addsd(xmm1, ExternalAddress(Q_7)); //0x52c4c8abUL, 0x3faba1baUL addsd(xmm1, ExternalAddress(Q_7), tmp /*rscratch*/); //0x52c4c8abUL, 0x3faba1baUL
mulsd(xmm1, xmm2); mulsd(xmm1, xmm2);
addsd(xmm1, ExternalAddress(Q_5)); //0x11092746UL, 0x3fc11111UL addsd(xmm1, ExternalAddress(Q_5), tmp /*rscratch*/); //0x11092746UL, 0x3fc11111UL
mulsd(xmm1, xmm2); mulsd(xmm1, xmm2);
addsd(xmm1, ExternalAddress(Q_3)); //0x55555612UL, 0x3fd55555UL addsd(xmm1, ExternalAddress(Q_3), tmp /*rscratch*/); //0x55555612UL, 0x3fd55555UL
mulsd(xmm1, xmm3); mulsd(xmm1, xmm3);
addsd(xmm0, xmm1); addsd(xmm0, xmm1);
jmp(B1_4); jmp(B1_4);
bind(L_2TAG_PACKET_3_0_1); bind(L_2TAG_PACKET_3_0_1);
movq(xmm3, ExternalAddress(TWO_POW_55)); //0x00000000UL, 0x43600000UL movq(xmm3, ExternalAddress(TWO_POW_55), tmp /*rscratch*/); //0x00000000UL, 0x43600000UL
mulsd(xmm3, xmm0); mulsd(xmm3, xmm0);
addsd(xmm0, xmm3); addsd(xmm0, xmm3);
mulsd(xmm0, ExternalAddress(TWO_POW_M55)); //0x00000000UL, 0x3c800000UL mulsd(xmm0, ExternalAddress(TWO_POW_M55), tmp /*rscratch*/); //0x00000000UL, 0x3c800000UL
jmp(B1_4); jmp(B1_4);
bind(L_2TAG_PACKET_2_0_1); bind(L_2TAG_PACKET_2_0_1);
@ -786,8 +786,8 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
orl(edx, rsi); orl(edx, rsi);
xorl(edx, rbx); xorl(edx, rbx);
pinsrw(xmm4, edx, 3); pinsrw(xmm4, edx, 3);
movq(xmm2, ExternalAddress(PI_4)); //0x00000000UL, 0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL movq(xmm2, ExternalAddress(PI_4), tmp /*rscratch*/); //0x00000000UL, 0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL
movq(xmm7, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL movq(xmm7, ExternalAddress(PI_4 + 8), tmp /*rscratch*/); //0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL
xorpd(xmm5, xmm5); xorpd(xmm5, xmm5);
subl(edx, 1008); subl(edx, 1008);
pinsrw(xmm5, edx, 3); pinsrw(xmm5, edx, 3);
@ -809,14 +809,14 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm0, xmm7); addsd(xmm0, xmm7);
subsd(xmm2, xmm0); subsd(xmm2, xmm0);
addsd(xmm7, xmm2); addsd(xmm7, xmm2);
movdqu(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL movdqu(xmm1, ExternalAddress(PI32INV), tmp /*rscratch*/); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL
if (VM_Version::supports_sse3()) { if (VM_Version::supports_sse3()) {
movddup(xmm0, xmm0); movddup(xmm0, xmm0);
} }
else { else {
movlhps(xmm0, xmm0); movlhps(xmm0, xmm0);
} }
movdqu(xmm4, ExternalAddress(sign_mask)); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL movdqu(xmm4, ExternalAddress(sign_mask), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL
andpd(xmm4, xmm0); andpd(xmm4, xmm0);
mulpd(xmm1, xmm0); mulpd(xmm1, xmm0);
if (VM_Version::supports_sse3()) { if (VM_Version::supports_sse3()) {
@ -825,8 +825,8 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
else { else {
movlhps(xmm7, xmm7); movlhps(xmm7, xmm7);
} }
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL movdqu(xmm5, ExternalAddress(ONEHALF), tmp /*rscratch*/); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movdqu(xmm6, ExternalAddress(MUL16)); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL movdqu(xmm6, ExternalAddress(MUL16), tmp /*rscratch*/); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL
por(xmm5, xmm4); por(xmm5, xmm4);
addpd(xmm1, xmm5); addpd(xmm1, xmm5);
movdqu(xmm5, xmm1); movdqu(xmm5, xmm1);
@ -835,11 +835,11 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
cvttpd2dq(xmm1, xmm1); cvttpd2dq(xmm1, xmm1);
cvtdq2pd(xmm1, xmm1); cvtdq2pd(xmm1, xmm1);
mulpd(xmm1, xmm6); mulpd(xmm1, xmm6);
movdqu(xmm3, ExternalAddress(P_1)); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL movdqu(xmm3, ExternalAddress(P_1), tmp /*rscratch*/); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL
movq(xmm5, ExternalAddress(QQ_2)); //0x676733afUL, 0x3d32e7b9UL movq(xmm5, ExternalAddress(QQ_2), tmp /*rscratch*/); //0x676733afUL, 0x3d32e7b9UL
shll(eax, 4); shll(eax, 4);
addl(edx, 469248); addl(edx, 469248);
movdqu(xmm4, ExternalAddress(P_2)); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL movdqu(xmm4, ExternalAddress(P_2), tmp /*rscratch*/); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL
mulpd(xmm3, xmm1); mulpd(xmm3, xmm1);
addl(edx, eax); addl(edx, eax);
andl(edx, 31); andl(edx, 31);
@ -848,17 +848,17 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
mulpd(xmm4, xmm1); mulpd(xmm4, xmm1);
shll(ecx, 1); shll(ecx, 1);
subpd(xmm0, xmm3); subpd(xmm0, xmm3);
mulpd(xmm1, ExternalAddress(P_3)); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL mulpd(xmm1, ExternalAddress(P_3), tmp /*rscratch*/); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL
addl(edx, ecx); addl(edx, ecx);
shll(ecx, 2); shll(ecx, 2);
addl(edx, ecx); addl(edx, ecx);
addsd(xmm5, xmm0); addsd(xmm5, xmm0);
movdqu(xmm2, xmm0); movdqu(xmm2, xmm0);
subpd(xmm0, xmm4); subpd(xmm0, xmm4);
movq(xmm6, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL movq(xmm6, ExternalAddress(ONE), tmp /*rscratch*/); //0x00000000UL, 0x3ff00000UL
shll(edx, 4); shll(edx, 4);
lea(rax, ExternalAddress(Ctable)); lea(rax, ExternalAddress(Ctable));
andpd(xmm5, ExternalAddress(MASK_35)); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL andpd(xmm5, ExternalAddress(MASK_35), tmp /*rscratch*/); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL
movdqu(xmm3, xmm0); movdqu(xmm3, xmm0);
addq(rax, rdx); addq(rax, rdx);
subpd(xmm2, xmm0); subpd(xmm2, xmm0);
@ -912,7 +912,7 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
addsd(xmm7, Address(rax, 136)); addsd(xmm7, Address(rax, 136));
addsd(xmm7, xmm1); addsd(xmm7, xmm1);
addsd(xmm0, xmm7); addsd(xmm0, xmm7);
movq(xmm7, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL movq(xmm7, ExternalAddress(ONE), tmp /*rscratch*/); //0x00000000UL, 0x3ff00000UL
mulsd(xmm4, xmm6); mulsd(xmm4, xmm6);
movq(xmm2, Address(rax, 168)); movq(xmm2, Address(rax, 168));
andpd(xmm2, xmm6); andpd(xmm2, xmm6);
@ -1009,7 +1009,7 @@ void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
bind(L_2TAG_PACKET_4_0_1); bind(L_2TAG_PACKET_4_0_1);
movq(xmm0, Address(rsp, 8)); movq(xmm0, Address(rsp, 8));
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL mulsd(xmm0, ExternalAddress(NEG_ZERO), tmp /*rscratch*/); //0x00000000UL, 0x80000000UL
movq(Address(rsp, 0), xmm0); movq(Address(rsp, 0), xmm0);
bind(L_2TAG_PACKET_14_0_1); bind(L_2TAG_PACKET_14_0_1);

View File

@ -3545,21 +3545,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); __ fast_exp(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);
@ -3572,21 +3561,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); __ fast_log(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);
@ -3599,21 +3577,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); __ fast_log10(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);
@ -3626,21 +3593,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); __ fast_pow(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);
@ -3665,11 +3621,8 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ libm_sincos_huge(x0, x1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); __ libm_sincos_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
return start; return start;
@ -3680,19 +3633,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rdx); __ fast_sin(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rbx, rdx);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);
@ -3705,21 +3649,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); __ fast_cos(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);
@ -3732,11 +3665,8 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ libm_tancot_huge(x0, x1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); __ libm_tancot_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
return start; return start;
@ -3747,21 +3677,10 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); __ fast_tan(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, rbx);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);

View File

@ -7263,22 +7263,11 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = r11;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); __ fast_exp(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r11);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);
@ -7292,23 +7281,11 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r11;
const Register tmp2 = r8;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2); __ fast_log(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r11, r8);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);
@ -7322,22 +7299,11 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = r11;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); __ fast_log10(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r11, r8);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);
@ -7351,25 +7317,11 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
const Register tmp2 = r9;
const Register tmp3 = r10;
const Register tmp4 = r11;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); __ fast_pow(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r8, r9, r10, r11);
__ leave(); // required for proper stackwalking of RuntimeStub frame __ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0); __ ret(0);
@ -7383,18 +7335,6 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
@ -7402,8 +7342,8 @@ address generate_avx_ghash_processBlocks() {
__ push(rsi); __ push(rsi);
__ push(rdi); __ push(rdi);
#endif #endif
__ fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rcx, rdx, tmp1); __ fast_sin(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rbx, rcx, rdx, r8);
#ifdef _WIN64 #ifdef _WIN64
__ pop(rdi); __ pop(rdi);
__ pop(rsi); __ pop(rsi);
@ -7421,21 +7361,6 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
const Register tmp2 = r9;
const Register tmp3 = r10;
const Register tmp4 = r11;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
@ -7443,7 +7368,8 @@ address generate_avx_ghash_processBlocks() {
__ push(rsi); __ push(rsi);
__ push(rdi); __ push(rdi);
#endif #endif
__ fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); __ fast_cos(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r8, r9, r10, r11, rbx);
#ifdef _WIN64 #ifdef _WIN64
__ pop(rdi); __ pop(rdi);
@ -7462,21 +7388,6 @@ address generate_avx_ghash_processBlocks() {
address start = __ pc(); address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
const Register tmp2 = r9;
const Register tmp3 = r10;
const Register tmp4 = r11;
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame __ enter(); // required for proper stackwalking of RuntimeStub frame
@ -7484,7 +7395,8 @@ address generate_avx_ghash_processBlocks() {
__ push(rsi); __ push(rsi);
__ push(rdi); __ push(rdi);
#endif #endif
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); __ fast_tan(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
rax, rcx, rdx, r8, r9, r10, r11, rbx);
#ifdef _WIN64 #ifdef _WIN64
__ pop(rdi); __ pop(rdi);