This commit is contained in:
Alejandro Murillo 2016-04-28 14:44:52 -07:00
commit 59292b53e3
371 changed files with 18430 additions and 13672 deletions

View File

@ -83,6 +83,21 @@ suite = {
"workingSets" : "API,JVMCI",
},
"jdk.vm.ci.code.test" : {
"subDir" : "test/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:JUNIT",
"jdk.vm.ci.amd64",
"jdk.vm.ci.sparc",
"jdk.vm.ci.code",
"jdk.vm.ci.hotspot",
],
"checkstyle" : "jdk.vm.ci.services",
"javaCompliance" : "1.8",
"workingSets" : "API,JVMCI",
},
"jdk.vm.ci.runtime" : {
"subDir" : "src/jdk.vm.ci/share/classes",
"sourceDirs" : ["src"],
@ -164,7 +179,7 @@ suite = {
"subDir" : "test/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:TESTNG",
"TESTNG",
"jdk.vm.ci.hotspot",
],
"checkstyle" : "jdk.vm.ci.services",

View File

@ -3325,9 +3325,15 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
const bool Matcher::match_rule_supported(int opcode) {
// TODO
// identify extra cases that we might want to provide match rules for
// e.g. Op_StrEquals and other intrinsics
switch (opcode) {
case Op_StrComp:
case Op_StrIndexOf:
if (CompactStrings) return false;
break;
default:
break;
}
if (!has_match_rule(opcode)) {
return false;
}
@ -3346,6 +3352,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
return ret_value; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
return false;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@ -4190,55 +4200,6 @@ encode %{
}
%}
enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
MacroAssembler _masm(&cbuf);
Register cnt_reg = as_Register($cnt$$reg);
Register base_reg = as_Register($base$$reg);
// base is word aligned
// cnt is count of words
Label loop;
Label entry;
// Algorithm:
//
// scratch1 = cnt & 7;
// cnt -= scratch1;
// p += scratch1;
// switch (scratch1) {
// do {
// cnt -= 8;
// p[-8] = 0;
// case 7:
// p[-7] = 0;
// case 6:
// p[-6] = 0;
// // ...
// case 1:
// p[-1] = 0;
// case 0:
// p += 8;
// } while (cnt);
// }
const int unroll = 8; // Number of str(zr) instructions we'll unroll
__ andr(rscratch1, cnt_reg, unroll - 1); // tmp1 = cnt % unroll
__ sub(cnt_reg, cnt_reg, rscratch1); // cnt -= unroll
// base_reg always points to the end of the region we're about to zero
__ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
__ adr(rscratch2, entry);
__ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
__ br(rscratch2);
__ bind(loop);
__ sub(cnt_reg, cnt_reg, unroll);
for (int i = -unroll; i < 0; i++)
__ str(zr, Address(base_reg, i * wordSize));
__ bind(entry);
__ add(base_reg, base_reg, unroll * wordSize);
__ cbnz(cnt_reg, loop);
%}
/// mov envcodings
enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
@ -12123,21 +12084,21 @@ instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlag
%}
%}
instruct rorI_rReg_Var_C_32(iRegLNoSp dst, iRegL src, iRegI shift, immI_32 c_32, rFlagsReg cr)
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));
expand %{
rorL_rReg(dst, src, shift, cr);
rorI_rReg(dst, src, shift, cr);
%}
%}
instruct rorI_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));
expand %{
rorL_rReg(dst, src, shift, cr);
rorI_rReg(dst, src, shift, cr);
%}
%}
@ -13363,7 +13324,23 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
ins_cost(4 * INSN_COST);
format %{ "ClearArray $cnt, $base" %}
ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
ins_encode %{
__ zero_words($base$$Register, $cnt$$Register);
%}
ins_pipe(pipe_class_memory);
%}
instruct clearArray_imm_reg(immL cnt, iRegP base, Universe dummy, rFlagsReg cr)
%{
match(Set dummy (ClearArray cnt base));
ins_cost(4 * INSN_COST);
format %{ "ClearArray $cnt, $base" %}
ins_encode %{
__ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
%}
ins_pipe(pipe_class_memory);
%}
@ -14797,10 +14774,10 @@ instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp,
ins_pipe(pipe_class_memory);
%}
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
predicate(!CompactStrings);
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -14819,7 +14796,7 @@ instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cn
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
predicate(!CompactStrings);
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
@ -14839,7 +14816,7 @@ instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
predicate(!CompactStrings);
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
@ -14856,10 +14833,27 @@ instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
ins_pipe(pipe_class_memory);
%}
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
iRegI_R0 result, rFlagsReg cr)
%{
predicate(!CompactStrings);
predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (StrEquals (Binary str1 str2) cnt));
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
format %{ "String Equals $str1,$str2,$cnt -> $result" %}
ins_encode %{
// Count is in 8-bit bytes; non-Compact chars are 16 bits.
__ arrays_equals($str1$$Register, $str2$$Register,
$result$$Register, $cnt$$Register,
1, /*is_string*/true);
%}
ins_pipe(pipe_class_memory);
%}
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
iRegI_R0 result, rFlagsReg cr)
%{
predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrEquals (Binary str1 str2) cnt));
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
@ -14907,6 +14901,40 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
%}
// fast char[] to byte[] compression
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
vRegD_V0 tmp1, vRegD_V1 tmp2,
vRegD_V2 tmp3, vRegD_V3 tmp4,
iRegI_R0 result, rFlagsReg cr)
%{
match(Set result (StrCompressedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
format %{ "String Compress $src,$dst -> $result // KILL R1, R2, R3, R4" %}
ins_encode %{
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$FloatRegister, $tmp2$$FloatRegister,
$tmp3$$FloatRegister, $tmp4$$FloatRegister,
$result$$Register);
%}
ins_pipe( pipe_slow );
%}
// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
match(Set dummy (StrInflatedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
ins_encode %{
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
%}
ins_pipe(pipe_class_memory);
%}
// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -177,4 +177,6 @@ void AbstractInterpreter::layout_activation(Method* method,
}
*interpreter_frame->interpreter_frame_cache_addr() =
method->constants()->cache();
*interpreter_frame->interpreter_frame_mirror_addr() =
method->method_holder()->java_mirror();
}

View File

@ -2245,18 +2245,18 @@ public:
rf(Vn, 5), rf(Rd, 0);
}
#define INSN(NAME, opc, opc2) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
starti; \
/* The encodings for the immh:immb fields (bits 22:16) are \
* 0001 xxx 8B/16B, shift = xxx \
* 001x xxx 4H/8H, shift = xxxx \
* 01xx xxx 2S/4S, shift = xxxxx \
* 1xxx xxx 1D/2D, shift = xxxxxx (1D is RESERVED) \
*/ \
assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \
f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \
f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \
#define INSN(NAME, opc, opc2) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
starti; \
/* The encodings for the immh:immb fields (bits 22:16) are \
* 0001 xxx 8B/16B, shift = xxx \
* 001x xxx 4H/8H, shift = xxxx \
* 01xx xxx 2S/4S, shift = xxxxx \
* 1xxx xxx 1D/2D, shift = xxxxxx (1D is RESERVED) \
*/ \
assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \
f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \
f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \
}
INSN(shl, 0, 0b010101);
@ -2347,6 +2347,24 @@ public:
f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
// AdvSIMD ZIP/UZP/TRN
#define INSN(NAME, opcode) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
starti; \
f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0b001110, 15, 10); \
rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); \
f(T & 1, 30), f(T >> 1, 23, 22); \
}
INSN(uzp1, 0b001);
INSN(trn1, 0b010);
INSN(zip1, 0b011);
INSN(uzp2, 0b101);
INSN(trn2, 0b110);
INSN(zip2, 0b111);
#undef INSN
// CRC32 instructions
#define INSN(NAME, c, sf, sz) \
void NAME(Register Rd, Register Rn, Register Rm) { \

View File

@ -2942,6 +2942,10 @@ void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); }
void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); }
void LIR_Assembler::on_spin_wait() {
Unimplemented();
}
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
__ mov(result_reg->as_register(), rthread);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -331,7 +331,7 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
length.load_item();
}
if (needs_store_check) {
if (needs_store_check || x->check_boolean()) {
value.load_item();
} else {
value.load_for_store(x->elt_type());
@ -380,7 +380,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
// Seems to be a precise
post_barrier(LIR_OprFact::address(array_addr), value.result());
} else {
__ move(value.result(), array_addr, null_check_info);
LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info);
__ move(result, array_addr, null_check_info);
}
}
@ -1127,7 +1128,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -32,22 +32,6 @@
#include "runtime/mutexLocker.hpp"
#include "runtime/safepoint.hpp"
// Release the CompiledICHolder* associated with this call site is there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
#define __ _masm.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -78,7 +78,9 @@
interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1,
interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1,
interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1,
interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1,
interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1,
interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1,
interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1,
interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1,
interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -188,6 +188,12 @@ inline Method** frame::interpreter_frame_method_addr() const {
return (Method**)addr_at(interpreter_frame_method_offset);
}
// Mirror
inline oop* frame::interpreter_frame_mirror_addr() const {
return (oop*)addr_at(interpreter_frame_mirror_offset);
}
// top of expression stack
inline intptr_t* frame::interpreter_frame_tos_address() const {
intptr_t* last_sp = interpreter_frame_last_sp();

View File

@ -48,9 +48,9 @@ define_pd_global(intx, InlineFrequencyCount, 100);
#define DEFAULT_STACK_SHADOW_PAGES (4 DEBUG_ONLY(+5))
#define DEFAULT_STACK_RESERVED_PAGES (0)
#define MIN_STACK_YELLOW_PAGES 1
#define MIN_STACK_RED_PAGES 1
#define MIN_STACK_SHADOW_PAGES 1
#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES
#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES
#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES
#define MIN_STACK_RESERVED_PAGES (0)
define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES);
@ -76,7 +76,8 @@ define_pd_global(bool, CompactStrings, false);
// avoid biased locking while we are bootstrapping the aarch64 build
define_pd_global(bool, UseBiasedLocking, false);
define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong);
// Clear short arrays bigger than one word in an arch-specific way
define_pd_global(intx, InitArrayShortSize, BytesPerLong);
#if defined(COMPILER1) || defined(COMPILER2)
define_pd_global(intx, InlineSmallCode, 1000);

View File

@ -40,7 +40,43 @@
#include "runtime/thread.inline.hpp"
// Implementation of InterpreterMacroAssembler
void InterpreterMacroAssembler::narrow(Register result) {
// Get method->_constMethod->_result_type
ldr(rscratch1, Address(rfp, frame::interpreter_frame_method_offset * wordSize));
ldr(rscratch1, Address(rscratch1, Method::const_offset()));
ldrb(rscratch1, Address(rscratch1, ConstMethod::result_type_offset()));
Label done, notBool, notByte, notChar;
// common case first
cmpw(rscratch1, T_INT);
br(Assembler::EQ, done);
// mask integer result to narrower return type.
cmpw(rscratch1, T_BOOLEAN);
br(Assembler::NE, notBool);
andw(result, result, 0x1);
b(done);
bind(notBool);
cmpw(rscratch1, T_BYTE);
br(Assembler::NE, notByte);
sbfx(result, result, 0, 8);
b(done);
bind(notByte);
cmpw(rscratch1, T_CHAR);
br(Assembler::NE, notChar);
ubfx(result, result, 0, 16); // truncate upper 16 bits
b(done);
bind(notChar);
sbfx(result, result, 0, 16); // sign-extend short
// Nothing to do for T_INT
bind(done);
}
void InterpreterMacroAssembler::jump_to_entry(address entry) {
assert(entry, "Entry must have been generated by now");
@ -81,6 +117,7 @@ void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
verify_oop(r0, state); break;
case ltos: ldr(r0, val_addr); break;
case btos: // fall through
case ztos: // fall through
case ctos: // fall through
case stos: // fall through
case itos: ldrw(r0, val_addr); break;
@ -314,6 +351,7 @@ void InterpreterMacroAssembler::pop(TosState state) {
switch (state) {
case atos: pop_ptr(); break;
case btos:
case ztos:
case ctos:
case stos:
case itos: pop_i(); break;
@ -331,6 +369,7 @@ void InterpreterMacroAssembler::push(TosState state) {
switch (state) {
case atos: push_ptr(); break;
case btos:
case ztos:
case ctos:
case stos:
case itos: push_i(); break;

View File

@ -245,6 +245,9 @@ class InterpreterMacroAssembler: public MacroAssembler {
void update_mdp_by_constant(Register mdp_in, int constant);
void update_mdp_for_ret(Register return_bci);
// narrow int return value
void narrow(Register result);
void profile_taken_branch(Register mdp, Register bumped_count);
void profile_not_taken_branch(Register mdp);
void profile_call(Register mdp);

View File

@ -3217,6 +3217,14 @@ void MacroAssembler::load_klass(Register dst, Register src) {
}
}
void MacroAssembler::load_mirror(Register dst, Register method) {
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
ldr(dst, Address(rmethod, Method::const_offset()));
ldr(dst, Address(dst, ConstMethod::constants_offset()));
ldr(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
ldr(dst, Address(dst, mirror_offset));
}
void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
if (UseCompressedClassPointers) {
ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
@ -4585,7 +4593,16 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
assert(elem_size == 1 || elem_size == 2, "must be char or byte");
assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
BLOCK_COMMENT(is_string ? "string_equals {" : "array_equals {");
#ifndef PRODUCT
{
const char kind = (elem_size == 2) ? 'U' : 'L';
char comment[64];
snprintf(comment, sizeof comment, "%s%c%s {",
is_string ? "string_equals" : "array_equals",
kind, "{");
BLOCK_COMMENT(comment);
}
#endif
mov(result, false);
@ -4670,8 +4687,97 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
}
// base: Address of a buffer to be zeroed, 8 bytes aligned.
// cnt: Count in 8-byte unit.
void MacroAssembler::zero_words(Register base, Register cnt)
{
fill_words(base, cnt, zr);
}
// encode char[] to byte[] in ISO_8859_1
// base: Address of a buffer to be zeroed, 8 bytes aligned.
// cnt: Immediate count in 8-byte unit.
#define ShortArraySize (18 * BytesPerLong)
void MacroAssembler::zero_words(Register base, u_int64_t cnt)
{
int i = cnt & 1; // store any odd word to start
if (i) str(zr, Address(base));
if (cnt <= ShortArraySize / BytesPerLong) {
for (; i < (int)cnt; i += 2)
stp(zr, zr, Address(base, i * wordSize));
} else {
const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
int remainder = cnt % (2 * unroll);
for (; i < remainder; i += 2)
stp(zr, zr, Address(base, i * wordSize));
Label loop;
Register cnt_reg = rscratch1;
Register loop_base = rscratch2;
cnt = cnt - remainder;
mov(cnt_reg, cnt);
// adjust base and prebias by -2 * wordSize so we can pre-increment
add(loop_base, base, (remainder - 2) * wordSize);
bind(loop);
sub(cnt_reg, cnt_reg, 2 * unroll);
for (i = 1; i < unroll; i++)
stp(zr, zr, Address(loop_base, 2 * i * wordSize));
stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize)));
cbnz(cnt_reg, loop);
}
}
// base: Address of a buffer to be filled, 8 bytes aligned.
// cnt: Count in 8-byte unit.
// value: Value to be filled with.
// base will point to the end of the buffer after filling.
void MacroAssembler::fill_words(Register base, Register cnt, Register value)
{
// Algorithm:
//
// scratch1 = cnt & 7;
// cnt -= scratch1;
// p += scratch1;
// switch (scratch1) {
// do {
// cnt -= 8;
// p[-8] = v;
// case 7:
// p[-7] = v;
// case 6:
// p[-6] = v;
// // ...
// case 1:
// p[-1] = v;
// case 0:
// p += 8;
// } while (cnt);
// }
assert_different_registers(base, cnt, value, rscratch1, rscratch2);
Label entry, loop;
const int unroll = 8; // Number of str instructions we'll unroll
andr(rscratch1, cnt, unroll - 1); // tmp1 = cnt % unroll
cbz(rscratch1, entry);
sub(cnt, cnt, rscratch1); // cnt -= tmp1
// base always points to the end of the region we're about to fill
add(base, base, rscratch1, Assembler::LSL, 3);
adr(rscratch2, entry);
sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
br(rscratch2);
bind(loop);
add(base, base, unroll * 8);
sub(cnt, cnt, unroll);
for (int i = -unroll; i < 0; i++)
str(value, Address(base, i * 8));
bind(entry);
cbnz(cnt, loop);
}
// Intrinsic for sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray and
// java/lang/StringUTF16.compress.
void MacroAssembler::encode_iso_array(Register src, Register dst,
Register len, Register result,
FloatRegister Vtmp1, FloatRegister Vtmp2,
@ -4734,6 +4840,90 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
BIND(DONE);
sub(result, result, len); // Return index where we stopped
// Return len == 0 if we processed all
// characters
}
// Inflate byte[] array to char[].
void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
Register tmp4) {
Label big, done;
assert_different_registers(src, dst, len, tmp4, rscratch1);
fmovd(vtmp1 , zr);
lsrw(rscratch1, len, 3);
cbnzw(rscratch1, big);
// Short string: less than 8 bytes.
{
Label loop, around, tiny;
subsw(len, len, 4);
andw(len, len, 3);
br(LO, tiny);
// Use SIMD to do 4 bytes.
ldrs(vtmp2, post(src, 4));
zip1(vtmp3, T8B, vtmp2, vtmp1);
strd(vtmp3, post(dst, 8));
cbzw(len, done);
// Do the remaining bytes by steam.
bind(loop);
ldrb(tmp4, post(src, 1));
strh(tmp4, post(dst, 2));
subw(len, len, 1);
bind(tiny);
cbnz(len, loop);
bind(around);
b(done);
}
// Unpack the bytes 8 at a time.
bind(big);
andw(len, len, 7);
{
Label loop, around;
bind(loop);
ldrd(vtmp2, post(src, 8));
sub(rscratch1, rscratch1, 1);
zip1(vtmp3, T16B, vtmp2, vtmp1);
st1(vtmp3, T8H, post(dst, 16));
cbnz(rscratch1, loop);
bind(around);
}
// Do the tail of up to 8 bytes.
sub(src, src, 8);
add(src, src, len, ext::uxtw, 0);
ldrd(vtmp2, Address(src));
sub(dst, dst, 16);
add(dst, dst, len, ext::uxtw, 1);
zip1(vtmp3, T16B, vtmp2, vtmp1);
st1(vtmp3, T8H, Address(dst));
bind(done);
}
// Compress char[] array to byte[].
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
FloatRegister tmp1Reg, FloatRegister tmp2Reg,
FloatRegister tmp3Reg, FloatRegister tmp4Reg,
Register result) {
encode_iso_array(src, dst, len, result,
tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);
cmp(len, zr);
csel(result, result, zr, EQ);
}
// get_thread() can be called anywhere inside generated code so we

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -770,6 +770,8 @@ public:
void store_klass(Register dst, Register src);
void cmp_klass(Register oop, Register trial_klass, Register tmp);
void load_mirror(Register dst, Register method);
void load_heap_oop(Register dst, Address src);
void load_heap_oop_not_null(Register dst, Address src);
@ -1184,6 +1186,19 @@ public:
Register result, Register cnt1,
int elem_size, bool is_string);
void fill_words(Register base, Register cnt, Register value);
void zero_words(Register base, Register cnt);
void zero_words(Register base, u_int64_t cnt);
void byte_array_inflate(Register src, Register dst, Register len,
FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, Register tmp4);
void char_array_compress(Register src, Register dst, Register len,
FloatRegister tmp1Reg, FloatRegister tmp2Reg,
FloatRegister tmp3Reg, FloatRegister tmp4Reg,
Register result);
void encode_iso_array(Register src, Register dst,
Register len, Register result,
FloatRegister Vtmp1, FloatRegister Vtmp2,

View File

@ -2021,6 +2021,136 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
//
// Generate stub for array fill. If "aligned" is true, the
// "to" address is assumed to be heapword aligned.
//
// Arguments for generated stub:
// to: c_rarg0
// value: c_rarg1
// count: c_rarg2 treated as signed
//
address generate_fill(BasicType t, bool aligned, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
BLOCK_COMMENT("Entry:");
const Register to = c_rarg0; // source array address
const Register value = c_rarg1; // value
const Register count = c_rarg2; // elements count
const Register cnt_words = c_rarg3; // temp register
__ enter();
Label L_fill_elements, L_exit1;
int shift = -1;
switch (t) {
case T_BYTE:
shift = 0;
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
__ bfi(value, value, 8, 8); // 8 bit -> 16 bit
__ bfi(value, value, 16, 16); // 16 bit -> 32 bit
__ br(Assembler::LO, L_fill_elements);
break;
case T_SHORT:
shift = 1;
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
__ bfi(value, value, 16, 16); // 16 bit -> 32 bit
__ br(Assembler::LO, L_fill_elements);
break;
case T_INT:
shift = 2;
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
__ br(Assembler::LO, L_fill_elements);
break;
default: ShouldNotReachHere();
}
// Align source address at 8 bytes address boundary.
Label L_skip_align1, L_skip_align2, L_skip_align4;
if (!aligned) {
switch (t) {
case T_BYTE:
// One byte misalignment happens only for byte arrays.
__ tbz(to, 0, L_skip_align1);
__ strb(value, Address(__ post(to, 1)));
__ subw(count, count, 1);
__ bind(L_skip_align1);
// Fallthrough
case T_SHORT:
// Two bytes misalignment happens only for byte and short (char) arrays.
__ tbz(to, 1, L_skip_align2);
__ strh(value, Address(__ post(to, 2)));
__ subw(count, count, 2 >> shift);
__ bind(L_skip_align2);
// Fallthrough
case T_INT:
// Align to 8 bytes, we know we are 4 byte aligned to start.
__ tbz(to, 2, L_skip_align4);
__ strw(value, Address(__ post(to, 4)));
__ subw(count, count, 4 >> shift);
__ bind(L_skip_align4);
break;
default: ShouldNotReachHere();
}
}
//
// Fill large chunks
//
__ lsrw(cnt_words, count, 3 - shift); // number of words
__ bfi(value, value, 32, 32); // 32 bit -> 64 bit
__ subw(count, count, cnt_words, Assembler::LSL, 3 - shift);
__ fill_words(to, cnt_words, value);
// Remaining count is less than 8 bytes. Fill it by a single store.
// Note that the total length is no less than 8 bytes.
if (t == T_BYTE || t == T_SHORT) {
Label L_exit1;
__ cbzw(count, L_exit1);
__ add(to, to, count, Assembler::LSL, shift); // points to the end
__ str(value, Address(to, -8)); // overwrite some elements
__ bind(L_exit1);
__ leave();
__ ret(lr);
}
// Handle copies less than 8 bytes.
Label L_fill_2, L_fill_4, L_exit2;
__ bind(L_fill_elements);
switch (t) {
case T_BYTE:
__ tbz(count, 0, L_fill_2);
__ strb(value, Address(__ post(to, 1)));
__ bind(L_fill_2);
__ tbz(count, 1, L_fill_4);
__ strh(value, Address(__ post(to, 2)));
__ bind(L_fill_4);
__ tbz(count, 2, L_exit2);
__ strw(value, Address(to));
break;
case T_SHORT:
__ tbz(count, 0, L_fill_4);
__ strh(value, Address(__ post(to, 2)));
__ bind(L_fill_4);
__ tbz(count, 1, L_exit2);
__ strw(value, Address(to));
break;
case T_INT:
__ cbzw(count, L_exit2);
__ strw(value, Address(to));
break;
default: ShouldNotReachHere();
}
__ bind(L_exit2);
__ leave();
__ ret(lr);
return start;
}
void generate_arraycopy_stubs() {
address entry;
address entry_jbyte_arraycopy;
@ -2124,6 +2254,12 @@ class StubGenerator: public StubCodeGenerator {
entry_jlong_arraycopy,
entry_checkcast_arraycopy);
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
}
void generate_math_stubs() { Unimplemented(); }

View File

@ -759,18 +759,13 @@ void TemplateInterpreterGenerator::lock_method() {
// get synchronization object
{
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
Label done;
__ ldrw(r0, access_flags);
__ tst(r0, JVM_ACC_STATIC);
// get receiver (assume this is frequent case)
__ ldr(r0, Address(rlocals, Interpreter::local_offset_in_bytes(0)));
__ br(Assembler::EQ, done);
__ ldr(r0, Address(rmethod, Method::const_offset()));
__ ldr(r0, Address(r0, ConstMethod::constants_offset()));
__ ldr(r0, Address(r0,
ConstantPool::pool_holder_offset_in_bytes()));
__ ldr(r0, Address(r0, mirror_offset));
__ load_mirror(r0, rmethod);
#ifdef ASSERT
{
@ -807,16 +802,16 @@ void TemplateInterpreterGenerator::lock_method() {
void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
// initialize fixed part of activation frame
if (native_call) {
__ sub(esp, sp, 12 * wordSize);
__ sub(esp, sp, 14 * wordSize);
__ mov(rbcp, zr);
__ stp(esp, zr, Address(__ pre(sp, -12 * wordSize)));
__ stp(esp, zr, Address(__ pre(sp, -14 * wordSize)));
// add 2 zero-initialized slots for native calls
__ stp(zr, zr, Address(sp, 10 * wordSize));
__ stp(zr, zr, Address(sp, 12 * wordSize));
} else {
__ sub(esp, sp, 10 * wordSize);
__ sub(esp, sp, 12 * wordSize);
__ ldr(rscratch1, Address(rmethod, Method::const_offset())); // get ConstMethod
__ add(rbcp, rscratch1, in_bytes(ConstMethod::codes_offset())); // get codebase
__ stp(esp, rbcp, Address(__ pre(sp, -10 * wordSize)));
__ stp(esp, rbcp, Address(__ pre(sp, -12 * wordSize)));
}
if (ProfileInterpreter) {
@ -825,22 +820,26 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
__ cbz(rscratch1, method_data_continue);
__ lea(rscratch1, Address(rscratch1, in_bytes(MethodData::data_offset())));
__ bind(method_data_continue);
__ stp(rscratch1, rmethod, Address(sp, 4 * wordSize)); // save Method* and mdp (method data pointer)
__ stp(rscratch1, rmethod, Address(sp, 6 * wordSize)); // save Method* and mdp (method data pointer)
} else {
__ stp(zr, rmethod, Address(sp, 4 * wordSize)); // save Method* (no mdp)
__ stp(zr, rmethod, Address(sp, 6 * wordSize)); // save Method* (no mdp)
}
// Get mirror and store it in the frame as GC root for this Method*
__ load_mirror(rscratch1, rmethod);
__ stp(rscratch1, zr, Address(sp, 4 * wordSize));
__ ldr(rcpool, Address(rmethod, Method::const_offset()));
__ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset()));
__ ldr(rcpool, Address(rcpool, ConstantPool::cache_offset_in_bytes()));
__ stp(rlocals, rcpool, Address(sp, 2 * wordSize));
__ stp(rfp, lr, Address(sp, 8 * wordSize));
__ lea(rfp, Address(sp, 8 * wordSize));
__ stp(rfp, lr, Address(sp, 10 * wordSize));
__ lea(rfp, Address(sp, 10 * wordSize));
// set sender sp
// leave last_sp as null
__ stp(zr, r13, Address(sp, 6 * wordSize));
__ stp(zr, r13, Address(sp, 8 * wordSize));
// Move SP out of the way
if (! native_call) {
@ -1242,15 +1241,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// pass mirror handle if static call
{
Label L;
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ldrw(t, Address(rmethod, Method::access_flags_offset()));
__ tst(t, JVM_ACC_STATIC);
__ br(Assembler::EQ, L);
// get mirror
__ ldr(t, Address(rmethod, Method::const_offset()));
__ ldr(t, Address(t, ConstMethod::constants_offset()));
__ ldr(t, Address(t, ConstantPool::pool_holder_offset_in_bytes()));
__ ldr(t, Address(t, mirror_offset));
__ load_mirror(t, rmethod);
// copy mirror into activation frame
__ str(t, Address(rfp, frame::interpreter_frame_oop_temp_offset * wordSize));
// pass handle to mirror

View File

@ -229,6 +229,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
switch (bc) {
case Bytecodes::_fast_aputfield:
case Bytecodes::_fast_bputfield:
case Bytecodes::_fast_zputfield:
case Bytecodes::_fast_cputfield:
case Bytecodes::_fast_dputfield:
case Bytecodes::_fast_fputfield:
@ -1082,6 +1083,17 @@ void TemplateTable::bastore()
// r1: index
// r3: array
index_check(r3, r1); // prefer index in r1
// Need to check whether array is boolean or byte
// since both types share the bastore bytecode.
__ load_klass(r2, r3);
__ ldrw(r2, Address(r2, Klass::layout_helper_offset()));
int diffbit_index = exact_log2(Klass::layout_helper_boolean_diffbit());
Label L_skip;
__ tbz(r2, diffbit_index, L_skip);
__ andw(r0, r0, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1
__ bind(L_skip);
__ lea(rscratch1, Address(r3, r1, Address::uxtw(0)));
__ strb(r0, Address(rscratch1,
arrayOopDesc::base_offset_in_bytes(T_BYTE)));
@ -2193,6 +2205,13 @@ void TemplateTable::_return(TosState state)
if (_desc->bytecode() == Bytecodes::_return)
__ membar(MacroAssembler::StoreStore);
// Narrow result if state is itos but result type is smaller.
// Need to narrow in the return bytecode rather than in generate_return_entry
// since compiled code callers expect the result to already be narrowed.
if (state == itos) {
__ narrow(r0);
}
__ remove_activation(state);
__ ret(lr);
}
@ -2386,7 +2405,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
const Address field(obj, off);
Label Done, notByte, notInt, notShort, notChar,
Label Done, notByte, notBool, notInt, notShort, notChar,
notLong, notFloat, notObj, notDouble;
// x86 uses a shift and mask or wings it with a shift plus assert
@ -2409,6 +2428,20 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
__ b(Done);
__ bind(notByte);
__ cmp(flags, ztos);
__ br(Assembler::NE, notBool);
// ztos (same code as btos)
__ ldrsb(r0, field);
__ push(ztos);
// Rewrite bytecode to be faster
if (!is_static) {
// use btos rewriting, no truncating to t/f bit is needed for getfield.
patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
}
__ b(Done);
__ bind(notBool);
__ cmp(flags, atos);
__ br(Assembler::NE, notObj);
// atos
@ -2604,7 +2637,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
// field address
const Address field(obj, off);
Label notByte, notInt, notShort, notChar,
Label notByte, notBool, notInt, notShort, notChar,
notLong, notFloat, notObj, notDouble;
// x86 uses a shift and mask or wings it with a shift plus assert
@ -2629,6 +2662,22 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
}
__ bind(notByte);
__ cmp(flags, ztos);
__ br(Assembler::NE, notBool);
// ztos
{
__ pop(ztos);
if (!is_static) pop_and_check_object(obj);
__ andw(r0, r0, 0x1);
__ strb(r0, field);
if (!is_static) {
patch_bytecode(Bytecodes::_fast_zputfield, bc, r1, true, byte_no);
}
__ b(Done);
}
__ bind(notBool);
__ cmp(flags, atos);
__ br(Assembler::NE, notObj);
@ -2783,6 +2832,7 @@ void TemplateTable::jvmti_post_fast_field_mod()
switch (bytecode()) { // load values into the jvalue object
case Bytecodes::_fast_aputfield: __ push_ptr(r0); break;
case Bytecodes::_fast_bputfield: // fall through
case Bytecodes::_fast_zputfield: // fall through
case Bytecodes::_fast_sputfield: // fall through
case Bytecodes::_fast_cputfield: // fall through
case Bytecodes::_fast_iputfield: __ push_i(r0); break;
@ -2808,6 +2858,7 @@ void TemplateTable::jvmti_post_fast_field_mod()
switch (bytecode()) { // restore tos values
case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break;
case Bytecodes::_fast_bputfield: // fall through
case Bytecodes::_fast_zputfield: // fall through
case Bytecodes::_fast_sputfield: // fall through
case Bytecodes::_fast_cputfield: // fall through
case Bytecodes::_fast_iputfield: __ pop_i(r0); break;
@ -2863,6 +2914,9 @@ void TemplateTable::fast_storefield(TosState state)
case Bytecodes::_fast_iputfield:
__ strw(r0, field);
break;
case Bytecodes::_fast_zputfield:
__ andw(r0, r0, 0x1); // boolean is true if LSB is 1
// fall through to bputfield
case Bytecodes::_fast_bputfield:
__ strb(r0, field);
break;
@ -2982,7 +3036,7 @@ void TemplateTable::fast_xaccess(TosState state)
__ null_check(r0);
switch (state) {
case itos:
__ ldr(r0, Address(r0, r1, Address::lsl(0)));
__ ldrw(r0, Address(r0, r1, Address::lsl(0)));
break;
case atos:
__ load_heap_oop(r0, Address(r0, r1, Address::lsl(0)));
@ -3000,7 +3054,7 @@ void TemplateTable::fast_xaccess(TosState state)
__ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
ConstantPoolCacheEntry::flags_offset())));
__ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
__ membar(MacroAssembler::LoadLoad);
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ bind(notVolatile);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -152,6 +152,7 @@ void AbstractInterpreter::layout_activation(Method* method,
intptr_t* top_frame_sp = is_top_frame ? sp : sp + (frame::abi_minframe_size - frame::abi_reg_args_size) / Interpreter::stackElementSize;
interpreter_frame->interpreter_frame_set_method(method);
interpreter_frame->interpreter_frame_set_mirror(method->method_holder()->java_mirror());
interpreter_frame->interpreter_frame_set_locals(locals_base);
interpreter_frame->interpreter_frame_set_cpcache(method->constants()->cache());
interpreter_frame->interpreter_frame_set_esp(esp);

View File

@ -2845,6 +2845,9 @@ void LIR_Assembler::membar_storeload() {
__ membar(Assembler::StoreLoad);
}
void LIR_Assembler::on_spin_wait() {
Unimplemented();
}
void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
LIR_Address* addr = addr_opr->as_address_ptr();

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -1056,7 +1056,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -35,22 +35,6 @@
#include "opto/matcher.hpp"
#endif
// Release the CompiledICHolder* associated with this call site is there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
// A PPC CompiledStaticCall looks like this:

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -261,6 +261,7 @@
uint64_t ijava_reserved2; // Inserted for alignment.
#endif
uint64_t method;
uint64_t mirror;
uint64_t locals;
uint64_t monitors;
uint64_t cpoolCache;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -148,6 +148,11 @@ inline BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
inline Method** frame::interpreter_frame_method_addr() const {
return (Method**) &(get_ijava_state()->method);
}
inline oop* frame::interpreter_frame_mirror_addr() const {
return (oop*) &(get_ijava_state()->mirror);
}
inline ConstantPoolCache** frame::interpreter_frame_cpoolcache_addr() const {
return (ConstantPoolCache**) &(get_ijava_state()->cpoolCache);
}

View File

@ -3118,6 +3118,14 @@ void MacroAssembler::load_klass(Register dst, Register src) {
}
}
void MacroAssembler::load_mirror(Register mirror, Register method) {
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
ld(mirror, in_bytes(Method::const_offset()), method);
ld(mirror, in_bytes(ConstMethod::constants_offset()), mirror);
ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
ld(mirror, mirror_offset, mirror);
}
// Clear Array
// Kills both input registers. tmp == R0 is allowed.
void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {

View File

@ -647,6 +647,9 @@ class MacroAssembler: public Assembler {
void load_klass(Register dst, Register src);
void store_klass(Register dst_oop, Register klass, Register tmp = R0);
void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
void load_mirror(Register mirror, Register method);
static int instr_size_for_decode_klass_not_null();
void decode_klass_not_null(Register dst, Register src = noreg);
Register encode_klass_not_null(Register dst, Register src = noreg);

View File

@ -2047,6 +2047,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
return ret_value; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
return false;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}

View File

@ -871,7 +871,6 @@ void TemplateInterpreterGenerator::lock_method(Register Rflags, Register Rscratc
// Get synchronization object to Rscratch2.
{
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
Label Lstatic;
Label Ldone;
@ -883,10 +882,7 @@ void TemplateInterpreterGenerator::lock_method(Register Rflags, Register Rscratc
__ b(Ldone);
__ bind(Lstatic); // Static case: Lock the java mirror
__ ld(Robj_to_lock, in_bytes(Method::const_offset()), R19_method);
__ ld(Robj_to_lock, in_bytes(ConstMethod::constants_offset()), Robj_to_lock);
__ ld(Robj_to_lock, ConstantPool::pool_holder_offset_in_bytes(), Robj_to_lock);
__ ld(Robj_to_lock, mirror_offset, Robj_to_lock);
__ load_mirror(Robj_to_lock, R19_method);
__ bind(Ldone);
__ verify_oop(Robj_to_lock);
@ -1051,10 +1047,14 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist
__ addi(R26_monitor, R1_SP, - frame::ijava_state_size);
__ addi(R15_esp, R26_monitor, - Interpreter::stackElementSize);
// Get mirror and store it in the frame as GC root for this Method*
__ load_mirror(R12_scratch2, R19_method);
// Store values.
// R15_esp, R14_bcp, R26_monitor, R28_mdx are saved at java calls
// in InterpreterMacroAssembler::call_from_interpreter.
__ std(R19_method, _ijava_state_neg(method), R1_SP);
__ std(R12_scratch2, _ijava_state_neg(mirror), R1_SP);
__ std(R21_sender_SP, _ijava_state_neg(sender_sp), R1_SP);
__ std(R27_constPoolCache, _ijava_state_neg(cpoolCache), R1_SP);
__ std(R18_locals, _ijava_state_neg(locals), R1_SP);
@ -1319,21 +1319,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT);
__ bfalse(CCR0, method_is_not_static);
// constants = method->constants();
__ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
__ ld(R11_scratch1, in_bytes(ConstMethod::constants_offset()), R11_scratch1);
// pool_holder = method->constants()->pool_holder();
__ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(),
R11_scratch1/*constants*/);
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
// mirror = pool_holder->klass_part()->java_mirror();
__ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/);
__ load_mirror(R12_sratch2, R19_method);
// state->_native_mirror = mirror;
__ ld(R11_scratch1, 0, R1_SP);
__ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1);
__ std(R12_scratch2/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1);
// R4_ARG2 = &state->_oop_temp;
__ addi(R4_ARG2, R11_scratch1, _ijava_state_neg(oop_tmp));
BIND(method_is_not_static);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -281,11 +281,12 @@ void AbstractInterpreter::layout_activation(Method* method,
// Llast_SP will be same as SP as there is no adapter space
*interpreter_frame->register_addr(Llast_SP) = (intptr_t) interpreter_frame->sp() - STACK_BIAS;
*interpreter_frame->register_addr(LcpoolCache) = (intptr_t) method->constants()->cache();
// save the mirror in the interpreter frame
*interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror();
#ifdef FAST_DISPATCH
*interpreter_frame->register_addr(IdispatchTables) = (intptr_t) Interpreter::dispatch_table();
#endif
#ifdef ASSERT
BasicObjectLock* mp = (BasicObjectLock*)monitors;

View File

@ -3313,6 +3313,9 @@ void LIR_Assembler::membar_storeload() {
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
}
void LIR_Assembler::on_spin_wait() {
Unimplemented();
}
// Pack two sequential registers containing 32 bit values
// into a single 64 bit register.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -1034,7 +1034,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,22 +34,6 @@
#include "opto/matcher.hpp"
#endif
// Release the CompiledICHolder* associated with this call site is there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
#define __ _masm.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -783,7 +783,7 @@ void frame::describe_pd(FrameValues& values, int frame_no) {
if (is_interpreted_frame()) {
DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp);
DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp);
DESCRIBE_FP_OFFSET(interpreter_frame_padding);
DESCRIBE_FP_OFFSET(interpreter_frame_mirror);
DESCRIBE_FP_OFFSET(interpreter_frame_oop_temp);
// esp, according to Lesp (e.g. not depending on bci), if seems valid

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -209,7 +209,8 @@
// 2 words, also used to save float regs across calls to C
interpreter_frame_d_scratch_fp_offset = -2,
interpreter_frame_l_scratch_fp_offset = -4,
interpreter_frame_padding_offset = -5, // for native calls only
interpreter_frame_mirror_offset = -5, // keep interpreted method alive
interpreter_frame_oop_temp_offset = -6, // for native calls only
interpreter_frame_vm_locals_fp_offset = -6, // should be same as above, and should be zero mod 8

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -163,6 +163,10 @@ inline void frame::interpreter_frame_set_monitors(BasicObjectLock* monitors) {
*interpreter_frame_monitors_addr() = monitors;
}
inline oop* frame::interpreter_frame_mirror_addr() const {
return (oop*)(fp() + interpreter_frame_mirror_offset);
}
// Constant pool cache
// where LcpoolCache is saved:

View File

@ -3257,12 +3257,12 @@ void MacroAssembler::eden_allocate(
if (var_size_in_bytes->is_valid()) {
// size is unknown at compile time
cmp(free, var_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
brx(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
delayed()->add(obj, var_size_in_bytes, end);
} else {
// size is known at compile time
cmp(free, con_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
brx(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
delayed()->add(obj, con_size_in_bytes, end);
}
// Compare obj with the value at top_addr; if still equal, swap the value of
@ -3972,6 +3972,14 @@ void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_v
card_table_write(bs->byte_map_base, tmp, store_addr);
}
void MacroAssembler::load_mirror(Register mirror, Register method) {
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
ld_ptr(method, in_bytes(Method::const_offset()), mirror);
ld_ptr(mirror, in_bytes(ConstMethod::constants_offset()), mirror);
ld_ptr(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
ld_ptr(mirror, mirror_offset, mirror);
}
void MacroAssembler::load_klass(Register src_oop, Register klass) {
// The number of bytes in this code is used by
// MachCallDynamicJavaNode::ret_addr_offset()

View File

@ -1012,6 +1012,8 @@ public:
inline void ldbool(const Address& a, Register d);
inline void movbool( bool boolconst, Register d);
void load_mirror(Register mirror, Register method);
// klass oop manipulations if compressed
void load_klass(Register src_oop, Register klass);
void store_klass(Register klass, Register dst_oop);

View File

@ -1904,6 +1904,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
return ret_value; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
return false;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}

View File

@ -557,17 +557,12 @@ void TemplateInterpreterGenerator::lock_method() {
// get synchronization object to O0
{ Label done;
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ btst(JVM_ACC_STATIC, O0);
__ br( Assembler::zero, true, Assembler::pt, done);
__ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case
__ ld_ptr( Lmethod, in_bytes(Method::const_offset()), O0);
__ ld_ptr( O0, in_bytes(ConstMethod::constants_offset()), O0);
__ ld_ptr( O0, ConstantPool::pool_holder_offset_in_bytes(), O0);
// lock the mirror, not the Klass*
__ ld_ptr( O0, mirror_offset, O0);
__ load_mirror(O0, Lmethod);
#ifdef ASSERT
__ tst(O0);
@ -881,6 +876,10 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
__ add(Lbcp, in_bytes(ConstMethod::codes_offset()), Lbcp);
}
__ mov( G5_method, Lmethod); // set Lmethod
// Get mirror and store it in the frame as GC root for this Method*
Register mirror = LcpoolCache;
__ load_mirror(mirror, Lmethod);
__ st_ptr(mirror, FP, (frame::interpreter_frame_mirror_offset * wordSize) + STACK_BIAS);
__ get_constant_pool_cache( LcpoolCache ); // set LcpoolCache
__ sub(FP, rounded_vm_local_words * BytesPerWord, Lmonitors ); // set Lmonitors
#ifdef _LP64
@ -1297,12 +1296,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// get native function entry point(O0 is a good temp until the very end)
__ delayed()->ld_ptr(Lmethod, in_bytes(Method::native_function_offset()), O0);
// for static methods insert the mirror argument
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ld_ptr(Lmethod, Method:: const_offset(), O1);
__ ld_ptr(O1, ConstMethod::constants_offset(), O1);
__ ld_ptr(O1, ConstantPool::pool_holder_offset_in_bytes(), O1);
__ ld_ptr(O1, mirror_offset, O1);
__ load_mirror(O1, Lmethod);
#ifdef ASSERT
if (!PrintSignatureHandlers) // do not dirty the output with this
{ Label L;

View File

@ -112,11 +112,14 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
}
if (AllocatePrefetchInstr == 1) {
// Need a space at the end of TLAB for BIS since it
// will fault when accessing memory outside of heap.
// Need extra space at the end of TLAB for BIS, otherwise prefetching
// instructions will fault (due to accessing memory outside of heap).
// The amount of space is the max of the number of lines to
// prefetch for array and for instance allocations. (Extra space must be
// reserved to accomodate both types of allocations.)
// +1 for rounding up to next cache line, +1 to be safe
int lines = AllocatePrefetchLines + 2;
int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
int step_size = AllocatePrefetchStepSize;
int distance = AllocatePrefetchDistance;
_reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -113,6 +113,8 @@ void AbstractInterpreter::layout_activation(Method* method,
}
*interpreter_frame->interpreter_frame_cache_addr() =
method->constants()->cache();
*interpreter_frame->interpreter_frame_mirror_addr() =
method->method_holder()->java_mirror();
}
#ifndef _LP64

View File

@ -1240,6 +1240,7 @@ void Assembler::addr_nop_8() {
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_int8((unsigned char)(0xC0 | encode));
@ -1250,6 +1251,7 @@ void Assembler::addsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_operand(dst, src);
@ -1599,6 +1601,7 @@ void Assembler::comisd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);;
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x2F);
emit_operand(dst, src);
@ -1607,6 +1610,7 @@ void Assembler::comisd(XMMRegister dst, Address src) {
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x2F);
emit_int8((unsigned char)(0xC0 | encode));
@ -1733,6 +1737,7 @@ void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5A);
emit_int8((unsigned char)(0xC0 | encode));
@ -1743,6 +1748,7 @@ void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5A);
emit_operand(dst, src);
@ -1827,6 +1833,15 @@ void Assembler::cvttss2sil(Register dst, XMMRegister src) {
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xE6);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::decl(Address dst) {
// Don't use it directly. Use MacroAssembler::decrement() instead.
InstructionMark im(this);
@ -1840,6 +1855,7 @@ void Assembler::divsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_operand(dst, src);
@ -1848,6 +1864,7 @@ void Assembler::divsd(XMMRegister dst, Address src) {
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_int8((unsigned char)(0xC0 | encode));
@ -2122,6 +2139,7 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x28);
emit_int8((unsigned char)(0xC0 | encode));
@ -2156,6 +2174,7 @@ void Assembler::movddup(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x12);
emit_int8(0xC0 | encode);
@ -2193,6 +2212,15 @@ void Assembler::kmovwl(Register dst, KRegister src) {
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovwl(KRegister dst, Address src) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x90);
emit_operand((Register)dst, src);
}
void Assembler::kmovdl(KRegister dst, Register src) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@ -2251,6 +2279,14 @@ void Assembler::kmovql(Register dst, KRegister src) {
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::knotwl(KRegister dst, KRegister src) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x44);
emit_int8((unsigned char)(0xC0 | encode));
}
// This instruction produces ZF or CF flags
void Assembler::kortestbl(KRegister src1, KRegister src2) {
assert(VM_Version::supports_avx512dq(), "");
@ -2423,6 +2459,7 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
@ -2435,6 +2472,7 @@ void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
@ -2447,6 +2485,7 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
@ -2455,6 +2494,7 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
@ -2466,6 +2506,7 @@ void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
@ -2478,6 +2519,7 @@ void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
@ -2509,8 +2551,8 @@ void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
@ -2529,8 +2571,8 @@ void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
@ -2541,8 +2583,8 @@ void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
@ -2592,6 +2634,7 @@ void Assembler::movlpd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x12);
emit_operand(dst, src);
@ -2622,6 +2665,7 @@ void Assembler::movq(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x7E);
emit_operand(dst, src);
@ -2632,6 +2676,7 @@ void Assembler::movq(Address dst, XMMRegister src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD6);
emit_operand(src, dst);
@ -2656,6 +2701,7 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x10);
emit_int8((unsigned char)(0xC0 | encode));
@ -2666,6 +2712,7 @@ void Assembler::movsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x10);
emit_operand(dst, src);
@ -2676,6 +2723,7 @@ void Assembler::movsd(Address dst, XMMRegister src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x11);
emit_operand(src, dst);
@ -2799,6 +2847,7 @@ void Assembler::mulsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_operand(dst, src);
@ -2807,6 +2856,7 @@ void Assembler::mulsd(XMMRegister dst, Address src) {
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -3786,6 +3836,7 @@ void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x6C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4081,6 +4132,7 @@ void Assembler::smovl() {
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
emit_int8((unsigned char)(0xC0 | encode));
@ -4091,6 +4143,7 @@ void Assembler::sqrtsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
emit_operand(dst, src);
@ -4166,6 +4219,7 @@ void Assembler::subl(Register dst, Register src) {
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4176,6 +4230,7 @@ void Assembler::subsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_operand(dst, src);
@ -4263,6 +4318,7 @@ void Assembler::ucomisd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x2E);
emit_operand(dst, src);
@ -4271,6 +4327,7 @@ void Assembler::ucomisd(XMMRegister dst, Address src) {
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x2E);
emit_int8((unsigned char)(0xC0 | encode));
@ -4382,6 +4439,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_operand(dst, src);
@ -4390,6 +4448,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_int8((unsigned char)(0xC0 | encode));
@ -4418,6 +4477,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_operand(dst, src);
@ -4426,6 +4486,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_int8((unsigned char)(0xC0 | encode));
@ -4454,6 +4515,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_operand(dst, src);
@ -4462,6 +4524,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -4490,6 +4553,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_operand(dst, src);
@ -4498,6 +4562,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4528,6 +4593,7 @@ void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
void Assembler::addpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_int8((unsigned char)(0xC0 | encode));
@ -4537,6 +4603,7 @@ void Assembler::addpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
@ -4555,6 +4622,7 @@ void Assembler::addps(XMMRegister dst, XMMRegister src) {
void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_int8((unsigned char)(0xC0 | encode));
@ -4573,6 +4641,7 @@ void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_operand(dst, src);
@ -4591,6 +4660,7 @@ void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::subpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4607,6 +4677,7 @@ void Assembler::subps(XMMRegister dst, XMMRegister src) {
void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4625,6 +4696,7 @@ void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_operand(dst, src);
@ -4643,6 +4715,7 @@ void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -4653,6 +4726,7 @@ void Assembler::mulpd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_operand(dst, src);
@ -4669,6 +4743,7 @@ void Assembler::mulps(XMMRegister dst, XMMRegister src) {
void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -4687,6 +4762,7 @@ void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_operand(dst, src);
@ -4705,6 +4781,7 @@ void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::divpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_int8((unsigned char)(0xC0 | encode));
@ -4721,6 +4798,7 @@ void Assembler::divps(XMMRegister dst, XMMRegister src) {
void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_int8((unsigned char)(0xC0 | encode));
@ -4739,6 +4817,7 @@ void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_operand(dst, src);
@ -4757,6 +4836,7 @@ void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
emit_int8((unsigned char)(0xC0 | encode));
@ -4767,6 +4847,7 @@ void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
emit_operand(dst, src);
@ -4775,6 +4856,7 @@ void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
void Assembler::andpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x54);
emit_int8((unsigned char)(0xC0 | encode));
@ -4803,6 +4885,7 @@ void Assembler::andpd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x54);
emit_operand(dst, src);
@ -4811,6 +4894,7 @@ void Assembler::andpd(XMMRegister dst, Address src) {
void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x54);
emit_int8((unsigned char)(0xC0 | encode));
@ -4829,6 +4913,7 @@ void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x54);
emit_operand(dst, src);
@ -4847,6 +4932,7 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x15);
emit_int8((unsigned char)(0xC0 | encode));
@ -4855,6 +4941,7 @@ void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x14);
emit_int8((unsigned char)(0xC0 | encode));
@ -4863,6 +4950,7 @@ void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x57);
emit_int8((unsigned char)(0xC0 | encode));
@ -4881,6 +4969,7 @@ void Assembler::xorpd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x57);
emit_operand(dst, src);
@ -4899,6 +4988,7 @@ void Assembler::xorps(XMMRegister dst, Address src) {
void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x57);
emit_int8((unsigned char)(0xC0 | encode));
@ -4917,6 +5007,7 @@ void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x57);
emit_operand(dst, src);
@ -4987,13 +5078,14 @@ void Assembler::paddd(XMMRegister dst, Address src) {
void Assembler::paddq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD4);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
assert(VM_Version::supports_sse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x01);
@ -5001,7 +5093,7 @@ void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
}
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
assert(VM_Version::supports_sse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x02);
@ -5035,6 +5127,7 @@ void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD4);
emit_int8((unsigned char)(0xC0 | encode));
@ -5075,6 +5168,7 @@ void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD4);
emit_operand(dst, src);
@ -5106,6 +5200,7 @@ void Assembler::psubd(XMMRegister dst, XMMRegister src) {
void Assembler::psubq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFB);
emit_int8((unsigned char)(0xC0 | encode));
@ -5138,6 +5233,7 @@ void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFB);
emit_int8((unsigned char)(0xC0 | encode));
@ -5178,6 +5274,7 @@ void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFB);
emit_operand(dst, src);
@ -5216,8 +5313,9 @@ void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int v
}
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 2, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
assert(UseAVX > 2, "requires some form of EVEX");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x40);
emit_int8((unsigned char)(0xC0 | encode));
@ -5244,10 +5342,11 @@ void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vecto
}
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
assert(UseAVX > 2, "requires some form of EVEX");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_is_evex_instruction();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x40);
emit_operand(dst, src);
@ -5303,6 +5402,7 @@ void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF3);
emit_int8((unsigned char)(0xC0 | encode));
@ -5332,6 +5432,7 @@ void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_l
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
// XMM6 is for /6 encoding: 66 0F 73 /6 ib
int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x73);
@ -5358,6 +5459,7 @@ void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF3);
emit_int8((unsigned char)(0xC0 | encode));
@ -5389,6 +5491,7 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
// shifts 128 bit value in xmm register by number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x73);
@ -5415,6 +5518,7 @@ void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD3);
emit_int8((unsigned char)(0xC0 | encode));
@ -5443,6 +5547,7 @@ void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_l
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x73);
@ -5469,6 +5574,7 @@ void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD3);
emit_int8((unsigned char)(0xC0 | encode));
@ -5578,6 +5684,7 @@ void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_
void Assembler::pandn(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xDF);
emit_int8((unsigned char)(0xC0 | encode));
@ -5867,9 +5974,9 @@ void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
}
void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512dq(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39);
emit_int8((unsigned char)(0xC0 | encode));
@ -5957,9 +6064,9 @@ void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
}
void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512dq(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
@ -6084,7 +6191,8 @@ void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
// duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -6094,7 +6202,8 @@ void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
// swap src<->dst for encoding
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@ -6129,7 +6238,8 @@ void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
// duplicate double precision data from src into programmed locations in dest : requires AVX512VL
void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
@ -6139,8 +6249,9 @@ void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
// swap src<->dst for encoding
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x19);
@ -6154,12 +6265,9 @@ void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
if (attributes.is_evex_instruction()) {
emit_int8(0x7A);
} else {
emit_int8(0x78);
}
emit_int8(0x7A);
emit_int8((unsigned char)(0xC0 | encode));
}
@ -6167,12 +6275,9 @@ void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
if (attributes.is_evex_instruction()) {
emit_int8(0x7B);
} else {
emit_int8(0x79);
}
emit_int8(0x7B);
emit_int8((unsigned char)(0xC0 | encode));
}
@ -6180,12 +6285,9 @@ void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
if (attributes.is_evex_instruction()) {
emit_int8(0x7C);
} else {
emit_int8(0x58);
}
emit_int8(0x7C);
emit_int8((unsigned char)(0xC0 | encode));
}
@ -6193,12 +6295,9 @@ void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
if (attributes.is_evex_instruction()) {
emit_int8(0x7C);
} else {
emit_int8(0x59);
}
emit_int8(0x7C);
emit_int8((unsigned char)(0xC0 | encode));
}
@ -6853,6 +6952,9 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix
attributes->set_is_evex_instruction();
evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
} else {
if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
attributes->set_rex_vex_w(false);
}
vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
}
}
@ -6912,6 +7014,9 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
attributes->set_is_evex_instruction();
evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
} else {
if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
attributes->set_rex_vex_w(false);
}
vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
}
@ -6966,6 +7071,21 @@ void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM
emit_int8((unsigned char)(0xF0 & src2_enc<<4));
}
void Assembler::shlxl(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::shlxq(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
emit_int8((unsigned char)(0xC0 | encode));
}
#ifndef _LP64

View File

@ -1048,6 +1048,8 @@ private:
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
void cvttpd2dq(XMMRegister dst, XMMRegister src);
// Divide Scalar Double-Precision Floating-Point Values
void divsd(XMMRegister dst, Address src);
void divsd(XMMRegister dst, XMMRegister src);
@ -1335,6 +1337,7 @@ private:
void kmovbl(KRegister dst, Register src);
void kmovbl(Register dst, KRegister src);
void kmovwl(KRegister dst, Register src);
void kmovwl(KRegister dst, Address src);
void kmovwl(Register dst, KRegister src);
void kmovdl(KRegister dst, Register src);
void kmovdl(Register dst, KRegister src);
@ -1344,6 +1347,8 @@ private:
void kmovql(KRegister dst, Register src);
void kmovql(Register dst, KRegister src);
void knotwl(KRegister dst, KRegister src);
void kortestbl(KRegister dst, KRegister src);
void kortestwl(KRegister dst, KRegister src);
void kortestdl(KRegister dst, KRegister src);
@ -2050,6 +2055,8 @@ private:
void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);
protected:
// Next instructions require address alignment 16 bytes SSE mode.
@ -2075,6 +2082,7 @@ public:
:
_avx_vector_len(vector_len),
_rex_vex_w(rex_vex_w),
_rex_vex_w_reverted(false),
_legacy_mode(legacy_mode),
_no_reg_mask(no_reg_mask),
_uses_vl(uses_vl),
@ -2098,6 +2106,7 @@ public:
private:
int _avx_vector_len;
bool _rex_vex_w;
bool _rex_vex_w_reverted;
bool _legacy_mode;
bool _no_reg_mask;
bool _uses_vl;
@ -2114,6 +2123,7 @@ public:
// query functions for field accessors
int get_vector_len(void) const { return _avx_vector_len; }
bool is_rex_vex_w(void) const { return _rex_vex_w; }
bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
bool is_legacy_mode(void) const { return _legacy_mode; }
bool is_no_reg_mask(void) const { return _no_reg_mask; }
bool uses_vl(void) const { return _uses_vl; }
@ -2127,6 +2137,12 @@ public:
// Set the vector len manually
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
// Set revert rex_vex_w for avx encoding
void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
// Set rex_vex_w based on state
void set_rex_vex_w(bool state) { _rex_vex_w = state; }
// Set the instruction to be encoded in AVX mode
void set_is_legacy_mode(void) { _legacy_mode = true; }

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -169,18 +169,18 @@ void FpuStackSim::clear() {
intArray* FpuStackSim::write_state() {
intArray* res = new intArray(1 + FrameMap::nof_fpu_regs);
(*res)[0] = stack_size();
res->append(stack_size());
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
(*res)[1 + i] = regs_at(i);
res->append(regs_at(i));
}
return res;
}
void FpuStackSim::read_state(intArray* fpu_stack_state) {
_stack_size = (*fpu_stack_state)[0];
_stack_size = fpu_stack_state->at(0);
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
set_regs_at(i, (*fpu_stack_state)[1 + i]);
set_regs_at(i, fpu_stack_state->at(1 + i));
}
}

View File

@ -2365,13 +2365,8 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
} else if (value->is_double_fpu()) {
assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
switch(code) {
case lir_log10 : __ flog10() ; break;
case lir_abs : __ fabs() ; break;
case lir_sqrt : __ fsqrt(); break;
case lir_tan :
// Should consider not saving rbx, if not necessary
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
break;
default : ShouldNotReachHere();
}
} else {
@ -3886,6 +3881,10 @@ void LIR_Assembler::membar_storeload() {
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
}
void LIR_Assembler::on_spin_wait() {
__ pause ();
}
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
assert(result_reg->is_register(), "check");
#ifdef _LP64

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -813,7 +813,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
x->id() == vmIntrinsics::_dsin) {
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
x->id() == vmIntrinsics::_dlog10) {
do_LibmIntrinsic(x);
return;
}
@ -821,58 +822,17 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
LIRItem value(x->argument_at(0), this);
bool use_fpu = false;
if (UseSSE >= 2) {
switch(x->id()) {
case vmIntrinsics::_dtan:
case vmIntrinsics::_dlog10:
use_fpu = true;
break;
}
} else {
if (UseSSE < 2) {
value.set_destroys_register();
}
value.load_item();
LIR_Opr calc_input = value.result();
LIR_Opr calc_input2 = NULL;
if (x->id() == vmIntrinsics::_dpow) {
LIRItem extra_arg(x->argument_at(1), this);
if (UseSSE < 2) {
extra_arg.set_destroys_register();
}
extra_arg.load_item();
calc_input2 = extra_arg.result();
}
LIR_Opr calc_result = rlock_result(x);
// sin, cos, pow and exp need two free fpu stack slots, so register
// two temporary operands
LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0);
LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1);
if (use_fpu) {
LIR_Opr tmp = FrameMap::fpu0_double_opr;
int tmp_start = 1;
if (calc_input2 != NULL) {
__ move(calc_input2, tmp);
tmp_start = 2;
calc_input2 = tmp;
}
__ move(calc_input, tmp);
calc_input = tmp;
calc_result = tmp;
tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start);
tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1);
}
switch(x->id()) {
case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
case vmIntrinsics::_dsqrt: __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
default: ShouldNotReachHere();
}
@ -913,21 +873,28 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
result_reg = tmp;
switch(x->id()) {
case vmIntrinsics::_dexp:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dexp() != NULL) {
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dlog() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog10:
if (StubRoutines::dlog10() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dpow:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dpow() != NULL) {
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
@ -947,18 +914,44 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != NULL) {
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}
#else
switch (x->id()) {
case vmIntrinsics::_dexp:
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
if (StubRoutines::dexp() != NULL) {
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog:
if (StubRoutines::dlog() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog10:
if (StubRoutines::dlog10() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dpow:
if (StubRoutines::dpow() != NULL) {
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dsin:
if (StubRoutines::dsin() != NULL) {
@ -974,6 +967,13 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != NULL) {
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}
#endif // _LP64
@ -1261,7 +1261,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View File

@ -786,58 +786,6 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
break;
}
case lir_log10: {
// log and log10 need one temporary fpu stack slot, so
// there is one temporary registers stored in temp of the
// operation. the stack allocator must guarantee that the stack
// slots are really free, otherwise there might be a stack
// overflow.
assert(right->is_illegal(), "must be");
assert(left->is_fpu_register(), "must be");
assert(res->is_fpu_register(), "must be");
assert(op2->tmp1_opr()->is_fpu_register(), "must be");
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(res, left);
insert_exchange(left);
do_rename(left, res);
new_left = to_fpu_stack_top(res);
new_res = new_left;
op2->set_fpu_stack_size(sim()->stack_size());
assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
break;
}
case lir_tan: {
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary
// registers (stored in right and temp of the operation).
// the stack allocator must guarantee that the stack slots are really free,
// otherwise there might be a stack overflow.
assert(left->is_fpu_register(), "must be");
assert(res->is_fpu_register(), "must be");
// assert(left->is_last_use(), "old value gets destroyed");
assert(right->is_fpu_register(), "right is used as the first temporary register");
assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register");
assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
insert_free_if_dead(right);
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(res, left);
insert_exchange(left);
do_rename(left, res);
new_left = to_fpu_stack_top(res);
new_res = new_left;
op2->set_fpu_stack_size(sim()->stack_size());
assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
break;
}
default: {
assert(false, "missed a fpu-operation");
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -31,22 +31,6 @@
#include "runtime/mutexLocker.hpp"
#include "runtime/safepoint.hpp"
// Release the CompiledICHolder* associated with this call site is there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
#define __ _masm.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -640,6 +640,7 @@ void frame::describe_pd(FrameValues& values, int frame_no) {
DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
DESCRIBE_FP_OFFSET(interpreter_frame_method);
DESCRIBE_FP_OFFSET(interpreter_frame_mirror);
DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
DESCRIBE_FP_OFFSET(interpreter_frame_cache);
DESCRIBE_FP_OFFSET(interpreter_frame_locals);

View File

@ -70,7 +70,8 @@
// outgoing sp before a call to an invoked method
interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1,
interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1,
interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1,
interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1,
interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1,
interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1,
interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -184,6 +184,12 @@ inline Method** frame::interpreter_frame_method_addr() const {
return (Method**)addr_at(interpreter_frame_method_offset);
}
// Mirror
inline oop* frame::interpreter_frame_mirror_addr() const {
return (oop*)addr_at(interpreter_frame_mirror_offset);
}
// top of expression stack
inline intptr_t* frame::interpreter_frame_tos_address() const {
intptr_t* last_sp = interpreter_frame_last_sp();

View File

@ -194,9 +194,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
product(bool, UseBMI2Instructions, false, \
"Use BMI2 instructions") \
\
diagnostic(bool, UseLibmSinIntrinsic, true, \
"Use Libm Sin Intrinsic") \
\
diagnostic(bool, UseLibmCosIntrinsic, true, \
"Use Libm Cos Intrinsic")
diagnostic(bool, UseLibmIntrinsic, true, \
"Use Libm Intrinsics")
#endif // CPU_X86_VM_GLOBALS_X86_HPP

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -3399,6 +3399,18 @@ void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
}
}
void MacroAssembler::setvectmask(Register dst, Register src) {
Assembler::movl(dst, 1);
Assembler::shlxl(dst, dst, src);
Assembler::decl(dst);
Assembler::kmovdl(k1, dst);
Assembler::movl(dst, src);
}
void MacroAssembler::restorevectmask() {
Assembler::knotwl(k1, k0);
}
void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
if (UseXmmLoadAndClearUpper) {
@ -6693,6 +6705,14 @@ void MacroAssembler::restore_cpu_control_state_after_jni() {
#endif // _LP64
}
void MacroAssembler::load_mirror(Register mirror, Register method) {
// get mirror
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
movptr(mirror, Address(method, Method::const_offset()));
movptr(mirror, Address(mirror, ConstMethod::constants_offset()));
movptr(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
movptr(mirror, Address(mirror, mirror_offset));
}
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64

View File

@ -156,6 +156,10 @@ class MacroAssembler: public Assembler {
void incrementq(Register reg, int value = 1);
void incrementq(Address dst, int value = 1);
// special instructions for EVEX
void setvectmask(Register dst, Register src);
void restorevectmask();
// Support optimal SSE move instructions.
void movflt(XMMRegister dst, XMMRegister src) {
if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
@ -319,6 +323,8 @@ class MacroAssembler: public Assembler {
void movbool(Address dst, Register src);
void testbool(Register dst);
void load_mirror(Register mirror, Register method);
// oop manipulations
void load_klass(Register dst, Register src);
void store_klass(Register dst, Register src);
@ -928,6 +934,10 @@ class MacroAssembler: public Assembler {
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register r11);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
@ -941,11 +951,19 @@ class MacroAssembler: public Assembler {
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1,
Register tmp2, Register tmp3, Register tmp4);
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1,
Register tmp2, Register tmp3, Register tmp4);
#else
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1);
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
Register rdx, Register tmp);
@ -964,6 +982,14 @@ class MacroAssembler: public Assembler {
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
Register esi, Register edi, Register ebp, Register esp);
void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
Register edx, Register ebx, Register esi, Register edi,
Register ebp, Register esp);
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp);
#endif
void increase_precision();

View File

@ -0,0 +1,889 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - COS()
// ---------------------
//
// 1. RANGE REDUCTION
//
// We perform an initial range reduction from X to r with
//
// X =~= N * pi/32 + r
//
// so that |r| <= pi/64 + epsilon. We restrict inputs to those
// where |N| <= 932560. Beyond this, the range reduction is
// insufficiently accurate. For extremely small inputs,
// denormalization can occur internally, impacting performance.
// This means that the main path is actually only taken for
// 2^-252 <= |X| < 90112.
//
// To avoid branches, we perform the range reduction to full
// accuracy each time.
//
// X - N * (P_1 + P_2 + P_3)
//
// where P_1 and P_2 are 32-bit numbers (so multiplication by N
// is exact) and P_3 is a 53-bit number. Together, these
// approximate pi well enough for all cases in the restricted
// range.
//
// The main reduction sequence is:
//
// y = 32/pi * x
// N = integer(y)
// (computed by adding and subtracting off SHIFTER)
//
// m_1 = N * P_1
// m_2 = N * P_2
// r_1 = x - m_1
// r = r_1 - m_2
// (this r can be used for most of the calculation)
//
// c_1 = r_1 - r
// m_3 = N * P_3
// c_2 = c_1 - m_2
// c = c_2 - m_3
//
// 2. MAIN ALGORITHM
//
// The algorithm uses a table lookup based on B = M * pi / 32
// where M = N mod 64. The stored values are:
// sigma closest power of 2 to cos(B)
// C_hl 53-bit cos(B) - sigma
// S_hi + S_lo 2 * 53-bit sin(B)
//
// The computation is organized as follows:
//
// sin(B + r + c) = [sin(B) + sigma * r] +
// r * (cos(B) - sigma) +
// sin(B) * [cos(r + c) - 1] +
// cos(B) * [sin(r + c) - r]
//
// which is approximately:
//
// [S_hi + sigma * r] +
// C_hl * r +
// S_lo + S_hi * [(cos(r) - 1) - r * c] +
// (C_hl + sigma) * [(sin(r) - r) + c]
//
// and this is what is actually computed. We separate this sum
// into four parts:
//
// hi + med + pols + corr
//
// where
//
// hi = S_hi + sigma r
// med = C_hl * r
// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
// corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
//
// 3. POLYNOMIAL
//
// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
// (sin(r) - r) can be rearranged freely, since it is quite
// small, so we exploit parallelism to the fullest.
//
// psc4 = SC_4 * r_1
// msc4 = psc4 * r
// r2 = r * r
// msc2 = SC_2 * r2
// r4 = r2 * r2
// psc3 = SC_3 + msc4
// psc1 = SC_1 + msc2
// msc3 = r4 * psc3
// sincospols = psc1 + msc3
// pols = sincospols *
// <S_hi * r^2 | (C_hl + sigma) * r^3>
//
// 4. CORRECTION TERM
//
// This is where the "c" component of the range reduction is
// taken into account; recall that just "r" is used for most of
// the calculation.
//
// -c = m_3 - c_2
// -d = S_hi * r - (C_hl + sigma)
// corr = -c * -d + S_lo
//
// 5. COMPENSATED SUMMATIONS
//
// The two successive compensated summations add up the high
// and medium parts, leaving just the low parts to add up at
// the end.
//
// rs = sigma * r
// res_int = S_hi + rs
// k_0 = S_hi - res_int
// k_2 = k_0 + rs
// med = C_hl * r
// res_hi = res_int + med
// k_1 = res_int - res_hi
// k_3 = k_1 + med
//
// 6. FINAL SUMMATION
//
// We now add up all the small parts:
//
// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
//
// Now the overall result is just:
//
// res_hi + res_lo
//
// 7. SMALL ARGUMENTS
//
// Inputs with |X| < 2^-252 are treated specially as
// 1 - |x|.
//
// Special cases:
// cos(NaN) = quiet NaN, and raise invalid exception
// cos(INF) = NaN and raise invalid exception
// cos(0) = 1
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(8) juint _ONE[] =
{
0x00000000UL, 0x3ff00000UL
};
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) {
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start;
assert_different_registers(r8, r9, r10, r11, eax, ecx, edx);
address ONEHALF = StubRoutines::x86::_ONEHALF_addr();
address P_2 = StubRoutines::x86::_P_2_addr();
address SC_4 = StubRoutines::x86::_SC_4_addr();
address Ctable = StubRoutines::x86::_Ctable_addr();
address SC_2 = StubRoutines::x86::_SC_2_addr();
address SC_3 = StubRoutines::x86::_SC_3_addr();
address SC_1 = StubRoutines::x86::_SC_1_addr();
address PI_INV_TABLE = StubRoutines::x86::_PI_INV_TABLE_addr();
address PI_4 = (address)StubRoutines::x86::_PI_4_addr();
address PI32INV = (address)StubRoutines::x86::_PI32INV_addr();
address SIGN_MASK = (address)StubRoutines::x86::_SIGN_MASK_addr();
address P_1 = (address)StubRoutines::x86::_P_1_addr();
address P_3 = (address)StubRoutines::x86::_P_3_addr();
address ONE = (address)_ONE;
address NEG_ZERO = (address)StubRoutines::x86::_NEG_ZERO_addr();
bind(start);
push(rbx);
subq(rsp, 16);
movsd(Address(rsp, 8), xmm0);
bind(B1_2);
movl(eax, Address(rsp, 12));
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
andl(eax, 2147418112);
subl(eax, 808452096);
cmpl(eax, 281346048);
jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
mulsd(xmm1, xmm0);
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
addq(rdx, 1865232);
movdqu(xmm4, xmm0);
andq(rdx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable));
shlq(rdx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
movdqu(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(rax, 0));
subsd(xmm1, xmm3);
movq(xmm3, Address(rax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
mulpd(xmm1, xmm7);
movdqu(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movq(xmm5, Address(rax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(rax, 16));
mulpd(xmm6, xmm2);
addsd(xmm0, xmm5);
addsd(xmm3, xmm7);
addsd(xmm0, xmm1);
addsd(xmm0, xmm3);
addsd(xmm0, xmm6);
unpckhpd(xmm6, xmm6);
addsd(xmm0, xmm6);
addsd(xmm0, xmm4);
jmp(B1_4);
bind(L_2TAG_PACKET_0_0_1);
jcc(Assembler::greater, L_2TAG_PACKET_1_0_1);
pextrw(eax, xmm0, 3);
andl(eax, 32767);
pinsrw(xmm0, eax, 3);
movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
subsd(xmm1, xmm0);
movdqu(xmm0, xmm1);
jmp(B1_4);
bind(L_2TAG_PACKET_1_0_1);
pextrw(eax, xmm0, 3);
andl(eax, 32752);
cmpl(eax, 32752);
jcc(Assembler::equal, L_2TAG_PACKET_2_0_1);
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
subl(ecx, 16224);
shrl(ecx, 7);
andl(ecx, 65532);
lea(r11, ExternalAddress(PI_INV_TABLE));
addq(rcx, r11);
movdq(rax, xmm0);
movl(r10, Address(rcx, 20));
movl(r8, Address(rcx, 24));
movl(edx, eax);
shrq(rax, 21);
orl(eax, INT_MIN);
shrl(eax, 11);
movl(r9, r10);
imulq(r10, rdx);
imulq(r9, rax);
imulq(r8, rax);
movl(rsi, Address(rcx, 16));
movl(rdi, Address(rcx, 12));
movl(r11, r10);
shrq(r10, 32);
addq(r9, r10);
addq(r11, r8);
movl(r8, r11);
shrq(r11, 32);
addq(r9, r11);
movl(r10, rsi);
imulq(rsi, rdx);
imulq(r10, rax);
movl(r11, rdi);
imulq(rdi, rdx);
movl(rbx, rsi);
shrq(rsi, 32);
addq(r9, rbx);
movl(rbx, r9);
shrq(r9, 32);
addq(r10, rsi);
addq(r10, r9);
shlq(rbx, 32);
orq(r8, rbx);
imulq(r11, rax);
movl(r9, Address(rcx, 8));
movl(rsi, Address(rcx, 4));
movl(rbx, rdi);
shrq(rdi, 32);
addq(r10, rbx);
movl(rbx, r10);
shrq(r10, 32);
addq(r11, rdi);
addq(r11, r10);
movq(rdi, r9);
imulq(r9, rdx);
imulq(rdi, rax);
movl(r10, r9);
shrq(r9, 32);
addq(r11, r10);
movl(r10, r11);
shrq(r11, 32);
addq(rdi, r9);
addq(rdi, r11);
movq(r9, rsi);
imulq(rsi, rdx);
imulq(r9, rax);
shlq(r10, 32);
orq(r10, rbx);
movl(eax, Address(rcx, 0));
movl(r11, rsi);
shrq(rsi, 32);
addq(rdi, r11);
movl(r11, rdi);
shrq(rdi, 32);
addq(r9, rsi);
addq(r9, rdi);
imulq(rdx, rax);
pextrw(rbx, xmm0, 3);
lea(rdi, ExternalAddress(PI_INV_TABLE));
subq(rcx, rdi);
addl(ecx, ecx);
addl(ecx, ecx);
addl(ecx, ecx);
addl(ecx, 19);
movl(rsi, 32768);
andl(rsi, rbx);
shrl(rbx, 4);
andl(rbx, 2047);
subl(rbx, 1023);
subl(ecx, rbx);
addq(r9, rdx);
movl(edx, ecx);
addl(edx, 32);
cmpl(ecx, 1);
jcc(Assembler::less, L_2TAG_PACKET_3_0_1);
negl(ecx);
addl(ecx, 29);
shll(r9);
movl(rdi, r9);
andl(r9, 536870911);
testl(r9, 268435456);
jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1);
shrl(r9);
movl(rbx, 0);
shlq(r9, 32);
orq(r9, r11);
bind(L_2TAG_PACKET_5_0_1);
bind(L_2TAG_PACKET_6_0_1);
cmpq(r9, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_1);
bind(L_2TAG_PACKET_8_0_1);
bsrq(r11, r9);
movl(ecx, 29);
subl(ecx, r11);
jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1);
shlq(r9);
movq(rax, r10);
shlq(r10);
addl(edx, ecx);
negl(ecx);
addl(ecx, 64);
shrq(rax);
shrq(r8);
orq(r9, rax);
orq(r10, r8);
bind(L_2TAG_PACKET_10_0_1);
cvtsi2sdq(xmm0, r9);
shrq(r10, 1);
cvtsi2sdq(xmm3, r10);
xorpd(xmm4, xmm4);
shll(edx, 4);
negl(edx);
addl(edx, 16368);
orl(edx, rsi);
xorl(edx, rbx);
pinsrw(xmm4, edx, 3);
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
xorpd(xmm5, xmm5);
subl(edx, 1008);
pinsrw(xmm5, edx, 3);
mulsd(xmm0, xmm4);
shll(rsi, 16);
sarl(rsi, 31);
mulsd(xmm3, xmm5);
movdqu(xmm1, xmm0);
mulsd(xmm0, xmm2);
shrl(rdi, 29);
addsd(xmm1, xmm3);
mulsd(xmm3, xmm2);
addl(rdi, rsi);
xorl(rdi, rsi);
mulsd(xmm6, xmm1);
movl(eax, rdi);
addsd(xmm6, xmm3);
movdqu(xmm2, xmm0);
addsd(xmm0, xmm6);
subsd(xmm2, xmm0);
addsd(xmm6, xmm2);
bind(L_2TAG_PACKET_11_0_1);
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
mulsd(xmm1, xmm0);
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2siq(rdx, xmm1);
cvtsi2sdq(xmm1, rdx);
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
shll(eax, 3);
addl(edx, 1865232);
movdqu(xmm4, xmm0);
addl(edx, eax);
andl(edx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable));
shll(edx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
movdqu(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(rax, 0));
subsd(xmm1, xmm3);
movq(xmm3, Address(rax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
subsd(xmm1, xmm6);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
mulpd(xmm1, xmm7);
movdqu(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movq(xmm5, Address(rax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(rax, 16));
mulpd(xmm6, xmm2);
addsd(xmm5, xmm0);
addsd(xmm3, xmm7);
addsd(xmm1, xmm5);
addsd(xmm1, xmm3);
addsd(xmm1, xmm6);
unpckhpd(xmm6, xmm6);
movdqu(xmm0, xmm4);
addsd(xmm1, xmm6);
addsd(xmm0, xmm1);
jmp(B1_4);
bind(L_2TAG_PACKET_7_0_1);
addl(edx, 64);
movq(r9, r10);
movq(r10, r8);
movl(r8, 0);
cmpq(r9, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
addl(edx, 64);
movq(r9, r10);
movq(r10, r8);
cmpq(r9, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
xorpd(xmm0, xmm0);
xorpd(xmm6, xmm6);
jmp(L_2TAG_PACKET_11_0_1);
bind(L_2TAG_PACKET_9_0_1);
jcc(Assembler::equal, L_2TAG_PACKET_10_0_1);
negl(ecx);
shrq(r10);
movq(rax, r9);
shrq(r9);
subl(edx, ecx);
negl(ecx);
addl(ecx, 64);
shlq(rax);
orq(r10, rax);
jmp(L_2TAG_PACKET_10_0_1);
bind(L_2TAG_PACKET_3_0_1);
negl(ecx);
shlq(r9, 32);
orq(r9, r11);
shlq(r9);
movq(rdi, r9);
testl(r9, INT_MIN);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1);
shrl(r9);
movl(rbx, 0);
shrq(rdi, 3);
jmp(L_2TAG_PACKET_6_0_1);
bind(L_2TAG_PACKET_4_0_1);
shrl(r9);
movl(rbx, 536870912);
shrl(rbx);
shlq(r9, 32);
orq(r9, r11);
shlq(rbx, 32);
addl(rdi, 536870912);
movl(rcx, 0);
movl(r11, 0);
subq(rcx, r8);
sbbq(r11, r10);
sbbq(rbx, r9);
movq(r8, rcx);
movq(r10, r11);
movq(r9, rbx);
movl(rbx, 32768);
jmp(L_2TAG_PACKET_5_0_1);
bind(L_2TAG_PACKET_12_0_1);
shrl(r9);
mov64(rbx, 0x100000000);
shrq(rbx);
movl(rcx, 0);
movl(r11, 0);
subq(rcx, r8);
sbbq(r11, r10);
sbbq(rbx, r9);
movq(r8, rcx);
movq(r10, r11);
movq(r9, rbx);
movl(rbx, 32768);
shrq(rdi, 3);
addl(rdi, 536870912);
jmp(L_2TAG_PACKET_6_0_1);
bind(L_2TAG_PACKET_2_0_1);
movsd(xmm0, Address(rsp, 8));
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL
movq(Address(rsp, 0), xmm0);
bind(L_2TAG_PACKET_13_0_1);
bind(B1_4);
addq(rsp, 16);
pop(rbx);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table_cos[] =
{
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL,
0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,
0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL,
0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL,
0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL,
0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,
0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
};
//registers,
// input: (rbp + 8)
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table_cos = (address)_static_const_table_cos;
bind(start);
subl(rsp, 120);
movl(Address(rsp, 56), tmp);
lea(tmp, ExternalAddress(static_const_table_cos));
movsd(xmm0, Address(rsp, 128));
pextrw(eax, xmm0, 3);
andl(eax, 32767);
subl(eax, 12336);
cmpl(eax, 4293);
jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
movsd(xmm1, Address(tmp, 2160));
mulsd(xmm1, xmm0);
movdqu(xmm5, Address(tmp, 2240));
movsd(xmm4, Address(tmp, 2224));
pand(xmm4, xmm0);
por(xmm5, xmm4);
movsd(xmm3, Address(tmp, 2128));
movdqu(xmm2, Address(tmp, 2112));
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
addl(edx, 1865232);
movdqu(xmm4, xmm0);
andl(edx, 63);
movdqu(xmm5, Address(tmp, 2096));
lea(eax, Address(tmp, 0));
shll(edx, 5);
addl(eax, edx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, Address(tmp, 2144));
subsd(xmm4, xmm3);
movsd(xmm7, Address(eax, 8));
unpcklpd(xmm0, xmm0);
movapd(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
movdqu(xmm6, Address(tmp, 2064));
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(eax, 0));
subsd(xmm1, xmm3);
movsd(xmm3, Address(eax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, Address(tmp, 2080));
mulsd(xmm4, Address(eax, 0));
addpd(xmm6, Address(tmp, 2048));
mulpd(xmm5, xmm0);
movapd(xmm0, xmm3);
addsd(xmm3, Address(eax, 8));
mulpd(xmm1, xmm7);
movapd(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movsd(xmm5, Address(eax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(eax, 16));
mulpd(xmm6, xmm2);
addsd(xmm5, xmm0);
addsd(xmm3, xmm7);
addsd(xmm1, xmm5);
addsd(xmm1, xmm3);
addsd(xmm1, xmm6);
unpckhpd(xmm6, xmm6);
addsd(xmm1, xmm6);
addsd(xmm4, xmm1);
movsd(Address(rsp, 0), xmm4);
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_0_0_2);
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
pextrw(eax, xmm0, 3);
andl(eax, 32767);
pinsrw(xmm0, eax, 3);
movsd(xmm1, Address(tmp, 2192));
subsd(xmm1, xmm0);
movsd(Address(rsp, 0), xmm1);
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movl(eax, Address(rsp, 132));
andl(eax, 2146435072);
cmpl(eax, 2146435072);
jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
subl(rsp, 32);
movsd(Address(rsp, 0), xmm0);
lea(eax, Address(rsp, 40));
movl(Address(rsp, 8), eax);
movl(eax, 1);
movl(Address(rsp, 12), eax);
call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
addl(rsp, 32);
fld_d(Address(rsp, 8));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_3_0_2);
fld_d(Address(rsp, 128));
fmul_d(Address(tmp, 2208));
bind(L_2TAG_PACKET_1_0_2);
movl(tmp, Address(rsp, 56));
}
#endif

View File

@ -0,0 +1,674 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - EXP()
// ---------------------
//
// Description:
// Let K = 64 (table size).
// x x/log(2) n
// e = 2 = 2 * T[j] * (1 + P(y))
// where
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
// j/K
// values of 2 are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
//
// P(y) is a minimax polynomial approximation of exp(x)-1
// on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V).
//
// To avoid problems with arithmetic overflow and underflow,
// n n1 n2
// value of 2 is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
// where BIAS is a value of exponent bias.
//
// Special cases:
// exp(NaN) = NaN
// exp(+INF) = +INF
// exp(-INF) = 0
// exp(x) = 1 for subnormals
// for finite argument, only exp(0)=1 is exact
// For IEEE double
// if x > 709.782712893383973096 then exp(x) overflow
// if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(16) juint _cv[] =
{
0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
};
ALIGNED_(16) juint _shifter[] =
{
0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
};
ALIGNED_(16) juint _mmask[] =
{
0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
};
ALIGNED_(16) juint _bias[] =
{
0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
};
ALIGNED_(16) juint _Tbl_addr[] =
{
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL
};
ALIGNED_(16) juint _ALLONES[] =
{
0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
};
ALIGNED_(16) juint _ebias[] =
{
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
};
ALIGNED_(4) juint _XMAX[] =
{
0xffffffffUL, 0x7fefffffUL
};
ALIGNED_(4) juint _XMIN[] =
{
0x00000000UL, 0x00100000UL
};
ALIGNED_(4) juint _INF[] =
{
0x00000000UL, 0x7ff00000UL
};
ALIGNED_(4) juint _ZERO[] =
{
0x00000000UL, 0x00000000UL
};
ALIGNED_(4) juint _ONE_val[] =
{
0x00000000UL, 0x3ff00000UL
};
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, tmp - r11
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
jmp(start);
address cv = (address)_cv;
address Shifter = (address)_shifter;
address mmask = (address)_mmask;
address bias = (address)_bias;
address Tbl_addr = (address)_Tbl_addr;
address ALLONES = (address)_ALLONES;
address ebias = (address)_ebias;
address XMAX = (address)_XMAX;
address XMIN = (address)_XMIN;
address INF = (address)_INF;
address ZERO = (address)_ZERO;
address ONE_val = (address)_ONE_val;
bind(start);
subq(rsp, 24);
movsd(Address(rsp, 8), xmm0);
unpcklpd(xmm0, xmm0);
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
movdqu(xmm2, ExternalAddress(16 + cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, ExternalAddress(32 + cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
pextrw(eax, xmm0, 3);
andl(eax, 32767);
movl(edx, 16527);
subl(edx, eax);
subl(eax, 15504);
orl(edx, eax);
cmpl(edx, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
mulpd(xmm1, xmm0);
addpd(xmm1, xmm6);
movapd(xmm7, xmm1);
subpd(xmm1, xmm6);
mulpd(xmm2, xmm1);
movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1);
movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2);
movdl(eax, xmm7);
movl(ecx, eax);
andl(ecx, 63);
shll(ecx, 4);
sarl(eax, 6);
movl(edx, eax);
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6);
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6);
psllq(xmm7, 46);
subpd(xmm0, xmm3);
lea(tmp, ExternalAddress(Tbl_addr));
movdqu(xmm2, Address(ecx, tmp));
mulpd(xmm4, xmm0);
movapd(xmm6, xmm0);
movapd(xmm1, xmm0);
mulpd(xmm6, xmm6);
mulpd(xmm0, xmm6);
addpd(xmm5, xmm4);
mulsd(xmm0, xmm6);
mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5);
addsd(xmm1, xmm0);
por(xmm2, xmm7);
unpckhpd(xmm0, xmm0);
addsd(xmm0, xmm1);
addsd(xmm0, xmm6);
addl(edx, 894);
cmpl(edx, 1916);
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
mulsd(xmm0, xmm2);
addsd(xmm0, xmm2);
jmp(B1_5);
bind(L_2TAG_PACKET_1_0_2);
xorpd(xmm3, xmm3);
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
movl(edx, -1022);
subl(edx, eax);
movdl(xmm5, edx);
psllq(xmm4, xmm5);
movl(ecx, eax);
sarl(eax, 1);
pinsrw(xmm3, eax, 3);
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
psllq(xmm3, 4);
psubd(xmm2, xmm3);
mulsd(xmm0, xmm2);
cmpl(edx, 52);
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
pand(xmm4, xmm2);
paddd(xmm3, xmm6);
subsd(xmm2, xmm4);
addsd(xmm0, xmm2);
cmpl(ecx, 1023);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
pextrw(ecx, xmm0, 3);
andl(ecx, 32768);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
movapd(xmm6, xmm0);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
jmp(B1_5);
bind(L_2TAG_PACKET_5_0_2);
mulsd(xmm6, xmm3);
mulsd(xmm4, xmm3);
movdqu(xmm0, xmm6);
pxor(xmm6, xmm4);
psrad(xmm6, 31);
pshufd(xmm6, xmm6, 85);
psllq(xmm0, 1);
psrlq(xmm0, 1);
pxor(xmm0, xmm6);
psrlq(xmm6, 63);
paddq(xmm0, xmm6);
paddq(xmm0, xmm4);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
jmp(B1_5);
bind(L_2TAG_PACKET_3_0_2);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 32752);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
jmp(B1_5);
bind(L_2TAG_PACKET_2_0_2);
paddd(xmm3, xmm6);
addpd(xmm0, xmm2);
mulsd(xmm0, xmm3);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_8_0_2);
cmpl(eax, 2146435072);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
movl(eax, Address(rsp, 12));
cmpl(eax, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0);
bind(L_2TAG_PACKET_7_0_2);
movl(Address(rsp, 0), 14);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
mulsd(xmm0, xmm0);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_9_0_2);
movl(edx, Address(rsp, 8));
cmpl(eax, 2146435072);
jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
cmpl(edx, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
movl(eax, Address(rsp, 12));
cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
jmp(B1_5);
bind(L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
jmp(B1_5);
bind(L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(rsp, 8));
addsd(xmm0, xmm0);
jmp(B1_5);
bind(L_2TAG_PACKET_0_0_2);
movl(eax, Address(rsp, 12));
andl(eax, 2147483647);
cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
movsd(Address(rsp, 8), xmm0);
addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
jmp(B1_5);
bind(L_2TAG_PACKET_6_0_2);
movq(Address(rsp, 16), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 16));
bind(B1_5);
addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table[] =
{
0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
0x00100000UL
};
//registers,
// input: (rbp + 8)
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
jmp(start);
address static_const_table = (address)_static_const_table;
bind(start);
subl(rsp, 120);
movl(Address(rsp, 64), tmp);
lea(tmp, ExternalAddress(static_const_table));
movdqu(xmm0, Address(rsp, 128));
unpcklpd(xmm0, xmm0);
movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
pextrw(eax, xmm0, 3);
andl(eax, 32767);
movl(edx, 16527);
subl(edx, eax);
subl(eax, 15504);
orl(edx, eax);
cmpl(edx, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
mulpd(xmm1, xmm0);
addpd(xmm1, xmm6);
movapd(xmm7, xmm1);
subpd(xmm1, xmm6);
mulpd(xmm2, xmm1);
movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1);
movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2);
movdl(eax, xmm7);
movl(ecx, eax);
andl(ecx, 63);
shll(ecx, 4);
sarl(eax, 6);
movl(edx, eax);
movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6);
movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6);
psllq(xmm7, 46);
subpd(xmm0, xmm3);
movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
mulpd(xmm4, xmm0);
movapd(xmm6, xmm0);
movapd(xmm1, xmm0);
mulpd(xmm6, xmm6);
mulpd(xmm0, xmm6);
addpd(xmm5, xmm4);
mulsd(xmm0, xmm6);
mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5);
addsd(xmm1, xmm0);
por(xmm2, xmm7);
unpckhpd(xmm0, xmm0);
addsd(xmm0, xmm1);
addsd(xmm0, xmm6);
addl(edx, 894);
cmpl(edx, 1916);
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
mulsd(xmm0, xmm2);
addsd(xmm0, xmm2);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_1_0_2);
fnstcw(Address(rsp, 24));
movzwl(edx, Address(rsp, 24));
orl(edx, 768);
movw(Address(rsp, 28), edx);
fldcw(Address(rsp, 28));
movl(edx, eax);
sarl(eax, 1);
subl(edx, eax);
movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
pandn(xmm6, xmm2);
addl(eax, 1023);
movdl(xmm3, eax);
psllq(xmm3, 52);
por(xmm6, xmm3);
addl(edx, 1023);
movdl(xmm4, edx);
psllq(xmm4, 52);
movsd(Address(rsp, 8), xmm0);
fld_d(Address(rsp, 8));
movsd(Address(rsp, 16), xmm6);
fld_d(Address(rsp, 16));
fmula(1);
faddp(1);
movsd(Address(rsp, 8), xmm4);
fld_d(Address(rsp, 8));
fmulp(1);
fstp_d(Address(rsp, 8));
movsd(xmm0, Address(rsp, 8));
fldcw(Address(rsp, 24));
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 32752);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
cmpl(ecx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
jmp(L_2TAG_PACKET_2_0_2);
cmpl(ecx, INT_MIN);
jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
cmpl(ecx, -1064950997);
jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
movl(edx, Address(rsp, 128));
cmpl(edx, -17155601);
jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
jmp(L_2TAG_PACKET_4_0_2);
bind(L_2TAG_PACKET_3_0_2);
movl(edx, 14);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_4_0_2);
movl(edx, 15);
bind(L_2TAG_PACKET_5_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 128));
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_7_0_2);
cmpl(eax, 2146435072);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
movl(eax, Address(rsp, 132));
cmpl(eax, INT_MIN);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2);
movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0);
movl(edx, 14);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_9_0_2);
movsd(xmm0, Address(tmp, 1216));
mulsd(xmm0, xmm0);
movl(edx, 15);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_8_0_2);
movl(edx, Address(rsp, 128));
cmpl(eax, 2146435072);
jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
cmpl(edx, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
movl(eax, Address(rsp, 132));
cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_10_0_2);
movsd(xmm0, Address(rsp, 128));
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_0_0_2);
movl(eax, Address(rsp, 132));
andl(eax, 2147483647);
cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
movsd(xmm0, Address(rsp, 128));
addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 48), xmm0);
fld_d(Address(rsp, 48));
bind(L_2TAG_PACKET_6_0_2);
movl(tmp, Address(rsp, 64));
}
#endif

View File

@ -0,0 +1,655 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - LOG()
// ---------------------
//
// x=2^k * mx, mx in [1,2)
//
// Get B~1/mx based on the output of rcpss instruction (B0)
// B = int((B0*2^7+0.5))/2^7
//
// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
//
// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and
// p(r) is a degree 7 polynomial
// -log(B) read from data table (high, low parts)
// Result is formed from high and low parts
//
// Special cases:
// log(NaN) = quiet NaN, and raise invalid exception
// log(+INF) = that INF
// log(0) = -INF with divide-by-zero exception raised
// log(1) = +0
// log(x) = NaN with invalid exception raised if x < -0, including -INF
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(16) juint _L_tbl[] =
{
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x80000000UL
};
ALIGNED_(16) juint _log2[] =
{
0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL
};
ALIGNED_(16) juint _coeff[] =
{
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
0x00000000UL, 0xbfe00000UL
};
//registers,
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, r8, r11
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2;
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
assert_different_registers(tmp1, tmp2, eax, ecx, edx);
jmp(start);
address L_tbl = (address)_L_tbl;
address log2 = (address)_log2;
address coeff = (address)_coeff;
bind(start);
subq(rsp, 24);
movsd(Address(rsp, 0), xmm0);
mov64(rax, 0x3ff0000000000000);
movdq(xmm2, rax);
mov64(rdx, 0x77f0000000000000);
movdq(xmm3, rdx);
movl(ecx, 32768);
movdl(xmm4, rcx);
mov64(tmp1, 0xffffe00000000000);
movdq(xmm5, tmp1);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 16352);
psrlq(xmm0, 27);
lea(tmp2, ExternalAddress(L_tbl));
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
paddd(xmm0, xmm4);
por(xmm1, xmm3);
movdl(edx, xmm0);
psllq(xmm0, 29);
pand(xmm5, xmm1);
pand(xmm0, xmm6);
subsd(xmm1, xmm5);
mulpd(xmm5, xmm0);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulsd(xmm1, xmm0);
movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL
movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp2, edx));
movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
addsd(xmm1, xmm5);
movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
mulsd(xmm6, xmm7);
if (VM_Version::supports_sse3()) {
movddup(xmm5, xmm1);
}
else {
movdqu(xmm5, xmm1);
movlhps(xmm5, xmm5);
}
mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
mulpd(xmm5, xmm5);
if (VM_Version::supports_sse3()) {
movddup(xmm6, xmm0);
}
else {
movdqu(xmm6, xmm0);
movlhps(xmm6, xmm6);
}
addsd(xmm0, xmm1);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
subsd(xmm6, xmm0);
mulsd(xmm4, xmm1);
pshufd(xmm2, xmm0, 238);
addsd(xmm1, xmm6);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(B1_5);
bind(L_2TAG_PACKET_0_0_2);
movq(xmm0, Address(rsp, 0));
movq(xmm1, Address(rsp, 0));
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm0);
jmp(B1_5);
bind(L_2TAG_PACKET_5_0_2);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_3_0_2);
xorpd(xmm1, xmm1);
addsd(xmm1, xmm0);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
psrlq(xmm0, 27);
movl(ecx, 18416);
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_6_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movl(Address(rsp, 16), 3);
jmp(L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(Address(rsp, 16), 2);
bind(L_2TAG_PACKET_8_0_2);
movq(Address(rsp, 8), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 8));
bind(B1_5);
addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table_log[] =
{
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
0xffffe000UL
};
//registers,
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
Label L_2TAG_PACKET_10_0_2, start;
assert_different_registers(tmp, eax, ecx, edx);
jmp(start);
address static_const_table = (address)_static_const_table_log;
bind(start);
subl(rsp, 104);
movl(Address(rsp, 40), tmp);
lea(tmp, ExternalAddress(static_const_table));
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
movsd(xmm0, Address(rsp, 112));
movapd(xmm1, xmm0);
movl(ecx, 32768);
movdl(xmm4, ecx);
movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
psllq(xmm0, 5);
movl(ecx, 16352);
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
paddd(xmm0, xmm4);
por(xmm1, xmm3);
movdl(edx, xmm0);
psllq(xmm0, 29);
pand(xmm5, xmm1);
pand(xmm0, xmm6);
subsd(xmm1, xmm5);
mulpd(xmm5, xmm0);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulsd(xmm1, xmm0);
movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL
movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp, edx));
movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
addsd(xmm1, xmm5);
movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
mulpd(xmm5, xmm5);
pshufd(xmm6, xmm0, 228);
addsd(xmm0, xmm1);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
subsd(xmm6, xmm0);
mulsd(xmm4, xmm1);
pshufd(xmm2, xmm0, 238);
addsd(xmm1, xmm6);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_0_0_2);
movsd(xmm0, Address(rsp, 112));
movdqu(xmm1, xmm0);
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
bind(L_2TAG_PACKET_5_0_2);
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_6_0_2);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
jmp(L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_3_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
movl(edx, 3);
mulsd(xmm0, xmm1);
bind(L_2TAG_PACKET_9_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 112));
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_10_0_2);
bind(L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(edx, 2);
jmp(L_2TAG_PACKET_9_0_2);
bind(L_2TAG_PACKET_4_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movapd(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
psllq(xmm0, 5);
movl(ecx, 18416);
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 24), xmm0);
fld_d(Address(rsp, 24));
bind(L_2TAG_PACKET_10_0_2);
movl(tmp, Address(rsp, 40));
}
#endif

View File

@ -0,0 +1,687 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - LOG10()
// ---------------------
//
// Let x=2^k * mx, mx in [1,2)
//
// Get B~1/mx based on the output of rcpss instruction (B0)
// B = int((B0*LH*2^7+0.5))/2^7
// LH is a short approximation for log10(e)
//
// Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
//
// Result: k*log10(2) - log(B) + p(r)
// p(r) is a degree 7 polynomial
// -log(B) read from data table (high, low parts)
// Result is formed from high and low parts
//
// Special cases:
// log10(0) = -INF with divide-by-zero exception raised
// log10(1) = +0
// log10(x) = NaN with invalid exception raised if x < -0, including -INF
// log10(+INF) = +INF
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(16) juint _HIGHSIGMASK_log10[] =
{
0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
};
ALIGNED_(16) juint _LOG10_E[] =
{
0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
};
ALIGNED_(16) juint _L_tbl_log10[] =
{
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL
};
ALIGNED_(16) juint _log2_log10[] =
{
0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
};
ALIGNED_(16) juint _coeff_log10[] =
{
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
0xdc77b115UL, 0xbff27af2UL
};
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, tmp - r11
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_4, B1_5, start;
assert_different_registers(r11, eax, ecx, edx);
address HIGHSIGMASK = (address)_HIGHSIGMASK_log10;
address LOG10_E = (address)_LOG10_E;
address L_tbl = (address)_L_tbl_log10;
address log2 = (address)_log2_log10;
address coeff = (address)_coeff_log10;
bind(start);
subq(rsp, 24);
movsd(Address(rsp, 0), xmm0);
bind(B1_2);
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movl(ecx, 1054736384);
movdl(xmm7, ecx);
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
movdqu(xmm1, xmm0);
movl(edx, 32768);
movdl(xmm4, edx);
movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 16352);
psrlq(xmm0, 27);
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
mulss(xmm0, xmm7);
por(xmm1, xmm3);
lea(r11, ExternalAddress(L_tbl));
andpd(xmm5, xmm1);
paddd(xmm0, xmm4);
subsd(xmm1, xmm5);
movdl(edx, xmm0);
psllq(xmm0, 29);
andpd(xmm0, xmm6);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0);
mulsd(xmm1, xmm0);
movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504));
movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
addsd(xmm1, xmm5);
movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL
mulpd(xmm5, xmm5);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
pshufd(xmm2, xmm0, 228);
addsd(xmm0, xmm1);
mulsd(xmm4, xmm1);
subsd(xmm2, xmm0);
mulsd(xmm6, xmm1);
addsd(xmm1, xmm2);
pshufd(xmm2, xmm0, 238);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addsd(xmm1, xmm6);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(B1_5);
bind(L_2TAG_PACKET_0_0_2);
movq(xmm0, Address(rsp, 0));
movq(xmm1, Address(rsp, 0));
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm0);
jmp(B1_5);
bind(L_2TAG_PACKET_5_0_2);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_3_0_2);
xorpd(xmm1, xmm1);
addsd(xmm1, xmm0);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 18416);
psrlq(xmm0, 27);
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_6_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movl(Address(rsp, 16), 9);
jmp(L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(Address(rsp, 16), 8);
bind(L_2TAG_PACKET_8_0_2);
movq(Address(rsp, 8), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 8));
bind(L_2TAG_PACKET_9_0_2);
bind(B1_5);
addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table_log10[] =
{
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL,
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL,
0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
};
//registers,
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, start;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table_log10 = (address)_static_const_table_log10;
bind(start);
subl(rsp, 104);
movl(Address(rsp, 40), tmp);
lea(tmp, ExternalAddress(static_const_table_log10));
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movl(ecx, 1054736384);
movdl(xmm7, ecx);
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
movsd(xmm0, Address(rsp, 112));
movdqu(xmm1, xmm0);
movl(edx, 32768);
movdl(xmm4, edx);
movdqu(xmm5, Address(tmp, 2128)); //0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 16352);
psllq(xmm0, 5);
movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
mulss(xmm0, xmm7);
por(xmm1, xmm3);
andpd(xmm5, xmm1);
paddd(xmm0, xmm4);
subsd(xmm1, xmm5);
movdl(edx, xmm0);
psllq(xmm0, 29);
andpd(xmm0, xmm6);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0);
mulsd(xmm1, xmm0);
movsd(xmm6, Address(tmp, 2064)); //0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL
movdqu(xmm3, Address(tmp, 2080)); //0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504));
movdqu(xmm4, Address(tmp, 2096)); //0x3cdfef31UL, 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL
addsd(xmm1, xmm5);
movdqu(xmm2, Address(tmp, 2112)); //0xc0089309UL, 0x385593b1UL, 0xc025c917UL, 0xdc963467UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, Address(tmp, 2072)); //0x00000000UL, 0x00000000UL, 0x00000000UL, 0x509f7800UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
movsd(xmm6, Address(tmp, 2152)); //0xffffffffUL, 0x00000000UL, 0xffffe000UL, 0x00000000UL
mulpd(xmm5, xmm5);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
pshufd(xmm2, xmm0, 228);
addsd(xmm0, xmm1);
mulsd(xmm4, xmm1);
subsd(xmm2, xmm0);
mulsd(xmm6, xmm1);
addsd(xmm1, xmm2);
pshufd(xmm2, xmm0, 238);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addsd(xmm1, xmm6);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_0_0_2);
movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL
movdqu(xmm1, xmm0);
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
bind(L_2TAG_PACKET_5_0_2);
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_6_0_2);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
jmp(L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_3_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
movl(edx, 9);
mulsd(xmm0, xmm1);
bind(L_2TAG_PACKET_9_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_10_0_2);
bind(L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(edx, 8);
jmp(L_2TAG_PACKET_9_0_2);
bind(L_2TAG_PACKET_4_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 18416);
psllq(xmm0, 5);
movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 24), xmm0);
fld_d(Address(rsp, 24));
bind(L_2TAG_PACKET_10_0_2);
movl(tmp, Address(rsp, 40));
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2092,25 +2092,6 @@ class StubGenerator: public StubCodeGenerator {
entry_checkcast_arraycopy);
}
void generate_math_stubs() {
{
StubCodeMark mark(this, "StubRoutines", "log10");
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ flog10();
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "tan");
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ trigfunc('t');
__ ret(0);
}
}
// AES intrinsic stubs
enum {AESBlockSize = 16};
@ -3533,6 +3514,31 @@ class StubGenerator: public StubCodeGenerator {
}
address generate_libmLog10() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmPow() {
address start = __ pc();
@ -3627,6 +3633,44 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_libm_tan_cot_huge() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
BLOCK_COMMENT("Entry:");
__ libm_tancot_huge(x0, x1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
return start;
}
address generate_libmTan() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
// Safefetch stubs.
@ -3852,24 +3896,25 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2()) {
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
StubRoutines::x86::_L_2il0floatpacket_0_adr = (address)StubRoutines::x86::_L_2il0floatpacket_0;
StubRoutines::x86::_Pi4Inv_adr = (address)StubRoutines::x86::_Pi4Inv;
StubRoutines::x86::_Pi4x3_adr = (address)StubRoutines::x86::_Pi4x3;
StubRoutines::x86::_Pi4x4_adr = (address)StubRoutines::x86::_Pi4x4;
StubRoutines::x86::_ones_adr = (address)StubRoutines::x86::_ones;
StubRoutines::_dexp = generate_libmExp();
StubRoutines::_dlog = generate_libmLog();
StubRoutines::_dlog10 = generate_libmLog10();
StubRoutines::_dpow = generate_libmPow();
if (UseLibmSinIntrinsic || UseLibmCosIntrinsic) {
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
}
if (UseLibmSinIntrinsic) {
StubRoutines::_dsin = generate_libmSin();
}
if (UseLibmCosIntrinsic) {
StubRoutines::_dcos = generate_libmCos();
}
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
StubRoutines::_dsin = generate_libmSin();
StubRoutines::_dcos = generate_libmCos();
StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge();
StubRoutines::_dtan = generate_libmTan();
}
}
void generate_all() {
// Generates all stubs and initializes the entry points
@ -3888,8 +3933,6 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
generate_math_stubs();
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others

View File

@ -2971,35 +2971,6 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
}
void generate_math_stubs() {
{
StubCodeMark mark(this, "StubRoutines", "log10");
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ flog10();
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addq(rsp, 8);
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "tan");
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ trigfunc('t');
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addq(rsp, 8);
__ ret(0);
}
}
// AES intrinsic stubs
enum {AESBlockSize = 16};
@ -4730,6 +4701,46 @@ class StubGenerator: public StubCodeGenerator {
#endif
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmLog10() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
@ -4809,6 +4820,8 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
@ -4821,6 +4834,8 @@ class StubGenerator: public StubCodeGenerator {
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
@ -4852,6 +4867,8 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
@ -4864,6 +4881,55 @@ class StubGenerator: public StubCodeGenerator {
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmTan() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
const Register tmp2 = r9;
const Register tmp3 = r10;
const Register tmp4 = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
@ -5064,16 +5130,28 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2()) {
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
StubRoutines::_dexp = generate_libmExp();
StubRoutines::_dlog = generate_libmLog();
StubRoutines::_dlog10 = generate_libmLog10();
StubRoutines::_dpow = generate_libmPow();
if (UseLibmSinIntrinsic) {
StubRoutines::_dsin = generate_libmSin();
}
if (UseLibmCosIntrinsic) {
StubRoutines::_dcos = generate_libmCos();
}
StubRoutines::_dtan = generate_libmTan();
StubRoutines::_dsin = generate_libmSin();
StubRoutines::_dcos = generate_libmCos();
}
}
@ -5118,8 +5196,6 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
generate_math_stubs();
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others

View File

@ -48,6 +48,29 @@ address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
address StubRoutines::x86::_k256_adr = NULL;
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
//tables common for sin and cos
address StubRoutines::x86::_ONEHALF_adr = NULL;
address StubRoutines::x86::_P_2_adr = NULL;
address StubRoutines::x86::_SC_4_adr = NULL;
address StubRoutines::x86::_Ctable_adr = NULL;
address StubRoutines::x86::_SC_2_adr = NULL;
address StubRoutines::x86::_SC_3_adr = NULL;
address StubRoutines::x86::_SC_1_adr = NULL;
address StubRoutines::x86::_PI_INV_TABLE_adr = NULL;
address StubRoutines::x86::_PI_4_adr = NULL;
address StubRoutines::x86::_PI32INV_adr = NULL;
address StubRoutines::x86::_SIGN_MASK_adr = NULL;
address StubRoutines::x86::_P_1_adr = NULL;
address StubRoutines::x86::_P_3_adr = NULL;
address StubRoutines::x86::_NEG_ZERO_adr = NULL;
//tables common for sincos and tancot
address StubRoutines::x86::_L_2il0floatpacket_0_adr = NULL;
address StubRoutines::x86::_Pi4Inv_adr = NULL;
address StubRoutines::x86::_Pi4x3_adr = NULL;
address StubRoutines::x86::_Pi4x4_adr = NULL;
address StubRoutines::x86::_ones_adr = NULL;
uint64_t StubRoutines::x86::_crc_by128_masks[] =
{
/* The fields in this structure are arranged so that they can be

View File

@ -57,6 +57,48 @@
// byte flip mask for sha256
static address _pshuffle_byte_flip_mask_addr;
//tables common for LIBM sin and cos
static juint _ONEHALF[];
static address _ONEHALF_adr;
static juint _P_2[];
static address _P_2_adr;
static juint _SC_4[];
static address _SC_4_adr;
static juint _Ctable[];
static address _Ctable_adr;
static juint _SC_2[];
static address _SC_2_adr;
static juint _SC_3[];
static address _SC_3_adr;
static juint _SC_1[];
static address _SC_1_adr;
static juint _PI_INV_TABLE[];
static address _PI_INV_TABLE_adr;
static juint _PI_4[];
static address _PI_4_adr;
static juint _PI32INV[];
static address _PI32INV_adr;
static juint _SIGN_MASK[];
static address _SIGN_MASK_adr;
static juint _P_1[];
static address _P_1_adr;
static juint _P_3[];
static address _P_3_adr;
static juint _NEG_ZERO[];
static address _NEG_ZERO_adr;
//tables common for LIBM sincos and tancot
static juint _L_2il0floatpacket_0[];
static address _L_2il0floatpacket_0_adr;
static juint _Pi4Inv[];
static address _Pi4Inv_adr;
static juint _Pi4x3[];
static address _Pi4x3_adr;
static juint _Pi4x4[];
static address _Pi4x4_adr;
static juint _ones[];
static address _ones_adr;
public:
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
@ -69,4 +111,24 @@
static address k256_addr() { return _k256_adr; }
static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; }
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
static address _ONEHALF_addr() { return _ONEHALF_adr; }
static address _P_2_addr() { return _P_2_adr; }
static address _SC_4_addr() { return _SC_4_adr; }
static address _Ctable_addr() { return _Ctable_adr; }
static address _SC_2_addr() { return _SC_2_adr; }
static address _SC_3_addr() { return _SC_3_adr; }
static address _SC_1_addr() { return _SC_1_adr; }
static address _PI_INV_TABLE_addr() { return _PI_INV_TABLE_adr; }
static address _PI_4_addr() { return _PI_4_adr; }
static address _PI32INV_addr() { return _PI32INV_adr; }
static address _SIGN_MASK_addr() { return _SIGN_MASK_adr; }
static address _P_1_addr() { return _P_1_adr; }
static address _P_3_addr() { return _P_3_adr; }
static address _NEG_ZERO_addr() { return _NEG_ZERO_adr; }
static address _L_2il0floatpacket_0_addr() { return _L_2il0floatpacket_0_adr; }
static address _Pi4Inv_addr() { return _Pi4Inv_adr; }
static address _Pi4x3_addr() { return _Pi4x3_adr; }
static address _Pi4x4_addr() { return _Pi4x4_adr; }
static address _ones_addr() { return _ones_adr; }
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP

View File

@ -608,18 +608,13 @@ void TemplateInterpreterGenerator::lock_method() {
// get synchronization object
{
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
Label done;
__ movl(rax, access_flags);
__ testl(rax, JVM_ACC_STATIC);
// get receiver (assume this is frequent case)
__ movptr(rax, Address(rlocals, Interpreter::local_offset_in_bytes(0)));
__ jcc(Assembler::zero, done);
__ movptr(rax, Address(rbx, Method::const_offset()));
__ movptr(rax, Address(rax, ConstMethod::constants_offset()));
__ movptr(rax, Address(rax,
ConstantPool::pool_holder_offset_in_bytes()));
__ movptr(rax, Address(rax, mirror_offset));
__ load_mirror(rax, rbx);
#ifdef ASSERT
{
@ -662,6 +657,9 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
__ movptr(rbcp, Address(rbx, Method::const_offset())); // get ConstMethod*
__ lea(rbcp, Address(rbcp, ConstMethod::codes_offset())); // get codebase
__ push(rbx); // save Method*
// Get mirror and store it in the frame as GC root for this Method*
__ load_mirror(rdx, rbx);
__ push(rdx);
if (ProfileInterpreter) {
Label method_data_continue;
__ movptr(rdx, Address(rbx, in_bytes(Method::method_data_offset())));
@ -999,15 +997,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// pass mirror handle if static call
{
Label L;
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movl(t, Address(method, Method::access_flags_offset()));
__ testl(t, JVM_ACC_STATIC);
__ jcc(Assembler::zero, L);
// get mirror
__ movptr(t, Address(method, Method::const_offset()));
__ movptr(t, Address(t, ConstMethod::constants_offset()));
__ movptr(t, Address(t, ConstantPool::pool_holder_offset_in_bytes()));
__ movptr(t, Address(t, mirror_offset));
__ load_mirror(t, method);
// copy mirror into activation frame
__ movptr(Address(rbp, frame::interpreter_frame_oop_temp_offset * wordSize),
t);

View File

@ -345,13 +345,34 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
__ fld_d(Address(rsp, 1*wordSize));
switch (kind) {
case Interpreter::java_lang_math_sin :
__ trigfunc('s');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_cos :
__ trigfunc('c');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_tan :
__ trigfunc('t');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dtan() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_sqrt:
__ fsqrt();
@ -362,26 +383,29 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_log:
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dlog() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
}
else {
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_log10:
__ flog10();
// Store to stack to convert 80bit precision back to 64bits
__ push_fTOS();
__ pop_fTOS();
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dlog10() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_pow:
__ fld_d(Address(rsp, 3*wordSize)); // second argument
__ subptr(rsp, 4 * wordSize);
__ fstp_d(Address(rsp, 0));
__ fstp_d(Address(rsp, 2 * wordSize));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dpow() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
@ -391,7 +415,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_exp:
__ subptr(rsp, 2*wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dexp() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));

View File

@ -29,6 +29,7 @@
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/templateInterpreterGenerator.hpp"
#include "runtime/arguments.hpp"
#include "runtime/sharedRuntime.hpp"
#define __ _masm->
@ -373,32 +374,60 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
__ sqrtsd(xmm0, Address(rsp, wordSize));
} else if (kind == Interpreter::java_lang_math_exp) {
__ movdbl(xmm0, Address(rsp, wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
if (StubRoutines::dexp() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
}
} else if (kind == Interpreter::java_lang_math_log) {
__ movdbl(xmm0, Address(rsp, wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
if (StubRoutines::dlog() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
}
} else if (kind == Interpreter::java_lang_math_log10) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dlog10() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
}
} else if (kind == Interpreter::java_lang_math_sin) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dsin() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
}
} else if (kind == Interpreter::java_lang_math_cos) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dcos() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
}
} else if (kind == Interpreter::java_lang_math_pow) {
__ movdbl(xmm1, Address(rsp, wordSize));
__ movdbl(xmm0, Address(rsp, 3 * wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
if (StubRoutines::dpow() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
}
} else if (kind == Interpreter::java_lang_math_tan) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dtan() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
}
} else {
__ fld_d(Address(rsp, wordSize));
switch (kind) {
case Interpreter::java_lang_math_sin :
__ trigfunc('s');
break;
case Interpreter::java_lang_math_cos :
__ trigfunc('c');
break;
case Interpreter::java_lang_math_tan :
__ trigfunc('t');
break;
case Interpreter::java_lang_math_abs:
__ fabs();
break;
case Interpreter::java_lang_math_log10:
__ flog10();
break;
default :
ShouldNotReachHere();
}

View File

@ -844,6 +844,11 @@ public:
static uint32_t get_xsave_header_upper_segment() {
return _cpuid_info.xem_xcr0_edx;
}
// SSE2 and later processors implement a 'pause' instruction
// that can be used for efficient implementation of
// the intrinsic for java.lang.Thread.onSpinWait()
static bool supports_on_spin_wait() { return supports_sse2(); }
};
#endif // CPU_X86_VM_VM_VERSION_X86_HPP

View File

@ -1719,6 +1719,10 @@ const bool Matcher::match_rule_supported(int opcode) {
if (!(UseSSE > 4))
ret_value = false;
break;
case Op_OnSpinWait:
if (VM_Version::supports_on_spin_wait() == false)
ret_value = false;
break;
}
return ret_value; // Per default match rules are supported.
@ -1754,6 +1758,15 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
return ret_value; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
bool ret_value = false;
if (UseAVX > 2) {
ret_value = VM_Version::supports_avx512vl();
}
return ret_value;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
@ -1871,7 +1884,7 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
break;
case Op_VecZ:
__ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
break;
default:
ShouldNotReachHere();
@ -1926,7 +1939,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
break;
case Op_VecZ:
__ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
break;
default:
ShouldNotReachHere();
@ -1946,7 +1959,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
break;
case Op_VecZ:
__ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
__ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
break;
default:
ShouldNotReachHere();
@ -2172,6 +2185,19 @@ instruct ShouldNotReachHere() %{
ins_pipe(pipe_slow);
%}
// =================================EVEX special===============================
instruct setMask(rRegI dst, rRegI src) %{
predicate(Matcher::has_predicated_vectors());
match(Set dst (SetVectMaskI src));
effect(TEMP dst);
format %{ "setvectmask $dst, $src" %}
ins_encode %{
__ setvectmask($dst$$Register, $src$$Register);
%}
ins_pipe(pipe_slow);
%}
// ============================================================================
instruct addF_reg(regF dst, regF src) %{
@ -2996,6 +3022,24 @@ instruct sqrtD_imm(regD dst, immD con) %{
ins_pipe(pipe_slow);
%}
instruct onspinwait() %{
match(OnSpinWait);
ins_cost(200);
format %{
$$template
if (os::is_MP()) {
$$emit$$"pause\t! membar_onspinwait"
} else {
$$emit$$"MEMBAR-onspinwait ! (empty encoding)"
}
%}
ins_encode %{
__ pause();
%}
ins_pipe(pipe_slow);
%}
// ====================VECTOR INSTRUCTIONS=====================================
// Load vectors (4 bytes long)
@ -3047,11 +3091,11 @@ instruct loadV32(vecY dst, memory mem) %{
%}
// Load vectors (64 bytes long)
instruct loadV64(vecZ dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 64);
instruct loadV64_dword(vecZ dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
match(Set dst (LoadVector mem));
ins_cost(125);
format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
ins_encode %{
int vector_len = 2;
__ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
@ -3059,6 +3103,19 @@ instruct loadV64(vecZ dst, memory mem) %{
ins_pipe( pipe_slow );
%}
// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
match(Set dst (LoadVector mem));
ins_cost(125);
format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
ins_encode %{
int vector_len = 2;
__ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Store vectors
instruct storeV4(memory mem, vecS src) %{
predicate(n->as_StoreVector()->memory_size() == 4);
@ -3104,11 +3161,11 @@ instruct storeV32(memory mem, vecY src) %{
ins_pipe( pipe_slow );
%}
instruct storeV64(memory mem, vecZ src) %{
predicate(n->as_StoreVector()->memory_size() == 64);
instruct storeV64_dword(memory mem, vecZ src) %{
predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
match(Set mem (StoreVector mem src));
ins_cost(145);
format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
ins_encode %{
int vector_len = 2;
__ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
@ -3116,6 +3173,18 @@ instruct storeV64(memory mem, vecZ src) %{
ins_pipe( pipe_slow );
%}
instruct storeV64_qword(memory mem, vecZ src) %{
predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
match(Set mem (StoreVector mem src));
ins_cost(145);
format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
ins_encode %{
int vector_len = 2;
__ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// ====================LEGACY REPLICATE=======================================
instruct Repl4B_mem(vecS dst, memory mem) %{

View File

@ -1021,10 +1021,10 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
__ vmovdqu(xmm0, Address(rsp, -32));
break;
case Op_VecZ:
__ evmovdqul(Address(rsp, -64), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, src_offset), 2);
__ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, -64), 2);
__ evmovdquq(Address(rsp, -64), xmm0, 2);
__ evmovdquq(xmm0, Address(rsp, src_offset), 2);
__ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
__ evmovdquq(xmm0, Address(rsp, -64), 2);
break;
default:
ShouldNotReachHere();
@ -9828,27 +9828,6 @@ instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
ins_pipe( pipe_slow );
%}
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst(TanD src));
format %{ "DTAN $dst" %}
ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
Opcode(0xDD), Opcode(0xD8)); // fstp st
ins_pipe( pipe_slow );
%}
instruct tanD_reg(regD dst, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst(TanD dst));
effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
format %{ "DTAN $dst" %}
ins_encode( Push_SrcD(dst),
Opcode(0xD9), Opcode(0xF2), // fptan
Opcode(0xDD), Opcode(0xD8), // fstp st
Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
instruct atanDPR_reg(regDPR dst, regDPR src) %{
predicate (UseSSE<=1);
match(Set dst(AtanD dst src));
@ -9880,41 +9859,6 @@ instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
ins_pipe( pipe_slow );
%}
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
// The source Double operand on FPU stack
match(Set dst (Log10D src));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fxch ; swap ST(0) with ST(1)
// fyl2x ; compute log_10(2) * log_2(x)
format %{ "FLDLG2 \t\t\t#Log10\n\t"
"FXCH \n\t"
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
Opcode(0xD9), Opcode(0xC9), // fxch
Opcode(0xD9), Opcode(0xF1)); // fyl2x
ins_pipe( pipe_slow );
%}
instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
predicate (UseSSE>=2);
effect(KILL cr);
match(Set dst (Log10D src));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fyl2x ; compute log_10(2) * log_2(x)
format %{ "FLDLG2 \t\t\t#Log10\n\t"
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
Push_SrcD(src),
Opcode(0xD9), Opcode(0xF1), // fyl2x
Push_ResultD(dst));
ins_pipe( pipe_slow );
%}
//-------------Float Instructions-------------------------------
// Float Math
@ -12103,6 +12047,7 @@ instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cr);
effect(USE labl);
@ -12118,6 +12063,7 @@ instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
@ -12132,6 +12078,7 @@ instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
%}
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
@ -12145,6 +12092,60 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
ins_pipe( pipe_jcc );
%}
// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cr);
effect(USE labl);
ins_cost(400);
format %{ "J$cop $labl\t# Loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe( pipe_jcc );
%}
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
ins_cost(400);
format %{ "J$cop,u $labl\t# Loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe( pipe_jcc );
%}
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
ins_cost(300);
format %{ "J$cop,u $labl\t# Loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe( pipe_jcc );
%}
// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
match(If cop cmp);

View File

@ -1081,10 +1081,10 @@ static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
__ vmovdqu(xmm0, Address(rsp, -32));
break;
case Op_VecZ:
__ evmovdqul(Address(rsp, -64), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, src_offset), 2);
__ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, -64), 2);
__ evmovdquq(Address(rsp, -64), xmm0, 2);
__ evmovdquq(xmm0, Address(rsp, src_offset), 2);
__ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
__ evmovdquq(xmm0, Address(rsp, -64), 2);
break;
default:
ShouldNotReachHere();
@ -9897,34 +9897,6 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
ins_pipe(pipe_slow);
%}
// -----------Trig and Trancendental Instructions------------------------------
instruct tanD_reg(regD dst) %{
match(Set dst (TanD dst));
format %{ "dtan $dst\n\t" %}
ins_encode( Push_SrcXD(dst),
Opcode(0xD9), Opcode(0xF2), //fptan
Opcode(0xDD), Opcode(0xD8), //fstp st
Push_ResultXD(dst) );
ins_pipe( pipe_slow );
%}
instruct log10D_reg(regD dst) %{
// The source and result Double operands in XMM registers
match(Set dst (Log10D dst));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fyl2x ; compute log_10(2) * log_2(x)
format %{ "fldlg2\t\t\t#Log10\n\t"
"fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
%}
ins_encode(Opcode(0xD9), Opcode(0xEC), // fldlg2
Push_SrcXD(dst),
Opcode(0xD9), Opcode(0xF1), // fyl2x
Push_ResultXD(dst));
ins_pipe( pipe_slow );
%}
//----------Arithmetic Conversion Instructions---------------------------------
instruct roundFloat_nop(regF dst)
@ -11471,6 +11443,7 @@ instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cr);
effect(USE labl);
@ -11486,6 +11459,7 @@ instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
@ -11500,6 +11474,7 @@ instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
%}
instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
@ -11513,6 +11488,61 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
ins_pipe(pipe_jcc);
%}
// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl)
%{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cr);
effect(USE labl);
ins_cost(400);
format %{ "j$cop $labl\t# loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe(pipe_jcc);
%}
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
ins_cost(400);
format %{ "j$cop,u $labl\t# loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe(pipe_jcc);
%}
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
ins_cost(300);
format %{ "j$cop,u $labl\t# loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe(pipe_jcc);
%}
// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
match(If cop cmp);

View File

@ -49,6 +49,7 @@ const char *BytecodeInterpreter::name_of_field_at_address(address addr) {
DO(_locals);
DO(_constants);
DO(_method);
DO(_mirror);
DO(_mdx);
DO(_stack);
DO(_msg);
@ -77,6 +78,7 @@ void BytecodeInterpreter::layout_interpreterState(interpreterState istate,
bool is_top_frame) {
istate->set_locals(locals);
istate->set_method(method);
istate->set_mirror(method->method_holder()->java_mirror());
istate->set_self_link(istate);
istate->set_prev_link(NULL);
// thread will be set by a hacky repurposing of frame::patch_pc()

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2011 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -44,6 +44,9 @@
inline void set_method(Method* new_method) {
_method = new_method;
}
inline void set_mirror(oop new_mirror) {
_mirror = new_mirror;
}
inline interpreterState self_link() {
return _self_link;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -42,22 +42,6 @@
#include "utilities/events.hpp"
// Release the CompiledICHolder* associated with this call site is there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {

View File

@ -221,9 +221,16 @@ void CppInterpreter::main_loop(int recurse, TRAPS) {
// Push our result
for (int i = 0; i < result_slots; i++) {
// Adjust result to smaller
intptr_t res = result[-i];
union {
intptr_t res;
jint res_jint;
};
res = result[-i];
if (result_slots == 1) {
res = narrow(method->result_type(), res);
BasicType t = method->result_type();
if (is_subword_type(t)) {
res_jint = (jint)narrow(t, res_jint);
}
}
stack->push(res);
}
@ -748,6 +755,7 @@ InterpreterFrame *InterpreterFrame::build(Method* const method, TRAPS) {
istate->set_locals(locals);
istate->set_method(method);
istate->set_mirror(method->method_holder()->java_mirror());
istate->set_self_link(istate);
istate->set_prev_link(NULL);
istate->set_thread(thread);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -115,6 +115,10 @@ inline Method** frame::interpreter_frame_method_addr() const {
return &(get_interpreterState()->_method);
}
inline oop* frame::interpreter_frame_mirror_addr() const {
return &(get_interpreterState()->_mirror);
}
inline intptr_t* frame::interpreter_frame_mdp_addr() const {
return (intptr_t*) &(get_interpreterState()->_mdx);
}

View File

@ -43,7 +43,12 @@ define_pd_global(intx, CodeEntryAlignment, 32);
define_pd_global(intx, OptoLoopAlignment, 16);
define_pd_global(intx, InlineFrequencyCount, 100);
define_pd_global(intx, InlineSmallCode, 1000);
define_pd_global(intx, InitArrayShortSize, -1); // not used
// not used, but must satisfy following constraints:
// 1.) <VALUE> must be in the allowed range for intx *and*
// 2.) <VALUE> % BytesPerLong == 0 so as to not
// violate the constraint verifier on JVM start-up.
define_pd_global(intx, InitArrayShortSize, 0);
#define DEFAULT_STACK_YELLOW_PAGES (2)
#define DEFAULT_STACK_RED_PAGES (1)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -85,6 +85,12 @@ public class SymbolTable extends sun.jvm.hotspot.utilities.Hashtable {
tables. */
public Symbol probe(byte[] name) {
long hashValue = hashSymbol(name);
Symbol s = sharedTable.probe(name, hashValue);
if (s != null) {
return s;
}
for (HashtableEntry e = (HashtableEntry) bucket(hashToIndex(hashValue)); e != null; e = (HashtableEntry) e.next()) {
if (e.hash() == hashValue) {
Symbol sym = Symbol.create(e.literalValue());
@ -94,7 +100,7 @@ public class SymbolTable extends sun.jvm.hotspot.utilities.Hashtable {
}
}
return sharedTable.probe(name, hashValue);
return null;
}
public interface SymbolVisitor {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2016 Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -46,6 +46,11 @@ public class BreakpointInfo extends VMObject {
}
private static synchronized void initialize(TypeDataBase db) throws WrongTypeException {
if (!VM.getVM().isJvmtiSupported()) {
// no BreakpointInfo support without JVMTI
return;
}
Type type = db.lookupType("BreakpointInfo");
origBytecodeField = type.getCIntegerField("_orig_bytecode");

View File

@ -85,7 +85,9 @@ public class InstanceKlass extends Klass {
isMarkedDependent = new CIntField(type.getCIntegerField("_is_marked_dependent"), 0);
initState = new CIntField(type.getCIntegerField("_init_state"), 0);
itableLen = new CIntField(type.getCIntegerField("_itable_len"), 0);
breakpoints = type.getAddressField("_breakpoints");
if (VM.getVM().isJvmtiSupported()) {
breakpoints = type.getAddressField("_breakpoints");
}
genericSignatureIndex = new CIntField(type.getCIntegerField("_generic_signature_index"), 0);
majorVersion = new CIntField(type.getCIntegerField("_major_version"), 0);
minorVersion = new CIntField(type.getCIntegerField("_minor_version"), 0);
@ -837,6 +839,9 @@ public class InstanceKlass extends Klass {
/** Breakpoint support (see methods on Method* for details) */
public BreakpointInfo getBreakpoints() {
if (!VM.getVM().isJvmtiSupported()) {
return null;
}
Address addr = getAddress().getAddressAt(breakpoints.getOffset());
return (BreakpointInfo) VMObjectFactory.newObject(BreakpointInfo.class, addr);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -87,6 +87,8 @@ public class VM {
private StubRoutines stubRoutines;
private Bytes bytes;
/** Flag indicating if JVMTI support is included in the build */
private boolean isJvmtiSupported;
/** Flags indicating whether we are attached to a core, C1, or C2 build */
private boolean usingClientCompiler;
private boolean usingServerCompiler;
@ -336,6 +338,16 @@ public class VM {
stackBias = db.lookupIntConstant("STACK_BIAS").intValue();
invocationEntryBCI = db.lookupIntConstant("InvocationEntryBci").intValue();
// We infer the presence of JVMTI from the presence of the InstanceKlass::_breakpoints field.
{
Type type = db.lookupType("InstanceKlass");
if (type.getField("_breakpoints", false, false) == null) {
isJvmtiSupported = false;
} else {
isJvmtiSupported = true;
}
}
// We infer the presence of C1 or C2 from a couple of fields we
// already have present in the type database
{
@ -701,6 +713,11 @@ public class VM {
return isBigEndian;
}
/** Returns true if JVMTI is supported, false otherwise */
public boolean isJvmtiSupported() {
return isJvmtiSupported;
}
/** Returns true if this is a "core" build, false if either C1 or C2
is present */
public boolean isCore() {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -44,21 +44,23 @@ public class CompactHashTable extends VMObject {
Type type = db.lookupType("SymbolCompactHashTable");
baseAddressField = type.getAddressField("_base_address");
bucketCountField = type.getCIntegerField("_bucket_count");
tableEndOffsetField = type.getCIntegerField("_table_end_offset");
entryCountField = type.getCIntegerField("_entry_count");
bucketsField = type.getAddressField("_buckets");
uintSize = db.lookupType("juint").getSize();
entriesField = type.getAddressField("_entries");
uintSize = db.lookupType("u4").getSize();
}
// Fields
private static CIntegerField bucketCountField;
private static CIntegerField tableEndOffsetField;
private static CIntegerField entryCountField;
private static AddressField baseAddressField;
private static AddressField bucketsField;
private static AddressField entriesField;
private static long uintSize;
private static int BUCKET_OFFSET_MASK = 0x3FFFFFFF;
private static int BUCKET_TYPE_SHIFT = 30;
private static int COMPACT_BUCKET_TYPE = 1;
private static int VALUE_ONLY_BUCKET_TYPE = 1;
public CompactHashTable(Address addr) {
super(addr);
@ -68,12 +70,8 @@ public class CompactHashTable extends VMObject {
return (int)bucketCountField.getValue(addr);
}
private int tableEndOffset() {
return (int)tableEndOffsetField.getValue(addr);
}
private boolean isCompactBucket(int bucket_info) {
return (bucket_info >> BUCKET_TYPE_SHIFT) == COMPACT_BUCKET_TYPE;
private boolean isValueOnlyBucket(int bucket_info) {
return (bucket_info >> BUCKET_TYPE_SHIFT) == VALUE_ONLY_BUCKET_TYPE;
}
private int bucketOffset(int bucket_info) {
@ -81,9 +79,8 @@ public class CompactHashTable extends VMObject {
}
public Symbol probe(byte[] name, long hash) {
if (bucketCount() == 0) {
// The table is invalid, so don't try to lookup
if (bucketCount() <= 0) {
// This CompactHashTable is not in use
return null;
}
@ -91,34 +88,33 @@ public class CompactHashTable extends VMObject {
Symbol sym;
Address baseAddress = baseAddressField.getValue(addr);
Address bucket = bucketsField.getValue(addr);
Address bucketEnd = bucket;
long index = hash % bucketCount();
int bucketInfo = (int)bucket.getCIntegerAt(index * uintSize, uintSize, true);
int bucketOffset = bucketOffset(bucketInfo);
int nextBucketInfo = (int)bucket.getCIntegerAt((index+1) * uintSize, uintSize, true);
int nextBucketOffset = bucketOffset(nextBucketInfo);
bucket = bucket.addOffsetTo(bucketOffset * uintSize);
Address entry = entriesField.getValue(addr).addOffsetTo(bucketOffset * uintSize);
if (isCompactBucket(bucketInfo)) {
symOffset = bucket.getCIntegerAt(0, uintSize, true);
if (isValueOnlyBucket(bucketInfo)) {
symOffset = entry.getCIntegerAt(0, uintSize, true);
sym = Symbol.create(baseAddress.addOffsetTo(symOffset));
if (sym.equals(name)) {
return sym;
}
} else {
bucketEnd = bucket.addOffsetTo(nextBucketOffset * uintSize);
while (bucket.lessThan(bucketEnd)) {
long symHash = bucket.getCIntegerAt(0, uintSize, true);
Address entryMax = entriesField.getValue(addr).addOffsetTo(nextBucketOffset * uintSize);
while (entry.lessThan(entryMax)) {
long symHash = entry.getCIntegerAt(0, uintSize, true);
if (symHash == hash) {
symOffset = bucket.getCIntegerAt(uintSize, uintSize, true);
symOffset = entry.getCIntegerAt(uintSize, uintSize, true);
Address symAddr = baseAddress.addOffsetTo(symOffset);
sym = Symbol.create(symAddr);
if (sym.equals(name)) {
return sym;
}
}
bucket = bucket.addOffsetTo(2 * uintSize);
entry = entry.addOffsetTo(2 * uintSize);
}
}
return null;

View File

@ -22,7 +22,7 @@
*/
package jdk.vm.ci.common;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Utilities for operating on raw memory with {@link Unsafe}.

View File

@ -39,7 +39,7 @@ import jdk.vm.ci.inittimer.InitTimer;
import jdk.vm.ci.meta.JavaType;
import jdk.vm.ci.meta.ResolvedJavaMethod;
import jdk.vm.ci.meta.ResolvedJavaType;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Calls from Java into HotSpot. The behavior of all the methods in this class that take a native

View File

@ -120,7 +120,9 @@ public class HotSpotCodeCacheProvider implements CodeCacheProvider {
resultInstalledCode = installedCode;
}
int result = runtime.getCompilerToVM().installCode(target, (HotSpotCompiledCode) compiledCode, resultInstalledCode, (HotSpotSpeculationLog) log);
HotSpotSpeculationLog speculationLog = (log != null && log.hasSpeculations()) ? (HotSpotSpeculationLog) log : null;
int result = runtime.getCompilerToVM().installCode(target, (HotSpotCompiledCode) compiledCode, resultInstalledCode, speculationLog);
if (result != config.codeInstallResultOk) {
String resultDesc = config.getCodeInstallResultDescription(result);
if (compiledCode instanceof HotSpotCompiledNmethod) {

View File

@ -25,7 +25,7 @@ package jdk.vm.ci.hotspot;
import static jdk.vm.ci.hotspot.UnsafeAccess.UNSAFE;
import jdk.vm.ci.code.InstalledCode;
import jdk.vm.ci.inittimer.SuppressFBWarnings;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Implementation of {@link InstalledCode} for HotSpot.

View File

@ -30,7 +30,7 @@ import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.JavaType;
import jdk.vm.ci.meta.ResolvedJavaType;
import jdk.vm.ci.runtime.JVMCIRuntime;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
//JaCoCo Exclude

View File

@ -39,7 +39,7 @@ import jdk.vm.ci.meta.JavaTypeProfile.ProfiledType;
import jdk.vm.ci.meta.ResolvedJavaMethod;
import jdk.vm.ci.meta.ResolvedJavaType;
import jdk.vm.ci.meta.TriState;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Access to a HotSpot MethodData structure (defined in methodData.hpp).

View File

@ -38,7 +38,7 @@ public class HotSpotSpeculationLog implements SpeculationLog {
/** All speculations that have been a deoptimization reason. */
private Set<SpeculationReason> failedSpeculations;
/** Strong references to all reasons embededded in the current nmethod. */
/** Strong references to all reasons embedded in the current nmethod. */
private volatile Collection<SpeculationReason> speculations;
@Override
@ -81,4 +81,9 @@ public class HotSpotSpeculationLog implements SpeculationLog {
return HotSpotObjectConstantImpl.forObject(reason);
}
@Override
public synchronized boolean hasSpeculations() {
return speculations != null && !speculations.isEmpty();
}
}

View File

@ -38,7 +38,7 @@ import jdk.vm.ci.hotspotvmconfig.HotSpotVMData;
import jdk.vm.ci.hotspotvmconfig.HotSpotVMField;
import jdk.vm.ci.hotspotvmconfig.HotSpotVMFlag;
import jdk.vm.ci.hotspotvmconfig.HotSpotVMType;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
//JaCoCo Exclude

View File

@ -40,7 +40,7 @@ import jdk.internal.org.objectweb.asm.Label;
import jdk.internal.org.objectweb.asm.MethodVisitor;
import jdk.internal.org.objectweb.asm.Opcodes;
import jdk.internal.org.objectweb.asm.Type;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* A {@link ClassVisitor} that verifies {@link HotSpotVMConfig} does not access {@link Unsafe} from

View File

@ -24,7 +24,7 @@ package jdk.vm.ci.hotspot;
import static jdk.vm.ci.hotspot.HotSpotJVMCIRuntime.runtime;
import static jdk.vm.ci.hotspot.UnsafeAccess.UNSAFE;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Class to access the C++ {@code vmSymbols} table.

View File

@ -24,7 +24,7 @@ package jdk.vm.ci.hotspot;
import java.lang.reflect.Field;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Package private access to the {@link Unsafe} capability.

View File

@ -56,4 +56,11 @@ public interface SpeculationLog {
* argument to the deoptimization function.
*/
JavaConstant speculate(SpeculationReason reason);
/**
* Returns if this log has speculations.
*
* @return true if there are speculations, false otherwise
*/
boolean hasSpeculations();
}

View File

@ -24,9 +24,6 @@
*/
module jdk.vm.ci {
// 8153756
requires jdk.unsupported;
uses jdk.vm.ci.hotspot.HotSpotVMEventListener;
uses jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory;
uses jdk.vm.ci.runtime.JVMCICompilerFactory;

View File

@ -4875,3 +4875,16 @@ bool os::start_debugging(char *buf, int buflen) {
}
return yes;
}
static inline time_t get_mtime(const char* filename) {
struct stat st;
int ret = os::stat(filename, &st);
assert(ret == 0, "failed to stat() file '%s': %s", filename, strerror(errno));
return st.st_mtime;
}
int os::compare_file_modified_times(const char* file1, const char* file2) {
time_t t1 = get_mtime(file1);
time_t t2 = get_mtime(file2);
return t1 - t2;
}

View File

@ -2013,8 +2013,8 @@ void os::print_os_info(outputStream* st) {
// their own specific XXX-release file as well as a redhat-release file.
// Because of this the XXX-release file needs to be searched for before the
// redhat-release file.
// Since Red Hat has a lsb-release file that is not very descriptive the
// search for redhat-release needs to be before lsb-release.
// Since Red Hat and SuSE have an lsb-release file that is not very descriptive the
// search for redhat-release / SuSE-release needs to be before lsb-release.
// Since the lsb-release file is the new standard it needs to be searched
// before the older style release files.
// Searching system-release (Red Hat) and os-release (other Linuxes) are a
@ -2031,8 +2031,8 @@ const char* distro_files[] = {
"/etc/mandrake-release",
"/etc/sun-release",
"/etc/redhat-release",
"/etc/lsb-release",
"/etc/SuSE-release",
"/etc/lsb-release",
"/etc/turbolinux-release",
"/etc/gentoo-release",
"/etc/ltib-release",
@ -2062,14 +2062,11 @@ void os::Linux::print_distro_info(outputStream* st) {
st->cr();
}
static void parse_os_info(char* distro, size_t length, const char* file) {
FILE* fp = fopen(file, "r");
if (fp != NULL) {
char buf[256];
// get last line of the file.
while (fgets(buf, sizeof(buf), fp)) { }
// Edit out extra stuff in expected ubuntu format
if (strstr(buf, "DISTRIB_DESCRIPTION=") != NULL) {
static void parse_os_info_helper(FILE* fp, char* distro, size_t length, bool get_first_line) {
char buf[256];
while (fgets(buf, sizeof(buf), fp)) {
// Edit out extra stuff in expected format
if (strstr(buf, "DISTRIB_DESCRIPTION=") != NULL || strstr(buf, "PRETTY_NAME=") != NULL) {
char* ptr = strstr(buf, "\""); // the name is in quotes
if (ptr != NULL) {
ptr++; // go beyond first quote
@ -2083,13 +2080,26 @@ static void parse_os_info(char* distro, size_t length, const char* file) {
if (nl != NULL) *nl = '\0';
strncpy(distro, ptr, length);
}
} else {
// if not in expected Ubuntu format, print out whole line minus \n
return;
} else if (get_first_line) {
char* nl = strchr(buf, '\n');
if (nl != NULL) *nl = '\0';
strncpy(distro, buf, length);
return;
}
// close distro file
}
// print last line and close
char* nl = strchr(buf, '\n');
if (nl != NULL) *nl = '\0';
strncpy(distro, buf, length);
}
static void parse_os_info(char* distro, size_t length, const char* file) {
FILE* fp = fopen(file, "r");
if (fp != NULL) {
// if suse format, print out first line
bool get_first_line = (strcmp(file, "/etc/SuSE-release") == 0);
parse_os_info_helper(fp, distro, length, get_first_line);
fclose(fp);
}
}
@ -3041,6 +3051,48 @@ static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) {
return addr == MAP_FAILED ? NULL : addr;
}
// Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address
// (req_addr != NULL) or with a given alignment.
// - bytes shall be a multiple of alignment.
// - req_addr can be NULL. If not NULL, it must be a multiple of alignment.
// - alignment sets the alignment at which memory shall be allocated.
// It must be a multiple of allocation granularity.
// Returns address of memory or NULL. If req_addr was not NULL, will only return
// req_addr or NULL.
static char* anon_mmap_aligned(size_t bytes, size_t alignment, char* req_addr) {
size_t extra_size = bytes;
if (req_addr == NULL && alignment > 0) {
extra_size += alignment;
}
char* start = (char*) ::mmap(req_addr, extra_size, PROT_NONE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE,
-1, 0);
if (start == MAP_FAILED) {
start = NULL;
} else {
if (req_addr != NULL) {
if (start != req_addr) {
::munmap(start, extra_size);
start = NULL;
}
} else {
char* const start_aligned = (char*) align_ptr_up(start, alignment);
char* const end_aligned = start_aligned + bytes;
char* const end = start + extra_size;
if (start_aligned > start) {
::munmap(start, start_aligned - start);
}
if (end_aligned < end) {
::munmap(end_aligned, end - end_aligned);
}
start = start_aligned;
}
}
return start;
}
static int anon_munmap(char * addr, size_t size) {
return ::munmap(addr, size) == 0;
}
@ -3317,29 +3369,113 @@ void os::large_page_init() {
#define SHM_HUGETLB 04000
#endif
#define shm_warning_format(format, ...) \
do { \
if (UseLargePages && \
(!FLAG_IS_DEFAULT(UseLargePages) || \
!FLAG_IS_DEFAULT(UseSHM) || \
!FLAG_IS_DEFAULT(LargePageSizeInBytes))) { \
warning(format, __VA_ARGS__); \
} \
} while (0)
#define shm_warning(str) shm_warning_format("%s", str)
#define shm_warning_with_errno(str) \
do { \
int err = errno; \
shm_warning_format(str " (error = %d)", err); \
} while (0)
static char* shmat_with_alignment(int shmid, size_t bytes, size_t alignment) {
assert(is_size_aligned(bytes, alignment), "Must be divisible by the alignment");
if (!is_size_aligned(alignment, SHMLBA)) {
assert(false, "Code below assumes that alignment is at least SHMLBA aligned");
return NULL;
}
// To ensure that we get 'alignment' aligned memory from shmat,
// we pre-reserve aligned virtual memory and then attach to that.
char* pre_reserved_addr = anon_mmap_aligned(bytes, alignment, NULL);
if (pre_reserved_addr == NULL) {
// Couldn't pre-reserve aligned memory.
shm_warning("Failed to pre-reserve aligned memory for shmat.");
return NULL;
}
// SHM_REMAP is needed to allow shmat to map over an existing mapping.
char* addr = (char*)shmat(shmid, pre_reserved_addr, SHM_REMAP);
if ((intptr_t)addr == -1) {
int err = errno;
shm_warning_with_errno("Failed to attach shared memory.");
assert(err != EACCES, "Unexpected error");
assert(err != EIDRM, "Unexpected error");
assert(err != EINVAL, "Unexpected error");
// Since we don't know if the kernel unmapped the pre-reserved memory area
// we can't unmap it, since that would potentially unmap memory that was
// mapped from other threads.
return NULL;
}
return addr;
}
static char* shmat_at_address(int shmid, char* req_addr) {
if (!is_ptr_aligned(req_addr, SHMLBA)) {
assert(false, "Requested address needs to be SHMLBA aligned");
return NULL;
}
char* addr = (char*)shmat(shmid, req_addr, 0);
if ((intptr_t)addr == -1) {
shm_warning_with_errno("Failed to attach shared memory.");
return NULL;
}
return addr;
}
static char* shmat_large_pages(int shmid, size_t bytes, size_t alignment, char* req_addr) {
// If a req_addr has been provided, we assume that the caller has already aligned the address.
if (req_addr != NULL) {
assert(is_ptr_aligned(req_addr, os::large_page_size()), "Must be divisible by the large page size");
assert(is_ptr_aligned(req_addr, alignment), "Must be divisible by given alignment");
return shmat_at_address(shmid, req_addr);
}
// Since shmid has been setup with SHM_HUGETLB, shmat will automatically
// return large page size aligned memory addresses when req_addr == NULL.
// However, if the alignment is larger than the large page size, we have
// to manually ensure that the memory returned is 'alignment' aligned.
if (alignment > os::large_page_size()) {
assert(is_size_aligned(alignment, os::large_page_size()), "Must be divisible by the large page size");
return shmat_with_alignment(shmid, bytes, alignment);
} else {
return shmat_at_address(shmid, NULL);
}
}
char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment,
char* req_addr, bool exec) {
// "exec" is passed in but not used. Creating the shared image for
// the code cache doesn't have an SHM_X executable permission to check.
assert(UseLargePages && UseSHM, "only for SHM large pages");
assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
assert(is_ptr_aligned(req_addr, alignment), "Unaligned address");
if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) {
if (!is_size_aligned(bytes, os::large_page_size())) {
return NULL; // Fallback to small pages.
}
key_t key = IPC_PRIVATE;
char *addr;
bool warn_on_failure = UseLargePages &&
(!FLAG_IS_DEFAULT(UseLargePages) ||
!FLAG_IS_DEFAULT(UseSHM) ||
!FLAG_IS_DEFAULT(LargePageSizeInBytes));
char msg[128];
// Create a large shared memory region to attach to based on size.
// Currently, size is the total size of the heap
int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
// Currently, size is the total size of the heap.
int shmid = shmget(IPC_PRIVATE, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
if (shmid == -1) {
// Possible reasons for shmget failure:
// 1. shmmax is too small for Java heap.
@ -3355,16 +3491,12 @@ char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment,
// they are so fragmented after a long run that they can't
// coalesce into large pages. Try to reserve large pages when
// the system is still "fresh".
if (warn_on_failure) {
jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno);
warning("%s", msg);
}
shm_warning_with_errno("Failed to reserve shared memory.");
return NULL;
}
// attach to the region
addr = (char*)shmat(shmid, req_addr, 0);
int err = errno;
// Attach to the region.
char* addr = shmat_large_pages(shmid, bytes, alignment, req_addr);
// Remove shmid. If shmat() is successful, the actual shared memory segment
// will be deleted when it's detached by shmdt() or when the process
@ -3372,14 +3504,6 @@ char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment,
// segment immediately.
shmctl(shmid, IPC_RMID, NULL);
if ((intptr_t)addr == -1) {
if (warn_on_failure) {
jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err);
warning("%s", msg);
}
return NULL;
}
return addr;
}
@ -3422,50 +3546,6 @@ char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes,
return addr;
}
// Helper for os::Linux::reserve_memory_special_huge_tlbfs_mixed().
// Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address
// (req_addr != NULL) or with a given alignment.
// - bytes shall be a multiple of alignment.
// - req_addr can be NULL. If not NULL, it must be a multiple of alignment.
// - alignment sets the alignment at which memory shall be allocated.
// It must be a multiple of allocation granularity.
// Returns address of memory or NULL. If req_addr was not NULL, will only return
// req_addr or NULL.
static char* anon_mmap_aligned(size_t bytes, size_t alignment, char* req_addr) {
size_t extra_size = bytes;
if (req_addr == NULL && alignment > 0) {
extra_size += alignment;
}
char* start = (char*) ::mmap(req_addr, extra_size, PROT_NONE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE,
-1, 0);
if (start == MAP_FAILED) {
start = NULL;
} else {
if (req_addr != NULL) {
if (start != req_addr) {
::munmap(start, extra_size);
start = NULL;
}
} else {
char* const start_aligned = (char*) align_ptr_up(start, alignment);
char* const end_aligned = start_aligned + bytes;
char* const end = start + extra_size;
if (start_aligned > start) {
::munmap(start, start_aligned - start);
}
if (end_aligned < end) {
::munmap(end_aligned, end - end_aligned);
}
start = start_aligned;
}
}
return start;
}
// Reserve memory using mmap(MAP_HUGETLB).
// - bytes shall be a multiple of alignment.
// - req_addr can be NULL. If not NULL, it must be a multiple of alignment.

View File

@ -181,8 +181,8 @@ int os::log_vsnprintf(char* buf, size_t len, const char* fmt, va_list args) {
return vsnprintf(buf, len, fmt, args);
}
int os::fileno(FILE* fp) {
return ::fileno(fp);
int os::get_fileno(FILE* fp) {
return NOT_AIX(::)fileno(fp);
}
void os::Posix::print_load_average(outputStream* st) {

Some files were not shown because too many files have changed in this diff Show More