Merge
This commit is contained in:
commit
59292b53e3
@ -83,6 +83,21 @@ suite = {
|
||||
"workingSets" : "API,JVMCI",
|
||||
},
|
||||
|
||||
"jdk.vm.ci.code.test" : {
|
||||
"subDir" : "test/compiler/jvmci",
|
||||
"sourceDirs" : ["src"],
|
||||
"dependencies" : [
|
||||
"mx:JUNIT",
|
||||
"jdk.vm.ci.amd64",
|
||||
"jdk.vm.ci.sparc",
|
||||
"jdk.vm.ci.code",
|
||||
"jdk.vm.ci.hotspot",
|
||||
],
|
||||
"checkstyle" : "jdk.vm.ci.services",
|
||||
"javaCompliance" : "1.8",
|
||||
"workingSets" : "API,JVMCI",
|
||||
},
|
||||
|
||||
"jdk.vm.ci.runtime" : {
|
||||
"subDir" : "src/jdk.vm.ci/share/classes",
|
||||
"sourceDirs" : ["src"],
|
||||
@ -164,7 +179,7 @@ suite = {
|
||||
"subDir" : "test/compiler/jvmci",
|
||||
"sourceDirs" : ["src"],
|
||||
"dependencies" : [
|
||||
"mx:TESTNG",
|
||||
"TESTNG",
|
||||
"jdk.vm.ci.hotspot",
|
||||
],
|
||||
"checkstyle" : "jdk.vm.ci.services",
|
||||
|
@ -3325,9 +3325,15 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
|
||||
|
||||
const bool Matcher::match_rule_supported(int opcode) {
|
||||
|
||||
// TODO
|
||||
// identify extra cases that we might want to provide match rules for
|
||||
// e.g. Op_StrEquals and other intrinsics
|
||||
switch (opcode) {
|
||||
case Op_StrComp:
|
||||
case Op_StrIndexOf:
|
||||
if (CompactStrings) return false;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!has_match_rule(opcode)) {
|
||||
return false;
|
||||
}
|
||||
@ -3346,6 +3352,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
// Unconditionally returns false.
// NOTE(review): presumably this tells the matcher that this back end has no
// predicated (masked) vector instructions -- confirm against the shared
// Matcher declaration.
const bool Matcher::has_predicated_vectors(void) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const int Matcher::float_pressure(int default_pressure_threshold) {
|
||||
return default_pressure_threshold;
|
||||
}
|
||||
@ -4190,55 +4200,6 @@ encode %{
|
||||
}
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
Register cnt_reg = as_Register($cnt$$reg);
|
||||
Register base_reg = as_Register($base$$reg);
|
||||
// base is word aligned
|
||||
// cnt is count of words
|
||||
|
||||
Label loop;
|
||||
Label entry;
|
||||
|
||||
// Algorithm:
|
||||
//
|
||||
// scratch1 = cnt & 7;
|
||||
// cnt -= scratch1;
|
||||
// p += scratch1;
|
||||
// switch (scratch1) {
|
||||
// do {
|
||||
// cnt -= 8;
|
||||
// p[-8] = 0;
|
||||
// case 7:
|
||||
// p[-7] = 0;
|
||||
// case 6:
|
||||
// p[-6] = 0;
|
||||
// // ...
|
||||
// case 1:
|
||||
// p[-1] = 0;
|
||||
// case 0:
|
||||
// p += 8;
|
||||
// } while (cnt);
|
||||
// }
|
||||
|
||||
const int unroll = 8; // Number of str(zr) instructions we'll unroll
|
||||
|
||||
__ andr(rscratch1, cnt_reg, unroll - 1); // tmp1 = cnt % unroll
|
||||
__ sub(cnt_reg, cnt_reg, rscratch1); // cnt -= unroll
|
||||
// base_reg always points to the end of the region we're about to zero
|
||||
__ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
|
||||
__ adr(rscratch2, entry);
|
||||
__ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
|
||||
__ br(rscratch2);
|
||||
__ bind(loop);
|
||||
__ sub(cnt_reg, cnt_reg, unroll);
|
||||
for (int i = -unroll; i < 0; i++)
|
||||
__ str(zr, Address(base_reg, i * wordSize));
|
||||
__ bind(entry);
|
||||
__ add(base_reg, base_reg, unroll * wordSize);
|
||||
__ cbnz(cnt_reg, loop);
|
||||
%}
|
||||
|
||||
/// mov encodings
|
||||
|
||||
enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
|
||||
@ -12123,21 +12084,21 @@ instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlag
|
||||
%}
|
||||
%}
|
||||
|
||||
instruct rorI_rReg_Var_C_32(iRegLNoSp dst, iRegL src, iRegI shift, immI_32 c_32, rFlagsReg cr)
|
||||
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));
|
||||
|
||||
expand %{
|
||||
rorL_rReg(dst, src, shift, cr);
|
||||
rorI_rReg(dst, src, shift, cr);
|
||||
%}
|
||||
%}
|
||||
|
||||
instruct rorI_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
|
||||
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));
|
||||
|
||||
expand %{
|
||||
rorL_rReg(dst, src, shift, cr);
|
||||
rorI_rReg(dst, src, shift, cr);
|
||||
%}
|
||||
%}
|
||||
|
||||
@ -13363,7 +13324,23 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
|
||||
ins_cost(4 * INSN_COST);
|
||||
format %{ "ClearArray $cnt, $base" %}
|
||||
|
||||
ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
|
||||
ins_encode %{
|
||||
__ zero_words($base$$Register, $cnt$$Register);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// ClearArray where the word count is a long immediate known when the
// instruction is encoded. The count is handed to the immediate overload
// MacroAssembler::zero_words(Register, u_int64_t); base may be any pointer
// register.
// NOTE(review): a cr operand is declared but no effect() clause refers to it --
// confirm whether KILL cr is intended.
instruct clearArray_imm_reg(immL cnt, iRegP base, Universe dummy, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
|
||||
ins_cost(4 * INSN_COST);
|
||||
format %{ "ClearArray $cnt, $base" %}
|
||||
|
||||
ins_encode %{
|
||||
// The immL constant is cast to the unsigned count type zero_words expects.
__ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
@ -14797,10 +14774,10 @@ instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp,
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
|
||||
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
|
||||
iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!CompactStrings);
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
@ -14819,7 +14796,7 @@ instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cn
|
||||
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
|
||||
iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!CompactStrings);
|
||||
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
|
||||
@ -14839,7 +14816,7 @@ instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
|
||||
immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
|
||||
iRegI tmp3, iRegI tmp4, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!CompactStrings);
|
||||
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
|
||||
@ -14856,10 +14833,27 @@ instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
|
||||
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
|
||||
iRegI_R0 result, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!CompactStrings);
|
||||
predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (StrEquals (Binary str1 str2) cnt));
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
|
||||
|
||||
format %{ "String Equals $str1,$str2,$cnt -> $result" %}
|
||||
ins_encode %{
|
||||
// Count is in 8-bit bytes; non-Compact chars are 16 bits.
|
||||
__ arrays_equals($str1$$Register, $str2$$Register,
|
||||
$result$$Register, $cnt$$Register,
|
||||
1, /*is_string*/true);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
|
||||
iRegI_R0 result, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrEquals (Binary str1 str2) cnt));
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
|
||||
|
||||
@ -14907,6 +14901,40 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
|
||||
%}
|
||||
|
||||
|
||||
// fast char[] to byte[] compression
|
||||
// Matches StrCompressedCopy and encodes it as a call to
// MacroAssembler::char_array_compress.
// Operands are pinned to fixed registers: src=R2, dst=R1, len=R3, result=R0,
// with V0..V3 as vector temporaries. src, dst and len are consumed (USE_KILL)
// and the flags are killed.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
|
||||
vRegD_V0 tmp1, vRegD_V1 tmp2,
|
||||
vRegD_V2 tmp3, vRegD_V3 tmp4,
|
||||
iRegI_R0 result, rFlagsReg cr)
|
||||
%{
|
||||
match(Set result (StrCompressedCopy src (Binary dst len)));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
|
||||
|
||||
// NOTE(review): the text below mentions R4, but no operand of this instruct
// is bound to R4 (the fixed registers are R0..R3 and V0..V3) -- confirm the
// intended wording.
format %{ "String Compress $src,$dst -> $result // KILL R1, R2, R3, R4" %}
|
||||
ins_encode %{
|
||||
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
|
||||
$tmp1$$FloatRegister, $tmp2$$FloatRegister,
|
||||
$tmp3$$FloatRegister, $tmp4$$FloatRegister,
|
||||
$result$$Register);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// fast byte[] to char[] inflation
|
||||
// Matches StrInflatedCopy and encodes it as a call to
// MacroAssembler::byte_array_inflate.
// src=R0, dst=R1 and len=R2 are fixed and consumed (USE_KILL). tmp1..tmp3 are
// allocator-chosen vector temporaries, tmp4 is the fixed general register R3,
// and the flags are killed. The node produces no value (it sets dummy).
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
|
||||
vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dummy (StrInflatedCopy src (Binary dst len)));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
|
||||
|
||||
// NOTE(review): the text below names only $tmp1 and $tmp2 although four
// temporaries are declared TEMP above.
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
|
||||
ins_encode %{
|
||||
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
|
||||
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// encode char[] to byte[] in ISO_8859_1
|
||||
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
|
||||
vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -177,4 +177,6 @@ void AbstractInterpreter::layout_activation(Method* method,
|
||||
}
|
||||
*interpreter_frame->interpreter_frame_cache_addr() =
|
||||
method->constants()->cache();
|
||||
*interpreter_frame->interpreter_frame_mirror_addr() =
|
||||
method->method_holder()->java_mirror();
|
||||
}
|
||||
|
@ -2245,18 +2245,18 @@ public:
|
||||
rf(Vn, 5), rf(Rd, 0);
|
||||
}
|
||||
|
||||
#define INSN(NAME, opc, opc2) \
|
||||
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
|
||||
starti; \
|
||||
/* The encodings for the immh:immb fields (bits 22:16) are \
|
||||
* 0001 xxx 8B/16B, shift = xxx \
|
||||
* 001x xxx 4H/8H, shift = xxxx \
|
||||
* 01xx xxx 2S/4S, shift = xxxxx \
|
||||
* 1xxx xxx 1D/2D, shift = xxxxxx (1D is RESERVED) \
|
||||
*/ \
|
||||
assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \
|
||||
f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \
|
||||
f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \
|
||||
#define INSN(NAME, opc, opc2) \
|
||||
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
|
||||
starti; \
|
||||
/* The encodings for the immh:immb fields (bits 22:16) are \
|
||||
* 0001 xxx 8B/16B, shift = xxx \
|
||||
* 001x xxx 4H/8H, shift = xxxx \
|
||||
* 01xx xxx 2S/4S, shift = xxxxx \
|
||||
* 1xxx xxx 1D/2D, shift = xxxxxx (1D is RESERVED) \
|
||||
*/ \
|
||||
assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \
|
||||
f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \
|
||||
f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \
|
||||
}
|
||||
|
||||
INSN(shl, 0, 0b010101);
|
||||
@ -2347,6 +2347,24 @@ public:
|
||||
f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
|
||||
}
|
||||
|
||||
// AdvSIMD ZIP/UZP/TRN
|
||||
#define INSN(NAME, opcode) \
|
||||
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
|
||||
starti; \
|
||||
f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0b001110, 15, 10); \
|
||||
rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); \
|
||||
f(T & 1, 30), f(T >> 1, 23, 22); \
|
||||
}
|
||||
|
||||
INSN(uzp1, 0b001);
|
||||
INSN(trn1, 0b010);
|
||||
INSN(zip1, 0b011);
|
||||
INSN(uzp2, 0b101);
|
||||
INSN(trn2, 0b110);
|
||||
INSN(zip2, 0b111);
|
||||
|
||||
#undef INSN
|
||||
|
||||
// CRC32 instructions
|
||||
#define INSN(NAME, c, sf, sz) \
|
||||
void NAME(Register Rd, Register Rn, Register Rm) { \
|
||||
|
@ -2942,6 +2942,10 @@ void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); }
|
||||
|
||||
void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); }
|
||||
|
||||
// Spin-wait hint hook. Nothing is emitted here for this platform; the body
// only calls Unimplemented().
// NOTE(review): Unimplemented() presumably reports a fatal error if this is
// ever reached -- confirm.
void LIR_Assembler::on_spin_wait() {
|
||||
Unimplemented();
|
||||
}
|
||||
|
||||
// Copy the current-thread register (rthread) into the machine register that
// backs result_reg.
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
  const Register dst = result_reg->as_register();
  __ mov(dst, rthread);
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -331,7 +331,7 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
|
||||
length.load_item();
|
||||
|
||||
}
|
||||
if (needs_store_check) {
|
||||
if (needs_store_check || x->check_boolean()) {
|
||||
value.load_item();
|
||||
} else {
|
||||
value.load_for_store(x->elt_type());
|
||||
@ -380,7 +380,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
|
||||
// Seems to be a precise
|
||||
post_barrier(LIR_OprFact::address(array_addr), value.result());
|
||||
} else {
|
||||
__ move(value.result(), array_addr, null_check_info);
|
||||
LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info);
|
||||
__ move(result, array_addr, null_check_info);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1127,7 +1128,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
|
||||
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
|
||||
Values* dims = x->dims();
|
||||
int i = dims->length();
|
||||
LIRItemList* items = new LIRItemList(dims->length(), NULL);
|
||||
LIRItemList* items = new LIRItemList(i, i, NULL);
|
||||
while (i-- > 0) {
|
||||
LIRItem* size = new LIRItem(dims->at(i), this);
|
||||
items->at_put(i, size);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -32,22 +32,6 @@
|
||||
#include "runtime/mutexLocker.hpp"
|
||||
#include "runtime/safepoint.hpp"
|
||||
|
||||
// Release the CompiledICHolder* associated with this call site if there is one.
|
||||
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
|
||||
// This call site might have become stale so inspect it carefully.
|
||||
NativeCall* call = nativeCall_at(call_site->addr());
|
||||
if (is_icholder_entry(call->destination())) {
|
||||
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
|
||||
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
|
||||
}
|
||||
}
|
||||
|
||||
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
|
||||
// This call site might have become stale so inspect it carefully.
|
||||
NativeCall* call = nativeCall_at(call_site->addr());
|
||||
return is_icholder_entry(call->destination());
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#define __ _masm.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -78,7 +78,9 @@
|
||||
interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
|
||||
interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1,
|
||||
interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1,
|
||||
interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1,
|
||||
interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1,
|
||||
interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1,
|
||||
interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1,
|
||||
interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1,
|
||||
interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1,
|
||||
interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -188,6 +188,12 @@ inline Method** frame::interpreter_frame_method_addr() const {
|
||||
return (Method**)addr_at(interpreter_frame_method_offset);
|
||||
}
|
||||
|
||||
// Mirror
|
||||
|
||||
// Returns the address of the frame slot at interpreter_frame_mirror_offset,
// viewed as an oop*. The slot is located with addr_at(), the same way
// interpreter_frame_method_addr() locates the method slot.
inline oop* frame::interpreter_frame_mirror_addr() const {
|
||||
return (oop*)addr_at(interpreter_frame_mirror_offset);
|
||||
}
|
||||
|
||||
// top of expression stack
|
||||
inline intptr_t* frame::interpreter_frame_tos_address() const {
|
||||
intptr_t* last_sp = interpreter_frame_last_sp();
|
||||
|
@ -48,9 +48,9 @@ define_pd_global(intx, InlineFrequencyCount, 100);
|
||||
#define DEFAULT_STACK_SHADOW_PAGES (4 DEBUG_ONLY(+5))
|
||||
#define DEFAULT_STACK_RESERVED_PAGES (0)
|
||||
|
||||
#define MIN_STACK_YELLOW_PAGES 1
|
||||
#define MIN_STACK_RED_PAGES 1
|
||||
#define MIN_STACK_SHADOW_PAGES 1
|
||||
#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES
|
||||
#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES
|
||||
#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES
|
||||
#define MIN_STACK_RESERVED_PAGES (0)
|
||||
|
||||
define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES);
|
||||
@ -76,7 +76,8 @@ define_pd_global(bool, CompactStrings, false);
|
||||
// avoid biased locking while we are bootstrapping the aarch64 build
|
||||
define_pd_global(bool, UseBiasedLocking, false);
|
||||
|
||||
define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong);
|
||||
// Clear short arrays bigger than one word in an arch-specific way
|
||||
define_pd_global(intx, InitArrayShortSize, BytesPerLong);
|
||||
|
||||
#if defined(COMPILER1) || defined(COMPILER2)
|
||||
define_pd_global(intx, InlineSmallCode, 1000);
|
||||
|
@ -40,7 +40,43 @@
|
||||
#include "runtime/thread.inline.hpp"
|
||||
|
||||
|
||||
// Implementation of InterpreterMacroAssembler
|
||||
// Narrow the 32-bit integer held in `result` to the return type declared by
// the method of the current interpreter frame.
//
//   T_INT      -> left untouched
//   T_BOOLEAN  -> masked to bit 0
//   T_BYTE     -> sign-extended from 8 bits
//   T_CHAR     -> zero-extended from 16 bits
//   otherwise  -> sign-extended from 16 bits (the short case)
//
// rscratch1 is clobbered; the condition flags are modified by the compares.
void InterpreterMacroAssembler::narrow(Register result) {
  Label L_done, L_not_boolean, L_not_byte, L_not_char;

  // rscratch1 = method->_constMethod->_result_type, where the Method* is
  // read out of its slot in the current interpreter frame.
  ldr(rscratch1, Address(rfp, frame::interpreter_frame_method_offset * wordSize));
  ldr(rscratch1, Address(rscratch1, Method::const_offset()));
  ldrb(rscratch1, Address(rscratch1, ConstMethod::result_type_offset()));

  // Test the most frequent type first: an int needs no narrowing at all.
  cmpw(rscratch1, T_INT);
  br(Assembler::EQ, L_done);

  // boolean: keep only the lowest bit.
  cmpw(rscratch1, T_BOOLEAN);
  br(Assembler::NE, L_not_boolean);
  andw(result, result, 0x1);
  b(L_done);

  // byte: signed 8-bit field.
  bind(L_not_boolean);
  cmpw(rscratch1, T_BYTE);
  br(Assembler::NE, L_not_byte);
  sbfx(result, result, 0, 8);
  b(L_done);

  // char: unsigned 16-bit field, upper bits cleared.
  bind(L_not_byte);
  cmpw(rscratch1, T_CHAR);
  br(Assembler::NE, L_not_char);
  ubfx(result, result, 0, 16);
  b(L_done);

  // Everything that is left is handled as a short: signed 16-bit field.
  bind(L_not_char);
  sbfx(result, result, 0, 16);

  bind(L_done);
}
|
||||
|
||||
void InterpreterMacroAssembler::jump_to_entry(address entry) {
|
||||
assert(entry, "Entry must have been generated by now");
|
||||
@ -81,6 +117,7 @@ void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
|
||||
verify_oop(r0, state); break;
|
||||
case ltos: ldr(r0, val_addr); break;
|
||||
case btos: // fall through
|
||||
case ztos: // fall through
|
||||
case ctos: // fall through
|
||||
case stos: // fall through
|
||||
case itos: ldrw(r0, val_addr); break;
|
||||
@ -314,6 +351,7 @@ void InterpreterMacroAssembler::pop(TosState state) {
|
||||
switch (state) {
|
||||
case atos: pop_ptr(); break;
|
||||
case btos:
|
||||
case ztos:
|
||||
case ctos:
|
||||
case stos:
|
||||
case itos: pop_i(); break;
|
||||
@ -331,6 +369,7 @@ void InterpreterMacroAssembler::push(TosState state) {
|
||||
switch (state) {
|
||||
case atos: push_ptr(); break;
|
||||
case btos:
|
||||
case ztos:
|
||||
case ctos:
|
||||
case stos:
|
||||
case itos: push_i(); break;
|
||||
|
@ -245,6 +245,9 @@ class InterpreterMacroAssembler: public MacroAssembler {
|
||||
void update_mdp_by_constant(Register mdp_in, int constant);
|
||||
void update_mdp_for_ret(Register return_bci);
|
||||
|
||||
// narrow int return value
|
||||
void narrow(Register result);
|
||||
|
||||
void profile_taken_branch(Register mdp, Register bumped_count);
|
||||
void profile_not_taken_branch(Register mdp);
|
||||
void profile_call(Register mdp);
|
||||
|
@ -3217,6 +3217,14 @@ void MacroAssembler::load_klass(Register dst, Register src) {
|
||||
}
|
||||
}
|
||||
|
||||
// Load into dst the Java mirror of the class that holds `method`.
//
// The chain followed is
//   Method -> ConstMethod -> ConstantPool -> pool holder Klass -> java mirror.
//
// dst carries every intermediate pointer, so no scratch register is needed.
// dst may be the same register as method: method is only read by the first
// load, which is also the first write to dst.
//
// The walk now starts from the `method` argument. Previously the argument was
// ignored and the hard-wired rmethod register was read instead, which only
// gave the right answer for callers that happened to pass rmethod.
void MacroAssembler::load_mirror(Register dst, Register method) {
  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  ldr(dst, Address(method, Method::const_offset()));
  ldr(dst, Address(dst, ConstMethod::constants_offset()));
  ldr(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
  ldr(dst, Address(dst, mirror_offset));
}
|
||||
|
||||
void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
|
||||
if (UseCompressedClassPointers) {
|
||||
ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
|
||||
@ -4585,7 +4593,16 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
|
||||
assert(elem_size == 1 || elem_size == 2, "must be char or byte");
|
||||
assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
|
||||
|
||||
BLOCK_COMMENT(is_string ? "string_equals {" : "array_equals {");
|
||||
#ifndef PRODUCT
|
||||
{
|
||||
const char kind = (elem_size == 2) ? 'U' : 'L';
|
||||
char comment[64];
|
||||
snprintf(comment, sizeof comment, "%s%c%s {",
|
||||
is_string ? "string_equals" : "array_equals",
|
||||
kind, "{");
|
||||
BLOCK_COMMENT(comment);
|
||||
}
|
||||
#endif
|
||||
|
||||
mov(result, false);
|
||||
|
||||
@ -4670,8 +4687,97 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
|
||||
BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
|
||||
}
|
||||
|
||||
// base: Address of a buffer to be zeroed, 8 bytes aligned.
|
||||
// cnt: Count in 8-byte unit.
|
||||
// Register-count variant: zeroes cnt words starting at base by delegating to
// fill_words with zr as the fill value. Because fill_words updates its base
// and cnt registers and uses rscratch1/rscratch2, callers must treat base and
// cnt as modified.
void MacroAssembler::zero_words(Register base, Register cnt)
|
||||
{
|
||||
fill_words(base, cnt, zr);
|
||||
}
|
||||
|
||||
// encode char[] to byte[] in ISO_8859_1
|
||||
// base: Address of a buffer to be zeroed, 8 bytes aligned.
|
||||
// cnt: Immediate count in 8-byte unit.
|
||||
#define ShortArraySize (18 * BytesPerLong)
|
||||
void MacroAssembler::zero_words(Register base, u_int64_t cnt)
|
||||
{
|
||||
int i = cnt & 1; // store any odd word to start
|
||||
if (i) str(zr, Address(base));
|
||||
|
||||
if (cnt <= ShortArraySize / BytesPerLong) {
|
||||
for (; i < (int)cnt; i += 2)
|
||||
stp(zr, zr, Address(base, i * wordSize));
|
||||
} else {
|
||||
const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
|
||||
int remainder = cnt % (2 * unroll);
|
||||
for (; i < remainder; i += 2)
|
||||
stp(zr, zr, Address(base, i * wordSize));
|
||||
|
||||
Label loop;
|
||||
Register cnt_reg = rscratch1;
|
||||
Register loop_base = rscratch2;
|
||||
cnt = cnt - remainder;
|
||||
mov(cnt_reg, cnt);
|
||||
// adjust base and prebias by -2 * wordSize so we can pre-increment
|
||||
add(loop_base, base, (remainder - 2) * wordSize);
|
||||
bind(loop);
|
||||
sub(cnt_reg, cnt_reg, 2 * unroll);
|
||||
for (i = 1; i < unroll; i++)
|
||||
stp(zr, zr, Address(loop_base, 2 * i * wordSize));
|
||||
stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize)));
|
||||
cbnz(cnt_reg, loop);
|
||||
}
|
||||
}
|
||||
|
||||
// base: Address of a buffer to be filled, 8 bytes aligned.
|
||||
// cnt: Count in 8-byte unit.
|
||||
// value: Value to be filled with.
|
||||
// base will point to the end of the buffer after filling.
|
||||
void MacroAssembler::fill_words(Register base, Register cnt, Register value)
{
  // Fills cnt 8-byte words starting at base with value, using a loop that is
  // unrolled 8 times and entered part-way through (Duff's-device style):
  //
  //   rem  = cnt & 7;
  //   cnt -= rem;
  //   p   += rem;
  //   switch (rem) {
  //     do {
  //       cnt -= 8;
  //       p[-8] = v;
  //     case 7:  p[-7] = v;
  //     case 6:  p[-6] = v;
  //       // ...
  //     case 1:  p[-1] = v;
  //     case 0:  p += 8;
  //     } while (cnt);
  //   }
  //
  // On exit cnt is zero and base points just past the filled region.
  // rscratch1 and rscratch2 are clobbered.

  assert_different_registers(base, cnt, value, rscratch1, rscratch2);

  const int unroll = 8; // str instructions per loop iteration
  Label L_entry, L_body;

  // rscratch1 = cnt % unroll; when it is zero go straight to the loop test.
  andr(rscratch1, cnt, unroll - 1);
  cbz(rscratch1, L_entry);

  // Take the leftover words out of cnt and move base to the end of the
  // leftover region; every store below addresses backwards from base.
  sub(cnt, cnt, rscratch1);
  add(base, base, rscratch1, Assembler::LSL, 3);

  // Branch into the tail of the unrolled store sequence. Each str is one
  // 4-byte instruction, so backing up rscratch1 * 4 bytes from L_entry
  // executes exactly the last rscratch1 stores.
  adr(rscratch2, L_entry);
  sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
  br(rscratch2);

  bind(L_body);
  add(base, base, unroll * 8);
  sub(cnt, cnt, unroll);
  for (int back = unroll; back > 0; back--) {
    str(value, Address(base, -back * 8));
  }

  bind(L_entry);
  cbnz(cnt, L_body);
}
|
||||
|
||||
// Intrinsic for sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray and
|
||||
// java/lang/StringUTF16.compress.
|
||||
void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
Register len, Register result,
|
||||
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
||||
@ -4734,6 +4840,90 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
|
||||
BIND(DONE);
|
||||
sub(result, result, len); // Return index where we stopped
|
||||
// Return len == 0 if we processed all
|
||||
// characters
|
||||
}
|
||||
|
||||
|
||||
// Inflate byte[] array to char[].
|
||||
// Emits code that widens `len` bytes at src into 16-bit elements at dst.
//
// vtmp1 is zeroed once and used as the second zip1 source, so every source
// byte ends up followed by a zero byte in the output.
//
// len is treated as a 32-bit count. src, dst and len are modified; vtmp2,
// vtmp3, tmp4, rscratch1 and the condition flags are clobbered.
void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
                                        FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
                                        Register tmp4) {
  Label L_eight_or_more, L_finished;

  assert_different_registers(src, dst, len, tmp4, rscratch1);

  fmovd(vtmp1, zr);

  // rscratch1 = number of complete 8-byte groups.
  lsrw(rscratch1, len, 3);
  cbnzw(rscratch1, L_eight_or_more);

  // Fewer than 8 bytes in total.
  {
    Label L_byte_loop, L_short_end, L_under_four;

    // The flags from the subtraction decide whether at least 4 bytes are
    // present; andw leaves them alone while reducing len to the number of
    // bytes left for the byte loop.
    subsw(len, len, 4);
    andw(len, len, 3);
    br(LO, L_under_four);

    // Four bytes in one SIMD step.
    ldrs(vtmp2, post(src, 4));
    zip1(vtmp3, T8B, vtmp2, vtmp1);
    strd(vtmp3, post(dst, 8));

    cbzw(len, L_finished);

    // Whatever is left goes one byte at a time.
    bind(L_byte_loop);
    ldrb(tmp4, post(src, 1));
    strh(tmp4, post(dst, 2));
    subw(len, len, 1);

    bind(L_under_four);
    cbnz(len, L_byte_loop);

    bind(L_short_end);
    b(L_finished);
  }

  // Eight bytes per iteration; len keeps only the leftover byte count.
  bind(L_eight_or_more);
  andw(len, len, 7);

  {
    Label L_group_loop, L_group_end;

    bind(L_group_loop);
    ldrd(vtmp2, post(src, 8));
    sub(rscratch1, rscratch1, 1);
    zip1(vtmp3, T16B, vtmp2, vtmp1);
    st1(vtmp3, T8H, post(dst, 16));
    cbnz(rscratch1, L_group_loop);

    bind(L_group_end);
  }

  // Tail: step back so that one more 8-byte group ends exactly at the end of
  // the input. It may overlap bytes the loop already converted, which just
  // rewrites the same output.
  sub(src, src, 8);
  add(src, src, len, ext::uxtw, 0);
  ldrd(vtmp2, Address(src));
  sub(dst, dst, 16);
  add(dst, dst, len, ext::uxtw, 1);
  zip1(vtmp3, T16B, vtmp2, vtmp1);
  st1(vtmp3, T8H, Address(dst));

  bind(L_finished);
}
|
||||
|
||||
// Compress char[] array to byte[].
|
||||
// Thin wrapper over encode_iso_array that turns its outcome into an
// all-or-nothing result: `result` is kept only when len is zero after the
// encode, otherwise it is replaced by zero.
// NOTE(review): encode_iso_array is expected to leave len == 0 exactly when
// every character was processed -- confirm against its exit path.
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
                                         FloatRegister tmp1Reg, FloatRegister tmp2Reg,
                                         FloatRegister tmp3Reg, FloatRegister tmp4Reg,
                                         Register result) {
  encode_iso_array(src, dst, len, result, tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);

  // result = (len == 0) ? result : 0
  cmp(len, zr);
  csel(result, result, zr, Assembler::EQ);
}
|
||||
|
||||
// get_thread() can be called anywhere inside generated code so we
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -770,6 +770,8 @@ public:
|
||||
void store_klass(Register dst, Register src);
|
||||
void cmp_klass(Register oop, Register trial_klass, Register tmp);
|
||||
|
||||
void load_mirror(Register dst, Register method);
|
||||
|
||||
void load_heap_oop(Register dst, Address src);
|
||||
|
||||
void load_heap_oop_not_null(Register dst, Address src);
|
||||
@ -1184,6 +1186,19 @@ public:
|
||||
Register result, Register cnt1,
|
||||
int elem_size, bool is_string);
|
||||
|
||||
void fill_words(Register base, Register cnt, Register value);
|
||||
void zero_words(Register base, Register cnt);
|
||||
void zero_words(Register base, u_int64_t cnt);
|
||||
|
||||
void byte_array_inflate(Register src, Register dst, Register len,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
FloatRegister vtmp3, Register tmp4);
|
||||
|
||||
void char_array_compress(Register src, Register dst, Register len,
|
||||
FloatRegister tmp1Reg, FloatRegister tmp2Reg,
|
||||
FloatRegister tmp3Reg, FloatRegister tmp4Reg,
|
||||
Register result);
|
||||
|
||||
void encode_iso_array(Register src, Register dst,
|
||||
Register len, Register result,
|
||||
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
||||
|
@ -2021,6 +2021,136 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
//
|
||||
// Generate stub for array fill. If "aligned" is true, the
|
||||
// "to" address is assumed to be heapword aligned.
|
||||
//
|
||||
// Arguments for generated stub:
|
||||
// to: c_rarg0
|
||||
// value: c_rarg1
|
||||
// count: c_rarg2 treated as signed
|
||||
//
|
||||
address generate_fill(BasicType t, bool aligned, const char *name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
|
||||
const Register to = c_rarg0; // source array address
|
||||
const Register value = c_rarg1; // value
|
||||
const Register count = c_rarg2; // elements count
|
||||
const Register cnt_words = c_rarg3; // temp register
|
||||
|
||||
__ enter();
|
||||
|
||||
Label L_fill_elements, L_exit1;
|
||||
|
||||
int shift = -1;
|
||||
switch (t) {
|
||||
case T_BYTE:
|
||||
shift = 0;
|
||||
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ bfi(value, value, 8, 8); // 8 bit -> 16 bit
|
||||
__ bfi(value, value, 16, 16); // 16 bit -> 32 bit
|
||||
__ br(Assembler::LO, L_fill_elements);
|
||||
break;
|
||||
case T_SHORT:
|
||||
shift = 1;
|
||||
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ bfi(value, value, 16, 16); // 16 bit -> 32 bit
|
||||
__ br(Assembler::LO, L_fill_elements);
|
||||
break;
|
||||
case T_INT:
|
||||
shift = 2;
|
||||
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ br(Assembler::LO, L_fill_elements);
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// Align source address at 8 bytes address boundary.
|
||||
Label L_skip_align1, L_skip_align2, L_skip_align4;
|
||||
if (!aligned) {
|
||||
switch (t) {
|
||||
case T_BYTE:
|
||||
// One byte misalignment happens only for byte arrays.
|
||||
__ tbz(to, 0, L_skip_align1);
|
||||
__ strb(value, Address(__ post(to, 1)));
|
||||
__ subw(count, count, 1);
|
||||
__ bind(L_skip_align1);
|
||||
// Fallthrough
|
||||
case T_SHORT:
|
||||
// Two bytes misalignment happens only for byte and short (char) arrays.
|
||||
__ tbz(to, 1, L_skip_align2);
|
||||
__ strh(value, Address(__ post(to, 2)));
|
||||
__ subw(count, count, 2 >> shift);
|
||||
__ bind(L_skip_align2);
|
||||
// Fallthrough
|
||||
case T_INT:
|
||||
// Align to 8 bytes, we know we are 4 byte aligned to start.
|
||||
__ tbz(to, 2, L_skip_align4);
|
||||
__ strw(value, Address(__ post(to, 4)));
|
||||
__ subw(count, count, 4 >> shift);
|
||||
__ bind(L_skip_align4);
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Fill large chunks
|
||||
//
|
||||
__ lsrw(cnt_words, count, 3 - shift); // number of words
|
||||
__ bfi(value, value, 32, 32); // 32 bit -> 64 bit
|
||||
__ subw(count, count, cnt_words, Assembler::LSL, 3 - shift);
|
||||
__ fill_words(to, cnt_words, value);
|
||||
|
||||
// Remaining count is less than 8 bytes. Fill it by a single store.
|
||||
// Note that the total length is no less than 8 bytes.
|
||||
if (t == T_BYTE || t == T_SHORT) {
|
||||
Label L_exit1;
|
||||
__ cbzw(count, L_exit1);
|
||||
__ add(to, to, count, Assembler::LSL, shift); // points to the end
|
||||
__ str(value, Address(to, -8)); // overwrite some elements
|
||||
__ bind(L_exit1);
|
||||
__ leave();
|
||||
__ ret(lr);
|
||||
}
|
||||
|
||||
// Handle copies less than 8 bytes.
|
||||
Label L_fill_2, L_fill_4, L_exit2;
|
||||
__ bind(L_fill_elements);
|
||||
switch (t) {
|
||||
case T_BYTE:
|
||||
__ tbz(count, 0, L_fill_2);
|
||||
__ strb(value, Address(__ post(to, 1)));
|
||||
__ bind(L_fill_2);
|
||||
__ tbz(count, 1, L_fill_4);
|
||||
__ strh(value, Address(__ post(to, 2)));
|
||||
__ bind(L_fill_4);
|
||||
__ tbz(count, 2, L_exit2);
|
||||
__ strw(value, Address(to));
|
||||
break;
|
||||
case T_SHORT:
|
||||
__ tbz(count, 0, L_fill_4);
|
||||
__ strh(value, Address(__ post(to, 2)));
|
||||
__ bind(L_fill_4);
|
||||
__ tbz(count, 1, L_exit2);
|
||||
__ strw(value, Address(to));
|
||||
break;
|
||||
case T_INT:
|
||||
__ cbzw(count, L_exit2);
|
||||
__ strw(value, Address(to));
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
__ bind(L_exit2);
|
||||
__ leave();
|
||||
__ ret(lr);
|
||||
return start;
|
||||
}
|
||||
|
||||
void generate_arraycopy_stubs() {
|
||||
address entry;
|
||||
address entry_jbyte_arraycopy;
|
||||
@ -2124,6 +2254,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
entry_jlong_arraycopy,
|
||||
entry_checkcast_arraycopy);
|
||||
|
||||
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
|
||||
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
|
||||
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
|
||||
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
|
||||
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
|
||||
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
|
||||
}
|
||||
|
||||
// Placeholder generator: no math stubs are produced, the body only
// calls Unimplemented().
void generate_math_stubs() {
  Unimplemented();
}
|
||||
|
@ -759,18 +759,13 @@ void TemplateInterpreterGenerator::lock_method() {
|
||||
|
||||
// get synchronization object
|
||||
{
|
||||
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
||||
Label done;
|
||||
__ ldrw(r0, access_flags);
|
||||
__ tst(r0, JVM_ACC_STATIC);
|
||||
// get receiver (assume this is frequent case)
|
||||
__ ldr(r0, Address(rlocals, Interpreter::local_offset_in_bytes(0)));
|
||||
__ br(Assembler::EQ, done);
|
||||
__ ldr(r0, Address(rmethod, Method::const_offset()));
|
||||
__ ldr(r0, Address(r0, ConstMethod::constants_offset()));
|
||||
__ ldr(r0, Address(r0,
|
||||
ConstantPool::pool_holder_offset_in_bytes()));
|
||||
__ ldr(r0, Address(r0, mirror_offset));
|
||||
__ load_mirror(r0, rmethod);
|
||||
|
||||
#ifdef ASSERT
|
||||
{
|
||||
@ -807,16 +802,16 @@ void TemplateInterpreterGenerator::lock_method() {
|
||||
void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
|
||||
// initialize fixed part of activation frame
|
||||
if (native_call) {
|
||||
__ sub(esp, sp, 12 * wordSize);
|
||||
__ sub(esp, sp, 14 * wordSize);
|
||||
__ mov(rbcp, zr);
|
||||
__ stp(esp, zr, Address(__ pre(sp, -12 * wordSize)));
|
||||
__ stp(esp, zr, Address(__ pre(sp, -14 * wordSize)));
|
||||
// add 2 zero-initialized slots for native calls
|
||||
__ stp(zr, zr, Address(sp, 10 * wordSize));
|
||||
__ stp(zr, zr, Address(sp, 12 * wordSize));
|
||||
} else {
|
||||
__ sub(esp, sp, 10 * wordSize);
|
||||
__ sub(esp, sp, 12 * wordSize);
|
||||
__ ldr(rscratch1, Address(rmethod, Method::const_offset())); // get ConstMethod
|
||||
__ add(rbcp, rscratch1, in_bytes(ConstMethod::codes_offset())); // get codebase
|
||||
__ stp(esp, rbcp, Address(__ pre(sp, -10 * wordSize)));
|
||||
__ stp(esp, rbcp, Address(__ pre(sp, -12 * wordSize)));
|
||||
}
|
||||
|
||||
if (ProfileInterpreter) {
|
||||
@ -825,22 +820,26 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
|
||||
__ cbz(rscratch1, method_data_continue);
|
||||
__ lea(rscratch1, Address(rscratch1, in_bytes(MethodData::data_offset())));
|
||||
__ bind(method_data_continue);
|
||||
__ stp(rscratch1, rmethod, Address(sp, 4 * wordSize)); // save Method* and mdp (method data pointer)
|
||||
__ stp(rscratch1, rmethod, Address(sp, 6 * wordSize)); // save Method* and mdp (method data pointer)
|
||||
} else {
|
||||
__ stp(zr, rmethod, Address(sp, 4 * wordSize)); // save Method* (no mdp)
|
||||
__ stp(zr, rmethod, Address(sp, 6 * wordSize)); // save Method* (no mdp)
|
||||
}
|
||||
|
||||
// Get mirror and store it in the frame as GC root for this Method*
|
||||
__ load_mirror(rscratch1, rmethod);
|
||||
__ stp(rscratch1, zr, Address(sp, 4 * wordSize));
|
||||
|
||||
__ ldr(rcpool, Address(rmethod, Method::const_offset()));
|
||||
__ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset()));
|
||||
__ ldr(rcpool, Address(rcpool, ConstantPool::cache_offset_in_bytes()));
|
||||
__ stp(rlocals, rcpool, Address(sp, 2 * wordSize));
|
||||
|
||||
__ stp(rfp, lr, Address(sp, 8 * wordSize));
|
||||
__ lea(rfp, Address(sp, 8 * wordSize));
|
||||
__ stp(rfp, lr, Address(sp, 10 * wordSize));
|
||||
__ lea(rfp, Address(sp, 10 * wordSize));
|
||||
|
||||
// set sender sp
|
||||
// leave last_sp as null
|
||||
__ stp(zr, r13, Address(sp, 6 * wordSize));
|
||||
__ stp(zr, r13, Address(sp, 8 * wordSize));
|
||||
|
||||
// Move SP out of the way
|
||||
if (! native_call) {
|
||||
@ -1242,15 +1241,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
|
||||
// pass mirror handle if static call
|
||||
{
|
||||
Label L;
|
||||
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
||||
__ ldrw(t, Address(rmethod, Method::access_flags_offset()));
|
||||
__ tst(t, JVM_ACC_STATIC);
|
||||
__ br(Assembler::EQ, L);
|
||||
// get mirror
|
||||
__ ldr(t, Address(rmethod, Method::const_offset()));
|
||||
__ ldr(t, Address(t, ConstMethod::constants_offset()));
|
||||
__ ldr(t, Address(t, ConstantPool::pool_holder_offset_in_bytes()));
|
||||
__ ldr(t, Address(t, mirror_offset));
|
||||
__ load_mirror(t, rmethod);
|
||||
// copy mirror into activation frame
|
||||
__ str(t, Address(rfp, frame::interpreter_frame_oop_temp_offset * wordSize));
|
||||
// pass handle to mirror
|
||||
|
@ -229,6 +229,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
|
||||
switch (bc) {
|
||||
case Bytecodes::_fast_aputfield:
|
||||
case Bytecodes::_fast_bputfield:
|
||||
case Bytecodes::_fast_zputfield:
|
||||
case Bytecodes::_fast_cputfield:
|
||||
case Bytecodes::_fast_dputfield:
|
||||
case Bytecodes::_fast_fputfield:
|
||||
@ -1082,6 +1083,17 @@ void TemplateTable::bastore()
|
||||
// r1: index
|
||||
// r3: array
|
||||
index_check(r3, r1); // prefer index in r1
|
||||
|
||||
// Need to check whether array is boolean or byte
|
||||
// since both types share the bastore bytecode.
|
||||
__ load_klass(r2, r3);
|
||||
__ ldrw(r2, Address(r2, Klass::layout_helper_offset()));
|
||||
int diffbit_index = exact_log2(Klass::layout_helper_boolean_diffbit());
|
||||
Label L_skip;
|
||||
__ tbz(r2, diffbit_index, L_skip);
|
||||
__ andw(r0, r0, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1
|
||||
__ bind(L_skip);
|
||||
|
||||
__ lea(rscratch1, Address(r3, r1, Address::uxtw(0)));
|
||||
__ strb(r0, Address(rscratch1,
|
||||
arrayOopDesc::base_offset_in_bytes(T_BYTE)));
|
||||
@ -2193,6 +2205,13 @@ void TemplateTable::_return(TosState state)
|
||||
if (_desc->bytecode() == Bytecodes::_return)
|
||||
__ membar(MacroAssembler::StoreStore);
|
||||
|
||||
// Narrow result if state is itos but result type is smaller.
|
||||
// Need to narrow in the return bytecode rather than in generate_return_entry
|
||||
// since compiled code callers expect the result to already be narrowed.
|
||||
if (state == itos) {
|
||||
__ narrow(r0);
|
||||
}
|
||||
|
||||
__ remove_activation(state);
|
||||
__ ret(lr);
|
||||
}
|
||||
@ -2386,7 +2405,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
|
||||
|
||||
const Address field(obj, off);
|
||||
|
||||
Label Done, notByte, notInt, notShort, notChar,
|
||||
Label Done, notByte, notBool, notInt, notShort, notChar,
|
||||
notLong, notFloat, notObj, notDouble;
|
||||
|
||||
// x86 uses a shift and mask or wings it with a shift plus assert
|
||||
@ -2409,6 +2428,20 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
|
||||
__ b(Done);
|
||||
|
||||
__ bind(notByte);
|
||||
__ cmp(flags, ztos);
|
||||
__ br(Assembler::NE, notBool);
|
||||
|
||||
// ztos (same code as btos)
|
||||
__ ldrsb(r0, field);
|
||||
__ push(ztos);
|
||||
// Rewrite bytecode to be faster
|
||||
if (!is_static) {
|
||||
// use btos rewriting, no truncating to t/f bit is needed for getfield.
|
||||
patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
|
||||
}
|
||||
__ b(Done);
|
||||
|
||||
__ bind(notBool);
|
||||
__ cmp(flags, atos);
|
||||
__ br(Assembler::NE, notObj);
|
||||
// atos
|
||||
@ -2604,7 +2637,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
|
||||
// field address
|
||||
const Address field(obj, off);
|
||||
|
||||
Label notByte, notInt, notShort, notChar,
|
||||
Label notByte, notBool, notInt, notShort, notChar,
|
||||
notLong, notFloat, notObj, notDouble;
|
||||
|
||||
// x86 uses a shift and mask or wings it with a shift plus assert
|
||||
@ -2629,6 +2662,22 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
|
||||
}
|
||||
|
||||
__ bind(notByte);
|
||||
__ cmp(flags, ztos);
|
||||
__ br(Assembler::NE, notBool);
|
||||
|
||||
// ztos
|
||||
{
|
||||
__ pop(ztos);
|
||||
if (!is_static) pop_and_check_object(obj);
|
||||
__ andw(r0, r0, 0x1);
|
||||
__ strb(r0, field);
|
||||
if (!is_static) {
|
||||
patch_bytecode(Bytecodes::_fast_zputfield, bc, r1, true, byte_no);
|
||||
}
|
||||
__ b(Done);
|
||||
}
|
||||
|
||||
__ bind(notBool);
|
||||
__ cmp(flags, atos);
|
||||
__ br(Assembler::NE, notObj);
|
||||
|
||||
@ -2783,6 +2832,7 @@ void TemplateTable::jvmti_post_fast_field_mod()
|
||||
switch (bytecode()) { // load values into the jvalue object
|
||||
case Bytecodes::_fast_aputfield: __ push_ptr(r0); break;
|
||||
case Bytecodes::_fast_bputfield: // fall through
|
||||
case Bytecodes::_fast_zputfield: // fall through
|
||||
case Bytecodes::_fast_sputfield: // fall through
|
||||
case Bytecodes::_fast_cputfield: // fall through
|
||||
case Bytecodes::_fast_iputfield: __ push_i(r0); break;
|
||||
@ -2808,6 +2858,7 @@ void TemplateTable::jvmti_post_fast_field_mod()
|
||||
switch (bytecode()) { // restore tos values
|
||||
case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break;
|
||||
case Bytecodes::_fast_bputfield: // fall through
|
||||
case Bytecodes::_fast_zputfield: // fall through
|
||||
case Bytecodes::_fast_sputfield: // fall through
|
||||
case Bytecodes::_fast_cputfield: // fall through
|
||||
case Bytecodes::_fast_iputfield: __ pop_i(r0); break;
|
||||
@ -2863,6 +2914,9 @@ void TemplateTable::fast_storefield(TosState state)
|
||||
case Bytecodes::_fast_iputfield:
|
||||
__ strw(r0, field);
|
||||
break;
|
||||
case Bytecodes::_fast_zputfield:
|
||||
__ andw(r0, r0, 0x1); // boolean is true if LSB is 1
|
||||
// fall through to bputfield
|
||||
case Bytecodes::_fast_bputfield:
|
||||
__ strb(r0, field);
|
||||
break;
|
||||
@ -2982,7 +3036,7 @@ void TemplateTable::fast_xaccess(TosState state)
|
||||
__ null_check(r0);
|
||||
switch (state) {
|
||||
case itos:
|
||||
__ ldr(r0, Address(r0, r1, Address::lsl(0)));
|
||||
__ ldrw(r0, Address(r0, r1, Address::lsl(0)));
|
||||
break;
|
||||
case atos:
|
||||
__ load_heap_oop(r0, Address(r0, r1, Address::lsl(0)));
|
||||
@ -3000,7 +3054,7 @@ void TemplateTable::fast_xaccess(TosState state)
|
||||
__ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
|
||||
ConstantPoolCacheEntry::flags_offset())));
|
||||
__ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
|
||||
__ membar(MacroAssembler::LoadLoad);
|
||||
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
||||
__ bind(notVolatile);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -152,6 +152,7 @@ void AbstractInterpreter::layout_activation(Method* method,
|
||||
intptr_t* top_frame_sp = is_top_frame ? sp : sp + (frame::abi_minframe_size - frame::abi_reg_args_size) / Interpreter::stackElementSize;
|
||||
|
||||
interpreter_frame->interpreter_frame_set_method(method);
|
||||
interpreter_frame->interpreter_frame_set_mirror(method->method_holder()->java_mirror());
|
||||
interpreter_frame->interpreter_frame_set_locals(locals_base);
|
||||
interpreter_frame->interpreter_frame_set_cpcache(method->constants()->cache());
|
||||
interpreter_frame->interpreter_frame_set_esp(esp);
|
||||
|
@ -2845,6 +2845,9 @@ void LIR_Assembler::membar_storeload() {
|
||||
__ membar(Assembler::StoreLoad);
|
||||
}
|
||||
|
||||
// No spin-wait code is emitted here; the body only calls Unimplemented().
void LIR_Assembler::on_spin_wait()
{
  Unimplemented();
}
|
||||
|
||||
void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
|
||||
LIR_Address* addr = addr_opr->as_address_ptr();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -1056,7 +1056,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
|
||||
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
|
||||
Values* dims = x->dims();
|
||||
int i = dims->length();
|
||||
LIRItemList* items = new LIRItemList(dims->length(), NULL);
|
||||
LIRItemList* items = new LIRItemList(i, i, NULL);
|
||||
while (i-- > 0) {
|
||||
LIRItem* size = new LIRItem(dims->at(i), this);
|
||||
items->at_put(i, size);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -35,22 +35,6 @@
|
||||
#include "opto/matcher.hpp"
|
||||
#endif
|
||||
|
||||
// Release the CompiledICHolder* associated with this call site is there is one.
|
||||
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
|
||||
// This call site might have become stale so inspect it carefully.
|
||||
NativeCall* call = nativeCall_at(call_site->addr());
|
||||
if (is_icholder_entry(call->destination())) {
|
||||
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
|
||||
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns whether the call at this relocation currently targets an
// icholder entry.
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
  // The call site may have gone stale, so decide from the call's current
  // destination.
  return is_icholder_entry(nativeCall_at(call_site->addr())->destination());
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// A PPC CompiledStaticCall looks like this:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -261,6 +261,7 @@
|
||||
uint64_t ijava_reserved2; // Inserted for alignment.
|
||||
#endif
|
||||
uint64_t method;
|
||||
uint64_t mirror;
|
||||
uint64_t locals;
|
||||
uint64_t monitors;
|
||||
uint64_t cpoolCache;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -148,6 +148,11 @@ inline BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
|
||||
// Address of the "method" field of this frame's ijava_state, viewed as Method**.
inline Method** frame::interpreter_frame_method_addr() const {
  return (Method**) &get_ijava_state()->method;
}
|
||||
|
||||
// Address of the "mirror" field of this frame's ijava_state, viewed as oop*.
inline oop* frame::interpreter_frame_mirror_addr() const {
  return (oop*) &get_ijava_state()->mirror;
}
|
||||
|
||||
// Address of the "cpoolCache" field of this frame's ijava_state, viewed as
// ConstantPoolCache**.
inline ConstantPoolCache** frame::interpreter_frame_cpoolcache_addr() const {
  return (ConstantPoolCache**) &get_ijava_state()->cpoolCache;
}
|
||||
|
@ -3118,6 +3118,14 @@ void MacroAssembler::load_klass(Register dst, Register src) {
|
||||
}
|
||||
}
|
||||
|
||||
// Emit the load chain Method -> ConstMethod -> ConstantPool -> pool holder
// -> java mirror. "mirror" doubles as the scratch register for every
// intermediate pointer; "method" is only read.
void MacroAssembler::load_mirror(Register mirror, Register method) {
  ld(mirror, in_bytes(Method::const_offset()), method);             // ConstMethod
  ld(mirror, in_bytes(ConstMethod::constants_offset()), mirror);    // ConstantPool
  ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);  // pool holder
  ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);        // java mirror
}
|
||||
|
||||
// Clear Array
|
||||
// Kills both input registers. tmp == R0 is allowed.
|
||||
void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
|
||||
|
@ -647,6 +647,9 @@ class MacroAssembler: public Assembler {
|
||||
void load_klass(Register dst, Register src);
|
||||
void store_klass(Register dst_oop, Register klass, Register tmp = R0);
|
||||
void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
|
||||
|
||||
void load_mirror(Register mirror, Register method);
|
||||
|
||||
static int instr_size_for_decode_klass_not_null();
|
||||
void decode_klass_not_null(Register dst, Register src = noreg);
|
||||
Register encode_klass_not_null(Register dst, Register src = noreg);
|
||||
|
@ -2047,6 +2047,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
// Always answers false.
const bool Matcher::has_predicated_vectors(void)
{
  return false;
}
|
||||
|
||||
// Returns the supplied default threshold unchanged.
const int Matcher::float_pressure(int default_pressure_threshold)
{
  return default_pressure_threshold;
}
|
||||
|
@ -871,7 +871,6 @@ void TemplateInterpreterGenerator::lock_method(Register Rflags, Register Rscratc
|
||||
|
||||
// Get synchronization object to Rscratch2.
|
||||
{
|
||||
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
||||
Label Lstatic;
|
||||
Label Ldone;
|
||||
|
||||
@ -883,10 +882,7 @@ void TemplateInterpreterGenerator::lock_method(Register Rflags, Register Rscratc
|
||||
__ b(Ldone);
|
||||
|
||||
__ bind(Lstatic); // Static case: Lock the java mirror
|
||||
__ ld(Robj_to_lock, in_bytes(Method::const_offset()), R19_method);
|
||||
__ ld(Robj_to_lock, in_bytes(ConstMethod::constants_offset()), Robj_to_lock);
|
||||
__ ld(Robj_to_lock, ConstantPool::pool_holder_offset_in_bytes(), Robj_to_lock);
|
||||
__ ld(Robj_to_lock, mirror_offset, Robj_to_lock);
|
||||
__ load_mirror(Robj_to_lock, R19_method);
|
||||
|
||||
__ bind(Ldone);
|
||||
__ verify_oop(Robj_to_lock);
|
||||
@ -1051,10 +1047,14 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist
|
||||
__ addi(R26_monitor, R1_SP, - frame::ijava_state_size);
|
||||
__ addi(R15_esp, R26_monitor, - Interpreter::stackElementSize);
|
||||
|
||||
// Get mirror and store it in the frame as GC root for this Method*
|
||||
__ load_mirror(R12_scratch2, R19_method);
|
||||
|
||||
// Store values.
|
||||
// R15_esp, R14_bcp, R26_monitor, R28_mdx are saved at java calls
|
||||
// in InterpreterMacroAssembler::call_from_interpreter.
|
||||
__ std(R19_method, _ijava_state_neg(method), R1_SP);
|
||||
__ std(R12_scratch2, _ijava_state_neg(mirror), R1_SP);
|
||||
__ std(R21_sender_SP, _ijava_state_neg(sender_sp), R1_SP);
|
||||
__ std(R27_constPoolCache, _ijava_state_neg(cpoolCache), R1_SP);
|
||||
__ std(R18_locals, _ijava_state_neg(locals), R1_SP);
|
||||
@ -1319,21 +1319,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
|
||||
__ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT);
|
||||
__ bfalse(CCR0, method_is_not_static);
|
||||
|
||||
// constants = method->constants();
|
||||
__ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
|
||||
__ ld(R11_scratch1, in_bytes(ConstMethod::constants_offset()), R11_scratch1);
|
||||
// pool_holder = method->constants()->pool_holder();
|
||||
__ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(),
|
||||
R11_scratch1/*constants*/);
|
||||
|
||||
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
||||
|
||||
// mirror = pool_holder->klass_part()->java_mirror();
|
||||
__ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/);
|
||||
__ load_mirror(R12_sratch2, R19_method);
|
||||
// state->_native_mirror = mirror;
|
||||
|
||||
__ ld(R11_scratch1, 0, R1_SP);
|
||||
__ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1);
|
||||
__ std(R12_scratch2/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1);
|
||||
// R4_ARG2 = &state->_oop_temp;
|
||||
__ addi(R4_ARG2, R11_scratch1, _ijava_state_neg(oop_tmp));
|
||||
BIND(method_is_not_static);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -281,11 +281,12 @@ void AbstractInterpreter::layout_activation(Method* method,
|
||||
// Llast_SP will be same as SP as there is no adapter space
|
||||
*interpreter_frame->register_addr(Llast_SP) = (intptr_t) interpreter_frame->sp() - STACK_BIAS;
|
||||
*interpreter_frame->register_addr(LcpoolCache) = (intptr_t) method->constants()->cache();
|
||||
// save the mirror in the interpreter frame
|
||||
*interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror();
|
||||
#ifdef FAST_DISPATCH
|
||||
*interpreter_frame->register_addr(IdispatchTables) = (intptr_t) Interpreter::dispatch_table();
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef ASSERT
|
||||
BasicObjectLock* mp = (BasicObjectLock*)monitors;
|
||||
|
||||
|
@ -3313,6 +3313,9 @@ void LIR_Assembler::membar_storeload() {
|
||||
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
|
||||
}
|
||||
|
||||
// No spin-wait code is emitted here; the body only calls Unimplemented().
void LIR_Assembler::on_spin_wait()
{
  Unimplemented();
}
|
||||
|
||||
// Pack two sequential registers containing 32 bit values
|
||||
// into a single 64 bit register.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1034,7 +1034,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
|
||||
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
|
||||
Values* dims = x->dims();
|
||||
int i = dims->length();
|
||||
LIRItemList* items = new LIRItemList(dims->length(), NULL);
|
||||
LIRItemList* items = new LIRItemList(i, i, NULL);
|
||||
while (i-- > 0) {
|
||||
LIRItem* size = new LIRItem(dims->at(i), this);
|
||||
items->at_put(i, size);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -34,22 +34,6 @@
|
||||
#include "opto/matcher.hpp"
|
||||
#endif
|
||||
|
||||
// Release the CompiledICHolder* associated with this call site is there is one.
|
||||
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
|
||||
// This call site might have become stale so inspect it carefully.
|
||||
NativeCall* call = nativeCall_at(call_site->addr());
|
||||
if (is_icholder_entry(call->destination())) {
|
||||
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
|
||||
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns whether the call at this relocation currently targets an
// icholder entry.
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
  // The call site may have gone stale, so decide from the call's current
  // destination.
  return is_icholder_entry(nativeCall_at(call_site->addr())->destination());
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#define __ _masm.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -783,7 +783,7 @@ void frame::describe_pd(FrameValues& values, int frame_no) {
|
||||
if (is_interpreted_frame()) {
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp);
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp);
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_padding);
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_mirror);
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_oop_temp);
|
||||
|
||||
// esp, according to Lesp (e.g. not depending on bci), if seems valid
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -209,7 +209,8 @@
|
||||
// 2 words, also used to save float regs across calls to C
|
||||
interpreter_frame_d_scratch_fp_offset = -2,
|
||||
interpreter_frame_l_scratch_fp_offset = -4,
|
||||
interpreter_frame_padding_offset = -5, // for native calls only
|
||||
interpreter_frame_mirror_offset = -5, // keep interpreted method alive
|
||||
|
||||
interpreter_frame_oop_temp_offset = -6, // for native calls only
|
||||
interpreter_frame_vm_locals_fp_offset = -6, // should be same as above, and should be zero mod 8
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -163,6 +163,10 @@ inline void frame::interpreter_frame_set_monitors(BasicObjectLock* monitors) {
|
||||
*interpreter_frame_monitors_addr() = monitors;
|
||||
}
|
||||
|
||||
// Address of the mirror slot, found at fp() + interpreter_frame_mirror_offset
// and viewed as oop*.
inline oop* frame::interpreter_frame_mirror_addr() const
{
  return (oop*) (fp() + interpreter_frame_mirror_offset);
}
|
||||
|
||||
// Constant pool cache
|
||||
|
||||
// where LcpoolCache is saved:
|
||||
|
@ -3257,12 +3257,12 @@ void MacroAssembler::eden_allocate(
|
||||
if (var_size_in_bytes->is_valid()) {
|
||||
// size is unknown at compile time
|
||||
cmp(free, var_size_in_bytes);
|
||||
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
|
||||
brx(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
|
||||
delayed()->add(obj, var_size_in_bytes, end);
|
||||
} else {
|
||||
// size is known at compile time
|
||||
cmp(free, con_size_in_bytes);
|
||||
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
|
||||
brx(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
|
||||
delayed()->add(obj, con_size_in_bytes, end);
|
||||
}
|
||||
// Compare obj with the value at top_addr; if still equal, swap the value of
|
||||
@ -3972,6 +3972,14 @@ void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_v
|
||||
card_table_write(bs->byte_map_base, tmp, store_addr);
|
||||
}
|
||||
|
||||
// Emit the load chain Method -> ConstMethod -> ConstantPool -> pool holder
// -> java mirror. "mirror" doubles as the scratch register for every
// intermediate pointer; "method" is only read.
void MacroAssembler::load_mirror(Register mirror, Register method) {
  ld_ptr(method, in_bytes(Method::const_offset()), mirror);             // ConstMethod
  ld_ptr(mirror, in_bytes(ConstMethod::constants_offset()), mirror);    // ConstantPool
  ld_ptr(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);  // pool holder
  ld_ptr(mirror, in_bytes(Klass::java_mirror_offset()), mirror);        // java mirror
}
|
||||
|
||||
void MacroAssembler::load_klass(Register src_oop, Register klass) {
|
||||
// The number of bytes in this code is used by
|
||||
// MachCallDynamicJavaNode::ret_addr_offset()
|
||||
|
@ -1012,6 +1012,8 @@ public:
|
||||
inline void ldbool(const Address& a, Register d);
|
||||
inline void movbool( bool boolconst, Register d);
|
||||
|
||||
void load_mirror(Register mirror, Register method);
|
||||
|
||||
// klass oop manipulations if compressed
|
||||
void load_klass(Register src_oop, Register klass);
|
||||
void store_klass(Register klass, Register dst_oop);
|
||||
|
@ -1904,6 +1904,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
// Predicated-vector query for this platform description: always answers false.
const bool Matcher::has_predicated_vectors() {
  return false;
}
|
||||
|
||||
// Float register pressure threshold: the supplied default is returned
// unchanged, i.e. no platform-specific adjustment is made here.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
|
||||
|
@ -557,17 +557,12 @@ void TemplateInterpreterGenerator::lock_method() {
|
||||
|
||||
// get synchronization object to O0
|
||||
{ Label done;
|
||||
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
||||
__ btst(JVM_ACC_STATIC, O0);
|
||||
__ br( Assembler::zero, true, Assembler::pt, done);
|
||||
__ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case
|
||||
|
||||
__ ld_ptr( Lmethod, in_bytes(Method::const_offset()), O0);
|
||||
__ ld_ptr( O0, in_bytes(ConstMethod::constants_offset()), O0);
|
||||
__ ld_ptr( O0, ConstantPool::pool_holder_offset_in_bytes(), O0);
|
||||
|
||||
// lock the mirror, not the Klass*
|
||||
__ ld_ptr( O0, mirror_offset, O0);
|
||||
__ load_mirror(O0, Lmethod);
|
||||
|
||||
#ifdef ASSERT
|
||||
__ tst(O0);
|
||||
@ -881,6 +876,10 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
|
||||
__ add(Lbcp, in_bytes(ConstMethod::codes_offset()), Lbcp);
|
||||
}
|
||||
__ mov( G5_method, Lmethod); // set Lmethod
|
||||
// Get mirror and store it in the frame as GC root for this Method*
|
||||
Register mirror = LcpoolCache;
|
||||
__ load_mirror(mirror, Lmethod);
|
||||
__ st_ptr(mirror, FP, (frame::interpreter_frame_mirror_offset * wordSize) + STACK_BIAS);
|
||||
__ get_constant_pool_cache( LcpoolCache ); // set LcpoolCache
|
||||
__ sub(FP, rounded_vm_local_words * BytesPerWord, Lmonitors ); // set Lmonitors
|
||||
#ifdef _LP64
|
||||
@ -1297,12 +1296,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
|
||||
// get native function entry point(O0 is a good temp until the very end)
|
||||
__ delayed()->ld_ptr(Lmethod, in_bytes(Method::native_function_offset()), O0);
|
||||
// for static methods insert the mirror argument
|
||||
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
||||
|
||||
__ ld_ptr(Lmethod, Method:: const_offset(), O1);
|
||||
__ ld_ptr(O1, ConstMethod::constants_offset(), O1);
|
||||
__ ld_ptr(O1, ConstantPool::pool_holder_offset_in_bytes(), O1);
|
||||
__ ld_ptr(O1, mirror_offset, O1);
|
||||
__ load_mirror(O1, Lmethod);
|
||||
#ifdef ASSERT
|
||||
if (!PrintSignatureHandlers) // do not dirty the output with this
|
||||
{ Label L;
|
||||
|
@ -112,11 +112,14 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
|
||||
}
|
||||
if (AllocatePrefetchInstr == 1) {
|
||||
// Need a space at the end of TLAB for BIS since it
|
||||
// will fault when accessing memory outside of heap.
|
||||
// Need extra space at the end of TLAB for BIS, otherwise prefetching
|
||||
// instructions will fault (due to accessing memory outside of heap).
|
||||
// The amount of space is the max of the number of lines to
|
||||
// prefetch for array and for instance allocations. (Extra space must be
|
||||
// reserved to accommodate both types of allocations.)
|
||||
|
||||
// +1 for rounding up to next cache line, +1 to be safe
|
||||
int lines = AllocatePrefetchLines + 2;
|
||||
int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
|
||||
int step_size = AllocatePrefetchStepSize;
|
||||
int distance = AllocatePrefetchDistance;
|
||||
_reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -113,6 +113,8 @@ void AbstractInterpreter::layout_activation(Method* method,
|
||||
}
|
||||
*interpreter_frame->interpreter_frame_cache_addr() =
|
||||
method->constants()->cache();
|
||||
*interpreter_frame->interpreter_frame_mirror_addr() =
|
||||
method->method_holder()->java_mirror();
|
||||
}
|
||||
|
||||
#ifndef _LP64
|
||||
|
@ -1240,6 +1240,7 @@ void Assembler::addr_nop_8() {
|
||||
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -1250,6 +1251,7 @@ void Assembler::addsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_operand(dst, src);
|
||||
@ -1599,6 +1601,7 @@ void Assembler::comisd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);;
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x2F);
|
||||
emit_operand(dst, src);
|
||||
@ -1607,6 +1610,7 @@ void Assembler::comisd(XMMRegister dst, Address src) {
|
||||
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x2F);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -1733,6 +1737,7 @@ void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5A);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -1743,6 +1748,7 @@ void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5A);
|
||||
emit_operand(dst, src);
|
||||
@ -1827,6 +1833,15 @@ void Assembler::cvttss2sil(Register dst, XMMRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the register-register form of cvttpd2dq: a SIMD prefix selected with
// VEX_SIMD_66 / VEX_OPCODE_0F, the opcode byte 0xE6, and a final byte built
// from 0xC0 OR-ed with the value returned by simd_prefix_and_encode().
void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  // 512-bit vector length when supports_avx512novl() is true, else 128-bit.
  int vlen = AVX_128bit;
  if (VM_Version::supports_avx512novl()) {
    vlen = AVX_512bit;
  }

  InstructionAttr attr(vlen,
                       /* rex_w */ true,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ false,
                       /* uses_vl */ true);

  // Kept ahead of the opcode emission, exactly as in the original ordering.
  const int reg_bits = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attr);
  emit_int8((unsigned char)0xE6);
  emit_int8((unsigned char)(0xC0 | reg_bits));
}
|
||||
|
||||
void Assembler::decl(Address dst) {
|
||||
// Don't use it directly. Use MacroAssembler::decrement() instead.
|
||||
InstructionMark im(this);
|
||||
@ -1840,6 +1855,7 @@ void Assembler::divsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_operand(dst, src);
|
||||
@ -1848,6 +1864,7 @@ void Assembler::divsd(XMMRegister dst, Address src) {
|
||||
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -2122,6 +2139,7 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x28);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -2156,6 +2174,7 @@ void Assembler::movddup(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
|
||||
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x12);
|
||||
emit_int8(0xC0 | encode);
|
||||
@ -2193,6 +2212,15 @@ void Assembler::kmovwl(Register dst, KRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// kmovwl overload taking a mask-register destination and a memory source.
// Emits a VEX prefix (VEX_SIMD_NONE / VEX_OPCODE_0F), the opcode byte 0x90,
// and then the operand bytes for (dst, src). Asserts EVEX support.
void Assembler::kmovwl(KRegister dst, Address src) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark mark(this);
  InstructionAttr attr(AVX_128bit,
                       /* vex_w */ false,
                       /* legacy_mode */ true,
                       /* no_mask_reg */ true,
                       /* uses_vl */ false);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attr);
  emit_int8((unsigned char)0x90);
  // emit_operand is called with the mask register cast to Register.
  emit_operand((Register)dst, src);
}
|
||||
|
||||
void Assembler::kmovdl(KRegister dst, Register src) {
|
||||
assert(VM_Version::supports_avx512bw(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
@ -2251,6 +2279,14 @@ void Assembler::kmovql(Register dst, KRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// knotwl on two mask registers (register-register form).
// Emits a VEX prefix (VEX_SIMD_NONE / VEX_OPCODE_0F), the opcode byte 0x44,
// and a final byte built from 0xC0 OR-ed with the value returned by
// vex_prefix_and_encode(). Asserts EVEX support.
void Assembler::knotwl(KRegister dst, KRegister src) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attr(AVX_128bit,
                       /* rex_w */ false,
                       /* legacy_mode */ true,
                       /* no_mask_reg */ true,
                       /* uses_vl */ false);
  // Kept ahead of the opcode emission, exactly as in the original ordering.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attr);
  emit_int8((unsigned char)0x44);
  emit_int8((unsigned char)(0xC0 | reg_bits));
}
|
||||
|
||||
// This instruction produces ZF or CF flags
|
||||
void Assembler::kortestbl(KRegister src1, KRegister src2) {
|
||||
assert(VM_Version::supports_avx512dq(), "");
|
||||
@ -2423,6 +2459,7 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
|
||||
void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6F);
|
||||
@ -2435,6 +2472,7 @@ void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6F);
|
||||
emit_operand(dst, src);
|
||||
@ -2447,6 +2485,7 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x7F);
|
||||
emit_operand(src, dst);
|
||||
@ -2455,6 +2494,7 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
|
||||
void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6F);
|
||||
@ -2466,6 +2506,7 @@ void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6F);
|
||||
@ -2478,6 +2519,7 @@ void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x7F);
|
||||
@ -2509,8 +2551,8 @@ void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
|
||||
assert(src != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x7F);
|
||||
emit_operand(src, dst);
|
||||
@ -2529,8 +2571,8 @@ void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6F);
|
||||
emit_operand(dst, src);
|
||||
@ -2541,8 +2583,8 @@ void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
|
||||
assert(src != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x7F);
|
||||
emit_operand(src, dst);
|
||||
@ -2592,6 +2634,7 @@ void Assembler::movlpd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x12);
|
||||
emit_operand(dst, src);
|
||||
@ -2622,6 +2665,7 @@ void Assembler::movq(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x7E);
|
||||
emit_operand(dst, src);
|
||||
@ -2632,6 +2676,7 @@ void Assembler::movq(Address dst, XMMRegister src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD6);
|
||||
emit_operand(src, dst);
|
||||
@ -2656,6 +2701,7 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb
|
||||
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x10);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -2666,6 +2712,7 @@ void Assembler::movsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x10);
|
||||
emit_operand(dst, src);
|
||||
@ -2676,6 +2723,7 @@ void Assembler::movsd(Address dst, XMMRegister src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x11);
|
||||
emit_operand(src, dst);
|
||||
@ -2799,6 +2847,7 @@ void Assembler::mulsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_operand(dst, src);
|
||||
@ -2807,6 +2856,7 @@ void Assembler::mulsd(XMMRegister dst, Address src) {
|
||||
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -3786,6 +3836,7 @@ void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4081,6 +4132,7 @@ void Assembler::smovl() {
|
||||
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x51);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4091,6 +4143,7 @@ void Assembler::sqrtsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x51);
|
||||
emit_operand(dst, src);
|
||||
@ -4166,6 +4219,7 @@ void Assembler::subl(Register dst, Register src) {
|
||||
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4176,6 +4230,7 @@ void Assembler::subsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_operand(dst, src);
|
||||
@ -4263,6 +4318,7 @@ void Assembler::ucomisd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x2E);
|
||||
emit_operand(dst, src);
|
||||
@ -4271,6 +4327,7 @@ void Assembler::ucomisd(XMMRegister dst, Address src) {
|
||||
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x2E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4382,6 +4439,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_operand(dst, src);
|
||||
@ -4390,6 +4448,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4418,6 +4477,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_operand(dst, src);
|
||||
@ -4426,6 +4486,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4454,6 +4515,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_operand(dst, src);
|
||||
@ -4462,6 +4524,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4490,6 +4553,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_operand(dst, src);
|
||||
@ -4498,6 +4562,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4528,6 +4593,7 @@ void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
void Assembler::addpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4537,6 +4603,7 @@ void Assembler::addpd(XMMRegister dst, Address src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
@ -4555,6 +4622,7 @@ void Assembler::addps(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4573,6 +4641,7 @@ void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_operand(dst, src);
|
||||
@ -4591,6 +4660,7 @@ void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
void Assembler::subpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4607,6 +4677,7 @@ void Assembler::subps(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4625,6 +4696,7 @@ void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_operand(dst, src);
|
||||
@ -4643,6 +4715,7 @@ void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4653,6 +4726,7 @@ void Assembler::mulpd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_operand(dst, src);
|
||||
@ -4669,6 +4743,7 @@ void Assembler::mulps(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4687,6 +4762,7 @@ void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_operand(dst, src);
|
||||
@ -4705,6 +4781,7 @@ void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
void Assembler::divpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4721,6 +4798,7 @@ void Assembler::divps(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4739,6 +4817,7 @@ void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_operand(dst, src);
|
||||
@ -4757,6 +4836,7 @@ void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x51);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4767,6 +4847,7 @@ void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x51);
|
||||
emit_operand(dst, src);
|
||||
@ -4775,6 +4856,7 @@ void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
|
||||
void Assembler::andpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x54);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4803,6 +4885,7 @@ void Assembler::andpd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x54);
|
||||
emit_operand(dst, src);
|
||||
@ -4811,6 +4894,7 @@ void Assembler::andpd(XMMRegister dst, Address src) {
|
||||
void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x54);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4829,6 +4913,7 @@ void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x54);
|
||||
emit_operand(dst, src);
|
||||
@ -4847,6 +4932,7 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x15);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4855,6 +4941,7 @@ void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x14);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4863,6 +4950,7 @@ void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x57);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4881,6 +4969,7 @@ void Assembler::xorpd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x57);
|
||||
emit_operand(dst, src);
|
||||
@ -4899,6 +4988,7 @@ void Assembler::xorps(XMMRegister dst, Address src) {
|
||||
void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x57);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4917,6 +5007,7 @@ void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x57);
|
||||
emit_operand(dst, src);
|
||||
@ -4987,13 +5078,14 @@ void Assembler::paddd(XMMRegister dst, Address src) {
|
||||
// Emits the register/register form of paddq. `dst` is passed to the prefix
// encoder as both the first and second register operand, `src` as the third.
void Assembler::paddq(XMMRegister dst, XMMRegister src) {
  // SSE2 availability check, wrapped in NOT_LP64 (presumably only active on
  // non-LP64 builds -- confirm against the macro definition).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  // 128-bit attributes; the rex_w flag follows EVEX support and is then
  // marked as "reverted" before the prefix is generated.
  InstructionAttr attr(AVX_128bit,
                       /* rex_w */ VM_Version::supports_evex(),
                       /* legacy_mode */ false,
                       /* no_mask_reg */ false,
                       /* uses_vl */ true);
  attr.set_rex_vex_w_reverted();

  // Prefix (66 SIMD prefix, 0F opcode map), then opcode byte 0xD4, then the
  // operand byte formed as 0xC0 | <value returned by the prefix encoder>.
  const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attr);
  emit_int8((unsigned char)0xD4);
  emit_int8((unsigned char)(0xC0 | reg_bits));
}
|
||||
|
||||
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
|
||||
assert(VM_Version::supports_sse3(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x01);
|
||||
@ -5001,7 +5093,7 @@ void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
|
||||
assert(VM_Version::supports_sse3(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x02);
|
||||
@ -5035,6 +5127,7 @@ void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
|
||||
void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD4);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5075,6 +5168,7 @@ void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD4);
|
||||
emit_operand(dst, src);
|
||||
@ -5106,6 +5200,7 @@ void Assembler::psubd(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::psubq(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xFB);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5138,6 +5233,7 @@ void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
|
||||
void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xFB);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5178,6 +5274,7 @@ void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xFB);
|
||||
emit_operand(dst, src);
|
||||
@ -5216,8 +5313,9 @@ void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int v
|
||||
}
|
||||
|
||||
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(UseAVX > 2, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
assert(UseAVX > 2, "requires some form of EVEX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x40);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5244,10 +5342,11 @@ void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vecto
|
||||
}
|
||||
|
||||
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
assert(UseAVX > 2, "requires some form of EVEX");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x40);
|
||||
emit_operand(dst, src);
|
||||
@ -5303,6 +5402,7 @@ void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
|
||||
void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xF3);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5332,6 +5432,7 @@ void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_l
|
||||
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
// XMM6 is for /6 encoding: 66 0F 73 /6 ib
|
||||
int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x73);
|
||||
@ -5358,6 +5459,7 @@ void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int
|
||||
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xF3);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5389,6 +5491,7 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
|
||||
// shifts 128 bit value in xmm register by number of bytes.
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
|
||||
int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x73);
|
||||
@ -5415,6 +5518,7 @@ void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
|
||||
void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD3);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5443,6 +5547,7 @@ void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_l
|
||||
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
|
||||
int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x73);
|
||||
@ -5469,6 +5574,7 @@ void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int
|
||||
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD3);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5578,6 +5684,7 @@ void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_
|
||||
void Assembler::pandn(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xDF);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5867,9 +5974,9 @@ void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
|
||||
}
|
||||
|
||||
void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(VM_Version::supports_avx512dq(), "");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x39);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5957,9 +6064,9 @@ void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
|
||||
}
|
||||
|
||||
void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(VM_Version::supports_avx512dq(), "");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x19);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -6084,7 +6191,8 @@ void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
|
||||
// duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
|
||||
void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -6094,7 +6202,8 @@ void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
// swap src<->dst for encoding
|
||||
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
@ -6129,7 +6238,8 @@ void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
|
||||
// duplicate double precision data from src into programmed locations in dest : requires AVX512VL
|
||||
void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x19);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -6139,8 +6249,9 @@ void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
// swap src<->dst for encoding
|
||||
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x19);
|
||||
@ -6154,12 +6265,9 @@ void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
|
||||
void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
if (attributes.is_evex_instruction()) {
|
||||
emit_int8(0x7A);
|
||||
} else {
|
||||
emit_int8(0x78);
|
||||
}
|
||||
emit_int8(0x7A);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
@ -6167,12 +6275,9 @@ void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
|
||||
void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
if (attributes.is_evex_instruction()) {
|
||||
emit_int8(0x7B);
|
||||
} else {
|
||||
emit_int8(0x79);
|
||||
}
|
||||
emit_int8(0x7B);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
@ -6180,12 +6285,9 @@ void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
|
||||
void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
if (attributes.is_evex_instruction()) {
|
||||
emit_int8(0x7C);
|
||||
} else {
|
||||
emit_int8(0x58);
|
||||
}
|
||||
emit_int8(0x7C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
@ -6193,12 +6295,9 @@ void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
|
||||
void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
if (attributes.is_evex_instruction()) {
|
||||
emit_int8(0x7C);
|
||||
} else {
|
||||
emit_int8(0x59);
|
||||
}
|
||||
emit_int8(0x7C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
@ -6853,6 +6952,9 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix
|
||||
attributes->set_is_evex_instruction();
|
||||
evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
|
||||
} else {
|
||||
if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
|
||||
attributes->set_rex_vex_w(false);
|
||||
}
|
||||
vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
|
||||
}
|
||||
}
|
||||
@ -6912,6 +7014,9 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
|
||||
attributes->set_is_evex_instruction();
|
||||
evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
|
||||
} else {
|
||||
if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
|
||||
attributes->set_rex_vex_w(false);
|
||||
}
|
||||
vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
|
||||
}
|
||||
|
||||
@ -6966,6 +7071,21 @@ void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM
|
||||
emit_int8((unsigned char)(0xF0 & src2_enc<<4));
|
||||
}
|
||||
|
||||
// Emits the 32-bit SHLX instruction (operands are general-purpose registers).
// Requires BMI2 (asserted). The prefix is built with vex_w = false, SIMD
// prefix 66 and opcode map 0F 38; the opcode byte 0xF7 follows, and then the
// byte (0xC0 | encode) computed by vex_prefix_and_encode.
// Note the operand routing: src2 is passed in the nds slot and src1 in the
// src slot of vex_prefix_and_encode.
// NOTE(review): per the Intel SDM encoding of SHLX this should compute
// dst = src1 << src2 (shift count in src2) — confirm against the manual.
// NOTE(review): legacy_mode = true presumably keeps this on the VEX (non-EVEX)
// encoding path — confirm against InstructionAttr.
void Assembler::shlxl(Register dst, Register src1, Register src2) {
|
||||
assert(VM_Version::supports_bmi2(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8((unsigned char)0xF7);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the 64-bit SHLX instruction. Identical to shlxl except that the
// prefix is built with vex_w = true.
// Requires BMI2 (asserted). The prefix uses SIMD prefix 66 and opcode map
// 0F 38; the opcode byte 0xF7 follows, and then the byte (0xC0 | encode)
// computed by vex_prefix_and_encode.
// Note the operand routing: src2 is passed in the nds slot and src1 in the
// src slot of vex_prefix_and_encode.
// NOTE(review): per the Intel SDM encoding of SHLX this should compute
// dst = src1 << src2 (shift count in src2) — confirm against the manual.
void Assembler::shlxq(Register dst, Register src1, Register src2) {
|
||||
assert(VM_Version::supports_bmi2(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8((unsigned char)0xF7);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
#ifndef _LP64
|
||||
|
||||
|
@ -1048,6 +1048,8 @@ private:
|
||||
void cvttss2sil(Register dst, XMMRegister src);
|
||||
void cvttss2siq(Register dst, XMMRegister src);
|
||||
|
||||
void cvttpd2dq(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// Divide Scalar Double-Precision Floating-Point Values
|
||||
void divsd(XMMRegister dst, Address src);
|
||||
void divsd(XMMRegister dst, XMMRegister src);
|
||||
@ -1335,6 +1337,7 @@ private:
|
||||
void kmovbl(KRegister dst, Register src);
|
||||
void kmovbl(Register dst, KRegister src);
|
||||
void kmovwl(KRegister dst, Register src);
|
||||
void kmovwl(KRegister dst, Address src);
|
||||
void kmovwl(Register dst, KRegister src);
|
||||
void kmovdl(KRegister dst, Register src);
|
||||
void kmovdl(Register dst, KRegister src);
|
||||
@ -1344,6 +1347,8 @@ private:
|
||||
void kmovql(KRegister dst, Register src);
|
||||
void kmovql(Register dst, KRegister src);
|
||||
|
||||
void knotwl(KRegister dst, KRegister src);
|
||||
|
||||
void kortestbl(KRegister dst, KRegister src);
|
||||
void kortestwl(KRegister dst, KRegister src);
|
||||
void kortestdl(KRegister dst, KRegister src);
|
||||
@ -2050,6 +2055,8 @@ private:
|
||||
void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
|
||||
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
|
||||
|
||||
void shlxl(Register dst, Register src1, Register src2);
|
||||
void shlxq(Register dst, Register src1, Register src2);
|
||||
|
||||
protected:
|
||||
// Next instructions require address alignment 16 bytes SSE mode.
|
||||
@ -2075,6 +2082,7 @@ public:
|
||||
:
|
||||
_avx_vector_len(vector_len),
|
||||
_rex_vex_w(rex_vex_w),
|
||||
_rex_vex_w_reverted(false),
|
||||
_legacy_mode(legacy_mode),
|
||||
_no_reg_mask(no_reg_mask),
|
||||
_uses_vl(uses_vl),
|
||||
@ -2098,6 +2106,7 @@ public:
|
||||
private:
|
||||
int _avx_vector_len;
|
||||
bool _rex_vex_w;
|
||||
bool _rex_vex_w_reverted;
|
||||
bool _legacy_mode;
|
||||
bool _no_reg_mask;
|
||||
bool _uses_vl;
|
||||
@ -2114,6 +2123,7 @@ public:
|
||||
// query functions for field accessors
|
||||
int get_vector_len(void) const { return _avx_vector_len; }
|
||||
bool is_rex_vex_w(void) const { return _rex_vex_w; }
|
||||
bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
|
||||
bool is_legacy_mode(void) const { return _legacy_mode; }
|
||||
bool is_no_reg_mask(void) const { return _no_reg_mask; }
|
||||
bool uses_vl(void) const { return _uses_vl; }
|
||||
@ -2127,6 +2137,12 @@ public:
|
||||
// Set the vector len manually
|
||||
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
|
||||
|
||||
// Set revert rex_vex_w for avx encoding
|
||||
void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
|
||||
|
||||
// Set rex_vex_w based on state
|
||||
void set_rex_vex_w(bool state) { _rex_vex_w = state; }
|
||||
|
||||
// Set the instruction to be encoded in AVX mode
|
||||
void set_is_legacy_mode(void) { _legacy_mode = true; }
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -169,18 +169,18 @@ void FpuStackSim::clear() {
|
||||
|
||||
intArray* FpuStackSim::write_state() {
|
||||
intArray* res = new intArray(1 + FrameMap::nof_fpu_regs);
|
||||
(*res)[0] = stack_size();
|
||||
res->append(stack_size());
|
||||
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
|
||||
(*res)[1 + i] = regs_at(i);
|
||||
res->append(regs_at(i));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
void FpuStackSim::read_state(intArray* fpu_stack_state) {
|
||||
_stack_size = (*fpu_stack_state)[0];
|
||||
_stack_size = fpu_stack_state->at(0);
|
||||
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
|
||||
set_regs_at(i, (*fpu_stack_state)[1 + i]);
|
||||
set_regs_at(i, fpu_stack_state->at(1 + i));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2365,13 +2365,8 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
|
||||
} else if (value->is_double_fpu()) {
|
||||
assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
|
||||
switch(code) {
|
||||
case lir_log10 : __ flog10() ; break;
|
||||
case lir_abs : __ fabs() ; break;
|
||||
case lir_sqrt : __ fsqrt(); break;
|
||||
case lir_tan :
|
||||
// Should consider not saving rbx, if not necessary
|
||||
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
|
||||
break;
|
||||
default : ShouldNotReachHere();
|
||||
}
|
||||
} else {
|
||||
@ -3886,6 +3881,10 @@ void LIR_Assembler::membar_storeload() {
|
||||
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
|
||||
}
|
||||
|
||||
// Code generated for the spin-wait operation: a single `pause` emitted
// through the assembler (`__`). No operands and no other state is touched here.
void LIR_Assembler::on_spin_wait() {
|
||||
__ pause ();
|
||||
}
|
||||
|
||||
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
|
||||
assert(result_reg->is_register(), "check");
|
||||
#ifdef _LP64
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -813,7 +813,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
||||
|
||||
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
|
||||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
|
||||
x->id() == vmIntrinsics::_dsin) {
|
||||
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
|
||||
x->id() == vmIntrinsics::_dlog10) {
|
||||
do_LibmIntrinsic(x);
|
||||
return;
|
||||
}
|
||||
@ -821,58 +822,17 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
||||
LIRItem value(x->argument_at(0), this);
|
||||
|
||||
bool use_fpu = false;
|
||||
if (UseSSE >= 2) {
|
||||
switch(x->id()) {
|
||||
case vmIntrinsics::_dtan:
|
||||
case vmIntrinsics::_dlog10:
|
||||
use_fpu = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (UseSSE < 2) {
|
||||
value.set_destroys_register();
|
||||
}
|
||||
|
||||
value.load_item();
|
||||
|
||||
LIR_Opr calc_input = value.result();
|
||||
LIR_Opr calc_input2 = NULL;
|
||||
if (x->id() == vmIntrinsics::_dpow) {
|
||||
LIRItem extra_arg(x->argument_at(1), this);
|
||||
if (UseSSE < 2) {
|
||||
extra_arg.set_destroys_register();
|
||||
}
|
||||
extra_arg.load_item();
|
||||
calc_input2 = extra_arg.result();
|
||||
}
|
||||
LIR_Opr calc_result = rlock_result(x);
|
||||
|
||||
// sin, cos, pow and exp need two free fpu stack slots, so register
|
||||
// two temporary operands
|
||||
LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0);
|
||||
LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1);
|
||||
|
||||
if (use_fpu) {
|
||||
LIR_Opr tmp = FrameMap::fpu0_double_opr;
|
||||
int tmp_start = 1;
|
||||
if (calc_input2 != NULL) {
|
||||
__ move(calc_input2, tmp);
|
||||
tmp_start = 2;
|
||||
calc_input2 = tmp;
|
||||
}
|
||||
__ move(calc_input, tmp);
|
||||
|
||||
calc_input = tmp;
|
||||
calc_result = tmp;
|
||||
|
||||
tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start);
|
||||
tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1);
|
||||
}
|
||||
|
||||
switch(x->id()) {
|
||||
case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
|
||||
case vmIntrinsics::_dsqrt: __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
|
||||
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
|
||||
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
|
||||
@ -913,21 +873,28 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
||||
result_reg = tmp;
|
||||
switch(x->id()) {
|
||||
case vmIntrinsics::_dexp:
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog:
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog10:
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dpow:
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
|
||||
@ -947,18 +914,44 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dtan:
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
#else
|
||||
switch (x->id()) {
|
||||
case vmIntrinsics::_dexp:
|
||||
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog:
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog10:
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dpow:
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dsin:
|
||||
if (StubRoutines::dsin() != NULL) {
|
||||
@ -974,6 +967,13 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dtan:
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
#endif // _LP64
|
||||
@ -1261,7 +1261,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
|
||||
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
|
||||
Values* dims = x->dims();
|
||||
int i = dims->length();
|
||||
LIRItemList* items = new LIRItemList(dims->length(), NULL);
|
||||
LIRItemList* items = new LIRItemList(i, i, NULL);
|
||||
while (i-- > 0) {
|
||||
LIRItem* size = new LIRItem(dims->at(i), this);
|
||||
items->at_put(i, size);
|
||||
|
@ -786,58 +786,6 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
|
||||
break;
|
||||
}
|
||||
|
||||
case lir_log10: {
|
||||
// log and log10 need one temporary fpu stack slot, so
|
||||
// there is one temporary register stored in temp of the
|
||||
// operation. the stack allocator must guarantee that the stack
|
||||
// slots are really free, otherwise there might be a stack
|
||||
// overflow.
|
||||
assert(right->is_illegal(), "must be");
|
||||
assert(left->is_fpu_register(), "must be");
|
||||
assert(res->is_fpu_register(), "must be");
|
||||
assert(op2->tmp1_opr()->is_fpu_register(), "must be");
|
||||
|
||||
insert_free_if_dead(op2->tmp1_opr());
|
||||
insert_free_if_dead(res, left);
|
||||
insert_exchange(left);
|
||||
do_rename(left, res);
|
||||
|
||||
new_left = to_fpu_stack_top(res);
|
||||
new_res = new_left;
|
||||
|
||||
op2->set_fpu_stack_size(sim()->stack_size());
|
||||
assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case lir_tan: {
|
||||
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary
|
||||
// registers (stored in right and temp of the operation).
|
||||
// the stack allocator must guarantee that the stack slots are really free,
|
||||
// otherwise there might be a stack overflow.
|
||||
assert(left->is_fpu_register(), "must be");
|
||||
assert(res->is_fpu_register(), "must be");
|
||||
// assert(left->is_last_use(), "old value gets destroyed");
|
||||
assert(right->is_fpu_register(), "right is used as the first temporary register");
|
||||
assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register");
|
||||
assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
|
||||
|
||||
insert_free_if_dead(right);
|
||||
insert_free_if_dead(op2->tmp1_opr());
|
||||
|
||||
insert_free_if_dead(res, left);
|
||||
insert_exchange(left);
|
||||
do_rename(left, res);
|
||||
|
||||
new_left = to_fpu_stack_top(res);
|
||||
new_res = new_left;
|
||||
|
||||
op2->set_fpu_stack_size(sim()->stack_size());
|
||||
assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
assert(false, "missed a fpu-operation");
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -31,22 +31,6 @@
|
||||
#include "runtime/mutexLocker.hpp"
|
||||
#include "runtime/safepoint.hpp"
|
||||
|
||||
// Release the CompiledICHolder* associated with this call site if there is one.
|
||||
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
|
||||
// This call site might have become stale so inspect it carefully.
|
||||
NativeCall* call = nativeCall_at(call_site->addr());
|
||||
// Only call sites whose current destination is an icholder entry carry a holder.
if (is_icholder_entry(call->destination())) {
|
||||
// The holder pointer is the constant loaded by the instruction at cached_value().
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
|
||||
// The holder is queued for release through InlineCacheBuffer, not freed here.
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when the call instruction at call_site->addr() currently
// targets an icholder entry, as decided by is_icholder_entry().
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
|
||||
// This call site might have become stale so inspect it carefully.
|
||||
NativeCall* call = nativeCall_at(call_site->addr());
|
||||
return is_icholder_entry(call->destination());
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#define __ _masm.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -640,6 +640,7 @@ void frame::describe_pd(FrameValues& values, int frame_no) {
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_method);
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_mirror);
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_cache);
|
||||
DESCRIBE_FP_OFFSET(interpreter_frame_locals);
|
||||
|
@ -70,7 +70,8 @@
|
||||
// outgoing sp before a call to an invoked method
|
||||
interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
|
||||
interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1,
|
||||
interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1,
|
||||
interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1,
|
||||
interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1,
|
||||
interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1,
|
||||
interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1,
|
||||
interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -184,6 +184,12 @@ inline Method** frame::interpreter_frame_method_addr() const {
|
||||
return (Method**)addr_at(interpreter_frame_method_offset);
|
||||
}
|
||||
|
||||
// Mirror
|
||||
|
||||
// Returns the address of the frame slot at interpreter_frame_mirror_offset,
// typed as oop*. The slot itself is not read or written here.
inline oop* frame::interpreter_frame_mirror_addr() const {
|
||||
return (oop*)addr_at(interpreter_frame_mirror_offset);
|
||||
}
|
||||
|
||||
// top of expression stack
|
||||
inline intptr_t* frame::interpreter_frame_tos_address() const {
|
||||
intptr_t* last_sp = interpreter_frame_last_sp();
|
||||
|
@ -194,9 +194,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||
product(bool, UseBMI2Instructions, false, \
|
||||
"Use BMI2 instructions") \
|
||||
\
|
||||
diagnostic(bool, UseLibmSinIntrinsic, true, \
|
||||
"Use Libm Sin Intrinsic") \
|
||||
\
|
||||
diagnostic(bool, UseLibmCosIntrinsic, true, \
|
||||
"Use Libm Cos Intrinsic")
|
||||
diagnostic(bool, UseLibmIntrinsic, true, \
|
||||
"Use Libm Intrinsics")
|
||||
#endif // CPU_X86_VM_GLOBALS_X86_HPP
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -3399,6 +3399,18 @@ void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
|
||||
}
|
||||
}
|
||||
|
||||
// Loads opmask register k1 with a mask derived from the count in src, using
// dst as scratch; on exit dst holds a copy of src.
// dst is overwritten before src is read, so dst and src must be different
// registers for the sequence to work as written.
// NOTE(review): assuming shlxl(d, a, c) computes d = a << c, the value moved
// into k1 is (1 << src) - 1, i.e. the low `src` bits set — confirm.
void MacroAssembler::setvectmask(Register dst, Register src) {
|
||||
Assembler::movl(dst, 1); // dst = 1
|
||||
Assembler::shlxl(dst, dst, src); // shift dst by the count in src
|
||||
Assembler::decl(dst); // dst = dst - 1
|
||||
Assembler::kmovdl(k1, dst); // copy the computed value into k1
|
||||
Assembler::movl(dst, src); // dst leaves with the original count
|
||||
}
|
||||
|
||||
// Rewrites opmask register k1 from k0 with a single knotwl.
// NOTE(review): presumably KNOTW, i.e. k1 = ~k0 over 16 bits, which undoes
// the partial mask installed by setvectmask only if k0 reads as zero — confirm
// against the Intel SDM and the callers.
void MacroAssembler::restorevectmask() {
|
||||
Assembler::knotwl(k1, k0);
|
||||
}
|
||||
|
||||
void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
|
||||
if (reachable(src)) {
|
||||
if (UseXmmLoadAndClearUpper) {
|
||||
@ -6693,6 +6705,14 @@ void MacroAssembler::restore_cpu_control_state_after_jni() {
|
||||
#endif // _LP64
|
||||
}
|
||||
|
||||
// Loads into `mirror` the value found by chasing four pointers starting at
// `method`: Method::const_offset() -> ConstMethod::constants_offset() ->
// ConstantPool::pool_holder_offset_in_bytes() -> Klass::java_mirror_offset().
// Each step loads through the previous result held in `mirror`; `method` is
// only read, by the first load. No null checks are emitted.
void MacroAssembler::load_mirror(Register mirror, Register method) {
|
||||
// get mirror
|
||||
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
||||
movptr(mirror, Address(method, Method::const_offset())); // value at Method::const_offset()
|
||||
movptr(mirror, Address(mirror, ConstMethod::constants_offset())); // value at ConstMethod::constants_offset()
|
||||
movptr(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes())); // pool holder
|
||||
movptr(mirror, Address(mirror, mirror_offset)); // value at Klass::java_mirror_offset()
|
||||
}
|
||||
|
||||
void MacroAssembler::load_klass(Register dst, Register src) {
|
||||
#ifdef _LP64
|
||||
|
@ -156,6 +156,10 @@ class MacroAssembler: public Assembler {
|
||||
void incrementq(Register reg, int value = 1);
|
||||
void incrementq(Address dst, int value = 1);
|
||||
|
||||
// special instructions for EVEX
|
||||
void setvectmask(Register dst, Register src);
|
||||
void restorevectmask();
|
||||
|
||||
// Support optimal SSE move instructions.
|
||||
void movflt(XMMRegister dst, XMMRegister src) {
|
||||
if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
|
||||
@ -319,6 +323,8 @@ class MacroAssembler: public Assembler {
|
||||
void movbool(Address dst, Register src);
|
||||
void testbool(Register dst);
|
||||
|
||||
void load_mirror(Register mirror, Register method);
|
||||
|
||||
// oop manipulations
|
||||
void load_klass(Register dst, Register src);
|
||||
void store_klass(Register dst, Register src);
|
||||
@ -928,6 +934,10 @@ class MacroAssembler: public Assembler {
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
|
||||
|
||||
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register r11);
|
||||
|
||||
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||
Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
|
||||
@ -941,11 +951,19 @@ class MacroAssembler: public Assembler {
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1,
|
||||
Register tmp2, Register tmp3, Register tmp4);
|
||||
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1,
|
||||
Register tmp2, Register tmp3, Register tmp4);
|
||||
#else
|
||||
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1);
|
||||
|
||||
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp);
|
||||
|
||||
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||
Register rdx, Register tmp);
|
||||
@ -964,6 +982,14 @@ class MacroAssembler: public Assembler {
|
||||
|
||||
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
|
||||
Register esi, Register edi, Register ebp, Register esp);
|
||||
|
||||
void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
|
||||
Register edx, Register ebx, Register esi, Register edi,
|
||||
Register ebp, Register esp);
|
||||
|
||||
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp);
|
||||
#endif
|
||||
|
||||
void increase_precision();
|
||||
|
889
hotspot/src/cpu/x86/vm/macroAssembler_x86_cos.cpp
Normal file
889
hotspot/src/cpu/x86/vm/macroAssembler_x86_cos.cpp
Normal file
@ -0,0 +1,889 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - COS()
|
||||
// ---------------------
|
||||
//
|
||||
// 1. RANGE REDUCTION
|
||||
//
|
||||
// We perform an initial range reduction from X to r with
|
||||
//
|
||||
// X =~= N * pi/32 + r
|
||||
//
|
||||
// so that |r| <= pi/64 + epsilon. We restrict inputs to those
|
||||
// where |N| <= 932560. Beyond this, the range reduction is
|
||||
// insufficiently accurate. For extremely small inputs,
|
||||
// denormalization can occur internally, impacting performance.
|
||||
// This means that the main path is actually only taken for
|
||||
// 2^-252 <= |X| < 90112.
|
||||
//
|
||||
// To avoid branches, we perform the range reduction to full
|
||||
// accuracy each time.
|
||||
//
|
||||
// X - N * (P_1 + P_2 + P_3)
|
||||
//
|
||||
// where P_1 and P_2 are 32-bit numbers (so multiplication by N
|
||||
// is exact) and P_3 is a 53-bit number. Together, these
|
||||
// approximate pi/32 well enough for all cases in the restricted
|
||||
// range.
|
||||
//
|
||||
// The main reduction sequence is:
|
||||
//
|
||||
// y = 32/pi * x
|
||||
// N = integer(y)
|
||||
// (computed by adding and subtracting off SHIFTER)
|
||||
//
|
||||
// m_1 = N * P_1
|
||||
// m_2 = N * P_2
|
||||
// r_1 = x - m_1
|
||||
// r = r_1 - m_2
|
||||
// (this r can be used for most of the calculation)
|
||||
//
|
||||
// c_1 = r_1 - r
|
||||
// m_3 = N * P_3
|
||||
// c_2 = c_1 - m_2
|
||||
// c = c_2 - m_3
|
||||
//
|
||||
// 2. MAIN ALGORITHM
|
||||
//
|
||||
// The algorithm uses a table lookup based on B = M * pi / 32
|
||||
// where M = N mod 64. The stored values are:
|
||||
// sigma closest power of 2 to cos(B)
|
||||
// C_hl 53-bit cos(B) - sigma
|
||||
// S_hi + S_lo 2 * 53-bit sin(B)
|
||||
//
|
||||
// The computation is organized as follows:
|
||||
//
|
||||
// sin(B + r + c) = [sin(B) + sigma * r] +
|
||||
// r * (cos(B) - sigma) +
|
||||
// sin(B) * [cos(r + c) - 1] +
|
||||
// cos(B) * [sin(r + c) - r]
|
||||
//
|
||||
// which is approximately:
|
||||
//
|
||||
// [S_hi + sigma * r] +
|
||||
// C_hl * r +
|
||||
// S_lo + S_hi * [(cos(r) - 1) - r * c] +
|
||||
// (C_hl + sigma) * [(sin(r) - r) + c]
|
||||
//
|
||||
// and this is what is actually computed. We separate this sum
|
||||
// into four parts:
|
||||
//
|
||||
// hi + med + pols + corr
|
||||
//
|
||||
// where
|
||||
//
|
||||
// hi = S_hi + sigma r
|
||||
// med = C_hl * r
|
||||
// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
|
||||
// corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
|
||||
//
|
||||
// 3. POLYNOMIAL
|
||||
//
|
||||
// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
|
||||
// (sin(r) - r) can be rearranged freely, since it is quite
|
||||
// small, so we exploit parallelism to the fullest.
|
||||
//
|
||||
// psc4 = SC_4 * r_1
|
||||
// msc4 = psc4 * r
|
||||
// r2 = r * r
|
||||
// msc2 = SC_2 * r2
|
||||
// r4 = r2 * r2
|
||||
// psc3 = SC_3 + msc4
|
||||
// psc1 = SC_1 + msc2
|
||||
// msc3 = r4 * psc3
|
||||
// sincospols = psc1 + msc3
|
||||
// pols = sincospols *
|
||||
// <S_hi * r^2 | (C_hl + sigma) * r^3>
|
||||
//
|
||||
// 4. CORRECTION TERM
|
||||
//
|
||||
// This is where the "c" component of the range reduction is
|
||||
// taken into account; recall that just "r" is used for most of
|
||||
// the calculation.
|
||||
//
|
||||
// -c = m_3 - c_2
|
||||
// -d = S_hi * r - (C_hl + sigma)
|
||||
// corr = -c * -d + S_lo
|
||||
//
|
||||
// 5. COMPENSATED SUMMATIONS
|
||||
//
|
||||
// The two successive compensated summations add up the high
|
||||
// and medium parts, leaving just the low parts to add up at
|
||||
// the end.
|
||||
//
|
||||
// rs = sigma * r
|
||||
// res_int = S_hi + rs
|
||||
// k_0 = S_hi - res_int
|
||||
// k_2 = k_0 + rs
|
||||
// med = C_hl * r
|
||||
// res_hi = res_int + med
|
||||
// k_1 = res_int - res_hi
|
||||
// k_3 = k_1 + med
|
||||
//
|
||||
// 6. FINAL SUMMATION
|
||||
//
|
||||
// We now add up all the small parts:
|
||||
//
|
||||
// res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3
|
||||
//
|
||||
// Now the overall result is just:
|
||||
//
|
||||
// res_hi + res_lo
|
||||
//
|
||||
// 7. SMALL ARGUMENTS
|
||||
//
|
||||
// Inputs with |X| < 2^-252 are treated specially as
|
||||
// 1 - |x|.
|
||||
//
|
||||
// Special cases:
|
||||
// cos(NaN) = quiet NaN, and raise invalid exception
|
||||
// cos(INF) = NaN and raise invalid exception
|
||||
// cos(0) = 1
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Two 32-bit words, low word first: 0x3ff00000'00000000 is the IEEE-754
// binary64 encoding of 1.0. fast_cos() below takes its address as ONE and
// loads it in the small-argument path, where the result is ONE - |x|.
ALIGNED_(8) juint _ONE[] =
|
||||
{
|
||||
0x00000000UL, 0x3ff00000UL
|
||||
};
|
||||
// Emits the _LP64 instruction sequence for cos(x).
//
// Input:  x in xmm0 (also spilled to [rsp+8] so its high word can be read
//         with an integer load).
// Output: cos(x) in xmm0 on every path that reaches B1_4.
//
// Registers: xmm1-xmm7 are scratch. The integer parameters (eax, ecx, edx,
// r8-r11) are asserted to be pairwise different; the body additionally uses
// the non-parameter names rax, rcx, rdx, rbx, rsi, rdi and rsp directly.
// NOTE(review): presumably callers pass rax/rcx/rdx for eax/ecx/edx - confirm.
// rbx is saved and restored here; rsi and rdi are clobbered by the
// large-argument path without being saved in this function.
// NOTE(review): callers on ABIs where rsi/rdi are callee-saved must preserve
// them around this sequence - confirm at the call site.
//
// Paths (see the algorithm description above):
//   main path            - table lookup + polynomial, falls through from B1_2
//   L_2TAG_PACKET_0_0_1  - dispatch for inputs outside the main-path range
//   L_2TAG_PACKET_1_0_1  - large |x|: integer reduction against PI_INV_TABLE
//   L_2TAG_PACKET_11_0_1 - evaluation on the reduced large argument
//   L_2TAG_PACKET_2_0_1  - Inf / NaN
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) {
|
||||
|
||||
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
|
||||
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
|
||||
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
|
||||
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start;
|
||||
|
||||
assert_different_registers(r8, r9, r10, r11, eax, ecx, edx);
|
||||
|
||||
address ONEHALF = StubRoutines::x86::_ONEHALF_addr();
|
||||
address P_2 = StubRoutines::x86::_P_2_addr();
|
||||
address SC_4 = StubRoutines::x86::_SC_4_addr();
|
||||
address Ctable = StubRoutines::x86::_Ctable_addr();
|
||||
address SC_2 = StubRoutines::x86::_SC_2_addr();
|
||||
address SC_3 = StubRoutines::x86::_SC_3_addr();
|
||||
address SC_1 = StubRoutines::x86::_SC_1_addr();
|
||||
address PI_INV_TABLE = StubRoutines::x86::_PI_INV_TABLE_addr();
|
||||
address PI_4 = (address)StubRoutines::x86::_PI_4_addr();
|
||||
address PI32INV = (address)StubRoutines::x86::_PI32INV_addr();
|
||||
address SIGN_MASK = (address)StubRoutines::x86::_SIGN_MASK_addr();
|
||||
address P_1 = (address)StubRoutines::x86::_P_1_addr();
|
||||
address P_3 = (address)StubRoutines::x86::_P_3_addr();
|
||||
address ONE = (address)_ONE;
|
||||
address NEG_ZERO = (address)StubRoutines::x86::_NEG_ZERO_addr();
|
||||
|
||||
// Prologue: save rbx, reserve 16 bytes of scratch and spill the input.
bind(start);
|
||||
push(rbx);
|
||||
subq(rsp, 16);
|
||||
movsd(Address(rsp, 8), xmm0);
|
||||
|
||||
bind(B1_2);
|
||||
// Range check on the high 32 bits of x: the mask 2147418112 (0x7fff0000)
// drops the sign bit; after subtracting 808452096 (0x30300000) an unsigned
// compare against 281346048 (0x10c50000) sends every input outside the
// main-path range (too small or too large) to L_2TAG_PACKET_0_0_1.
movl(eax, Address(rsp, 12));
|
||||
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
|
||||
andl(eax, 2147418112);
|
||||
subl(eax, 808452096);
|
||||
cmpl(eax, 281346048);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
|
||||
// Main path: N = integer(x * PI32INV) via add-half-with-sign and truncate,
// table entry = Ctable + ((N + 1865232) & 63) * 32, then the reduction,
// polynomial and compensated summation from the description above.
mulsd(xmm1, xmm0);
|
||||
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
|
||||
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
|
||||
pand(xmm4, xmm0);
|
||||
por(xmm5, xmm4);
|
||||
addpd(xmm1, xmm5);
|
||||
cvttsd2sil(edx, xmm1);
|
||||
cvtsi2sdl(xmm1, edx);
|
||||
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
|
||||
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
|
||||
mulsd(xmm3, xmm1);
|
||||
unpcklpd(xmm1, xmm1);
|
||||
addq(rdx, 1865232);
|
||||
movdqu(xmm4, xmm0);
|
||||
andq(rdx, 63);
|
||||
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
|
||||
lea(rax, ExternalAddress(Ctable));
|
||||
shlq(rdx, 5);
|
||||
addq(rax, rdx);
|
||||
mulpd(xmm2, xmm1);
|
||||
subsd(xmm0, xmm3);
|
||||
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
|
||||
subsd(xmm4, xmm3);
|
||||
movq(xmm7, Address(rax, 8));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movdqu(xmm3, xmm4);
|
||||
subsd(xmm4, xmm2);
|
||||
mulpd(xmm5, xmm0);
|
||||
subpd(xmm0, xmm2);
|
||||
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
|
||||
mulsd(xmm7, xmm4);
|
||||
subsd(xmm3, xmm4);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
subsd(xmm3, xmm2);
|
||||
movdqu(xmm2, Address(rax, 0));
|
||||
subsd(xmm1, xmm3);
|
||||
movq(xmm3, Address(rax, 24));
|
||||
addsd(xmm2, xmm3);
|
||||
subsd(xmm7, xmm2);
|
||||
mulsd(xmm2, xmm4);
|
||||
mulpd(xmm6, xmm0);
|
||||
mulsd(xmm3, xmm4);
|
||||
mulpd(xmm2, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
|
||||
mulsd(xmm4, Address(rax, 0));
|
||||
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulpd(xmm5, xmm0);
|
||||
movdqu(xmm0, xmm3);
|
||||
addsd(xmm3, Address(rax, 8));
|
||||
mulpd(xmm1, xmm7);
|
||||
movdqu(xmm7, xmm4);
|
||||
addsd(xmm4, xmm3);
|
||||
addpd(xmm6, xmm5);
|
||||
movq(xmm5, Address(rax, 8));
|
||||
subsd(xmm5, xmm3);
|
||||
subsd(xmm3, xmm4);
|
||||
addsd(xmm1, Address(rax, 16));
|
||||
mulpd(xmm6, xmm2);
|
||||
addsd(xmm0, xmm5);
|
||||
addsd(xmm3, xmm7);
|
||||
addsd(xmm0, xmm1);
|
||||
addsd(xmm0, xmm3);
|
||||
addsd(xmm0, xmm6);
|
||||
unpckhpd(xmm6, xmm6);
|
||||
addsd(xmm0, xmm6);
|
||||
addsd(xmm0, xmm4);
|
||||
jmp(B1_4);
|
||||
|
||||
// Out-of-range inputs. The flags tested here are still those of the cmpl in
// the range check (nothing in between writes them): signed "greater" means
// |x| is too large for the main path. Otherwise x is tiny and the result is
// ONE - |x|, with the sign bit cleared through the top 16-bit word of xmm0.
bind(L_2TAG_PACKET_0_0_1);
|
||||
jcc(Assembler::greater, L_2TAG_PACKET_1_0_1);
|
||||
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
|
||||
subsd(xmm1, xmm0);
|
||||
movdqu(xmm0, xmm1);
|
||||
jmp(B1_4);
|
||||
|
||||
// Large |x|. An exponent field of all ones (32752 == 0x7ff0 in the top word)
// means Inf or NaN and is handled at L_2TAG_PACKET_2_0_1.
bind(L_2TAG_PACKET_1_0_1);
|
||||
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32752);
|
||||
cmpl(eax, 32752);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_2_0_1);
|
||||
// Finite large argument. The exponent selects a starting offset into
// PI_INV_TABLE (kept in rcx). rdx receives the low 32 bits of x and rax the
// upper mantissa bits with the implicit leading one OR-ed in (INT_MIN, then
// shifted down by 11). These are multiplied by successive 32-bit table words
// with 64-bit imulq and manual carry propagation through r8-r11, rsi, rdi
// and rbx.
// NOTE(review): this looks like a Payne-Hanek style reduction - confirm
// against the Intel LIBM reference before changing any instruction order.
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32752);
|
||||
subl(ecx, 16224);
|
||||
shrl(ecx, 7);
|
||||
andl(ecx, 65532);
|
||||
lea(r11, ExternalAddress(PI_INV_TABLE));
|
||||
addq(rcx, r11);
|
||||
movdq(rax, xmm0);
|
||||
movl(r10, Address(rcx, 20));
|
||||
movl(r8, Address(rcx, 24));
|
||||
movl(edx, eax);
|
||||
shrq(rax, 21);
|
||||
orl(eax, INT_MIN);
|
||||
shrl(eax, 11);
|
||||
movl(r9, r10);
|
||||
imulq(r10, rdx);
|
||||
imulq(r9, rax);
|
||||
imulq(r8, rax);
|
||||
movl(rsi, Address(rcx, 16));
|
||||
movl(rdi, Address(rcx, 12));
|
||||
movl(r11, r10);
|
||||
shrq(r10, 32);
|
||||
addq(r9, r10);
|
||||
addq(r11, r8);
|
||||
movl(r8, r11);
|
||||
shrq(r11, 32);
|
||||
addq(r9, r11);
|
||||
movl(r10, rsi);
|
||||
imulq(rsi, rdx);
|
||||
imulq(r10, rax);
|
||||
movl(r11, rdi);
|
||||
imulq(rdi, rdx);
|
||||
movl(rbx, rsi);
|
||||
shrq(rsi, 32);
|
||||
addq(r9, rbx);
|
||||
movl(rbx, r9);
|
||||
shrq(r9, 32);
|
||||
addq(r10, rsi);
|
||||
addq(r10, r9);
|
||||
shlq(rbx, 32);
|
||||
orq(r8, rbx);
|
||||
imulq(r11, rax);
|
||||
movl(r9, Address(rcx, 8));
|
||||
movl(rsi, Address(rcx, 4));
|
||||
movl(rbx, rdi);
|
||||
shrq(rdi, 32);
|
||||
addq(r10, rbx);
|
||||
movl(rbx, r10);
|
||||
shrq(r10, 32);
|
||||
addq(r11, rdi);
|
||||
addq(r11, r10);
|
||||
movq(rdi, r9);
|
||||
imulq(r9, rdx);
|
||||
imulq(rdi, rax);
|
||||
movl(r10, r9);
|
||||
shrq(r9, 32);
|
||||
addq(r11, r10);
|
||||
movl(r10, r11);
|
||||
shrq(r11, 32);
|
||||
addq(rdi, r9);
|
||||
addq(rdi, r11);
|
||||
movq(r9, rsi);
|
||||
imulq(rsi, rdx);
|
||||
imulq(r9, rax);
|
||||
shlq(r10, 32);
|
||||
orq(r10, rbx);
|
||||
movl(eax, Address(rcx, 0));
|
||||
movl(r11, rsi);
|
||||
shrq(rsi, 32);
|
||||
addq(rdi, r11);
|
||||
movl(r11, rdi);
|
||||
shrq(rdi, 32);
|
||||
addq(r9, rsi);
|
||||
addq(r9, rdi);
|
||||
imulq(rdx, rax);
|
||||
pextrw(rbx, xmm0, 3);
|
||||
lea(rdi, ExternalAddress(PI_INV_TABLE));
|
||||
subq(rcx, rdi);
|
||||
addl(ecx, ecx);
|
||||
addl(ecx, ecx);
|
||||
addl(ecx, ecx);
|
||||
addl(ecx, 19);
|
||||
movl(rsi, 32768);
|
||||
andl(rsi, rbx);
|
||||
shrl(rbx, 4);
|
||||
andl(rbx, 2047);
|
||||
subl(rbx, 1023);
|
||||
subl(ecx, rbx);
|
||||
addq(r9, rdx);
|
||||
movl(edx, ecx);
|
||||
addl(edx, 32);
|
||||
cmpl(ecx, 1);
|
||||
jcc(Assembler::less, L_2TAG_PACKET_3_0_1);
|
||||
negl(ecx);
|
||||
addl(ecx, 29);
|
||||
shll(r9);
|
||||
movl(rdi, r9);
|
||||
andl(r9, 536870911);
|
||||
testl(r9, 268435456);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1);
|
||||
shrl(r9);
|
||||
movl(rbx, 0);
|
||||
shlq(r9, 32);
|
||||
orq(r9, r11);
|
||||
|
||||
bind(L_2TAG_PACKET_5_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_6_0_1);
|
||||
cmpq(r9, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_7_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_8_0_1);
|
||||
bsrq(r11, r9);
|
||||
movl(ecx, 29);
|
||||
subl(ecx, r11);
|
||||
jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1);
|
||||
shlq(r9);
|
||||
movq(rax, r10);
|
||||
shlq(r10);
|
||||
addl(edx, ecx);
|
||||
negl(ecx);
|
||||
addl(ecx, 64);
|
||||
shrq(rax);
|
||||
shrq(r8);
|
||||
orq(r9, rax);
|
||||
orq(r10, r8);
|
||||
|
||||
bind(L_2TAG_PACKET_10_0_1);
|
||||
cvtsi2sdq(xmm0, r9);
|
||||
shrq(r10, 1);
|
||||
cvtsi2sdq(xmm3, r10);
|
||||
xorpd(xmm4, xmm4);
|
||||
shll(edx, 4);
|
||||
negl(edx);
|
||||
addl(edx, 16368);
|
||||
orl(edx, rsi);
|
||||
xorl(edx, rbx);
|
||||
pinsrw(xmm4, edx, 3);
|
||||
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
|
||||
movq(xmm6, ExternalAddress(8 + PI_4)); //0x18469899UL, 0x3e64442dUL (second quadword of PI_4)
|
||||
xorpd(xmm5, xmm5);
|
||||
subl(edx, 1008);
|
||||
pinsrw(xmm5, edx, 3);
|
||||
mulsd(xmm0, xmm4);
|
||||
shll(rsi, 16);
|
||||
sarl(rsi, 31);
|
||||
mulsd(xmm3, xmm5);
|
||||
movdqu(xmm1, xmm0);
|
||||
mulsd(xmm0, xmm2);
|
||||
shrl(rdi, 29);
|
||||
addsd(xmm1, xmm3);
|
||||
mulsd(xmm3, xmm2);
|
||||
addl(rdi, rsi);
|
||||
xorl(rdi, rsi);
|
||||
mulsd(xmm6, xmm1);
|
||||
movl(eax, rdi);
|
||||
addsd(xmm6, xmm3);
|
||||
movdqu(xmm2, xmm0);
|
||||
addsd(xmm0, xmm6);
|
||||
subsd(xmm2, xmm0);
|
||||
addsd(xmm6, xmm2);
|
||||
|
||||
// Evaluation on the reduced large argument: xmm0 holds the reduced value,
// xmm6 its low-order correction (subtracted into xmm1 below) and eax an index
// adjustment produced by the reduction, folded into the table index as
// eax * 8. The remaining instructions follow the same table/polynomial scheme
// as the main path.
bind(L_2TAG_PACKET_11_0_1);
|
||||
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
|
||||
mulsd(xmm1, xmm0);
|
||||
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
|
||||
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
|
||||
pand(xmm4, xmm0);
|
||||
por(xmm5, xmm4);
|
||||
addpd(xmm1, xmm5);
|
||||
cvttsd2siq(rdx, xmm1);
|
||||
cvtsi2sdq(xmm1, rdx);
|
||||
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
|
||||
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
|
||||
mulsd(xmm3, xmm1);
|
||||
unpcklpd(xmm1, xmm1);
|
||||
shll(eax, 3);
|
||||
addl(edx, 1865232);
|
||||
movdqu(xmm4, xmm0);
|
||||
addl(edx, eax);
|
||||
andl(edx, 63);
|
||||
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
|
||||
lea(rax, ExternalAddress(Ctable));
|
||||
shll(edx, 5);
|
||||
addq(rax, rdx);
|
||||
mulpd(xmm2, xmm1);
|
||||
subsd(xmm0, xmm3);
|
||||
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
|
||||
subsd(xmm4, xmm3);
|
||||
movq(xmm7, Address(rax, 8));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movdqu(xmm3, xmm4);
|
||||
subsd(xmm4, xmm2);
|
||||
mulpd(xmm5, xmm0);
|
||||
subpd(xmm0, xmm2);
|
||||
mulsd(xmm7, xmm4);
|
||||
subsd(xmm3, xmm4);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
subsd(xmm3, xmm2);
|
||||
movdqu(xmm2, Address(rax, 0));
|
||||
subsd(xmm1, xmm3);
|
||||
movq(xmm3, Address(rax, 24));
|
||||
addsd(xmm2, xmm3);
|
||||
subsd(xmm7, xmm2);
|
||||
subsd(xmm1, xmm6);
|
||||
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
|
||||
mulsd(xmm2, xmm4);
|
||||
mulpd(xmm6, xmm0);
|
||||
mulsd(xmm3, xmm4);
|
||||
mulpd(xmm2, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
|
||||
mulsd(xmm4, Address(rax, 0));
|
||||
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulpd(xmm5, xmm0);
|
||||
movdqu(xmm0, xmm3);
|
||||
addsd(xmm3, Address(rax, 8));
|
||||
mulpd(xmm1, xmm7);
|
||||
movdqu(xmm7, xmm4);
|
||||
addsd(xmm4, xmm3);
|
||||
addpd(xmm6, xmm5);
|
||||
movq(xmm5, Address(rax, 8));
|
||||
subsd(xmm5, xmm3);
|
||||
subsd(xmm3, xmm4);
|
||||
addsd(xmm1, Address(rax, 16));
|
||||
mulpd(xmm6, xmm2);
|
||||
addsd(xmm5, xmm0);
|
||||
addsd(xmm3, xmm7);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm1, xmm3);
|
||||
addsd(xmm1, xmm6);
|
||||
unpckhpd(xmm6, xmm6);
|
||||
movdqu(xmm0, xmm4);
|
||||
addsd(xmm1, xmm6);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(B1_4);
|
||||
|
||||
bind(L_2TAG_PACKET_7_0_1);
|
||||
addl(edx, 64);
|
||||
movq(r9, r10);
|
||||
movq(r10, r8);
|
||||
movl(r8, 0);
|
||||
cmpq(r9, 0);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
|
||||
addl(edx, 64);
|
||||
movq(r9, r10);
|
||||
movq(r10, r8);
|
||||
cmpq(r9, 0);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
|
||||
xorpd(xmm0, xmm0);
|
||||
xorpd(xmm6, xmm6);
|
||||
jmp(L_2TAG_PACKET_11_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_9_0_1);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_10_0_1);
|
||||
negl(ecx);
|
||||
shrq(r10);
|
||||
movq(rax, r9);
|
||||
shrq(r9);
|
||||
subl(edx, ecx);
|
||||
negl(ecx);
|
||||
addl(ecx, 64);
|
||||
shlq(rax);
|
||||
orq(r10, rax);
|
||||
jmp(L_2TAG_PACKET_10_0_1);
|
||||
bind(L_2TAG_PACKET_3_0_1);
|
||||
negl(ecx);
|
||||
shlq(r9, 32);
|
||||
orq(r9, r11);
|
||||
shlq(r9);
|
||||
movq(rdi, r9);
|
||||
testl(r9, INT_MIN);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1);
|
||||
shrl(r9);
|
||||
movl(rbx, 0);
|
||||
shrq(rdi, 3);
|
||||
jmp(L_2TAG_PACKET_6_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_4_0_1);
|
||||
shrl(r9);
|
||||
movl(rbx, 536870912);
|
||||
shrl(rbx);
|
||||
shlq(r9, 32);
|
||||
orq(r9, r11);
|
||||
shlq(rbx, 32);
|
||||
addl(rdi, 536870912);
|
||||
movl(rcx, 0);
|
||||
movl(r11, 0);
|
||||
subq(rcx, r8);
|
||||
sbbq(r11, r10);
|
||||
sbbq(rbx, r9);
|
||||
movq(r8, rcx);
|
||||
movq(r10, r11);
|
||||
movq(r9, rbx);
|
||||
movl(rbx, 32768);
|
||||
jmp(L_2TAG_PACKET_5_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_12_0_1);
|
||||
shrl(r9);
|
||||
mov64(rbx, 0x100000000);
|
||||
shrq(rbx);
|
||||
movl(rcx, 0);
|
||||
movl(r11, 0);
|
||||
subq(rcx, r8);
|
||||
sbbq(r11, r10);
|
||||
sbbq(rbx, r9);
|
||||
movq(r8, rcx);
|
||||
movq(r10, r11);
|
||||
movq(r9, rbx);
|
||||
movl(rbx, 32768);
|
||||
shrq(rdi, 3);
|
||||
addl(rdi, 536870912);
|
||||
jmp(L_2TAG_PACKET_6_0_1);
|
||||
|
||||
// Inf / NaN input: reload the spilled x and multiply it by the NEG_ZERO
// constant, so Inf yields NaN and NaN propagates. The copy stored to [rsp+0]
// is not read again inside this function.
bind(L_2TAG_PACKET_2_0_1);
|
||||
movsd(xmm0, Address(rsp, 8));
|
||||
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL
|
||||
movq(Address(rsp, 0), xmm0);
|
||||
|
||||
bind(L_2TAG_PACKET_13_0_1);
|
||||
|
||||
// Common exit: result is in xmm0; release the scratch area and restore rbx.
bind(B1_4);
|
||||
addq(rsp, 16);
|
||||
pop(rbx);
|
||||
|
||||
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
|
||||
// Constant pool for the 32-bit fast_cos(). The 32-bit code keeps the base of
// this array in its `tmp` register and addresses everything by byte offset.
// Each double is stored as two 32-bit words, low word first.
//
//   bytes    0..2047  64 lookup entries of 32 bytes (four doubles) each,
//                     selected as base + (index & 63) * 32; the code reads
//                     offsets 0, 8, 16 and 24 within an entry
//   bytes 2048..2063  polynomial pair (the 64-bit path annotates the same
//                     words as SC_1)
//   bytes 2064..2079  polynomial pair (SC_2 in the 64-bit path)
//   bytes 2080..2095  polynomial pair (SC_3 in the 64-bit path)
//   bytes 2096..2111  polynomial pair (SC_4 in the 64-bit path)
//   bytes 2112..2127  P_2, duplicated in both lanes
//   bytes 2128..2135  P_1
//   bytes 2144..2151  P_3
//   bytes 2160..2167  32/pi (PI32INV in the 64-bit path)
//   bytes 2176..2183  0x43380000'00000000; not referenced by the 32-bit
//                     fast_cos() visible in this file
//                     NOTE(review): presumably the SHIFTER constant from the
//                     algorithm description - confirm
//   bytes 2192..2199  1.0, used as ONE - |x| for tiny inputs
//   bytes 2208..2215  -0.0, multiplied into Inf/NaN inputs
//   bytes 2224..2231  sign-bit mask
//   bytes 2240..2255  0.5 in both lanes
//
// Do not insert, remove or reorder words: the offsets above are hard-coded as
// integer literals in the instruction stream.
ALIGNED_(16) juint _static_const_table_cos[] =
|
||||
{
|
||||
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
|
||||
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
|
||||
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
|
||||
0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
|
||||
0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
|
||||
0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
|
||||
0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
|
||||
0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
|
||||
0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
|
||||
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
|
||||
0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
|
||||
0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
|
||||
0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
|
||||
0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
|
||||
0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
|
||||
0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
|
||||
0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
|
||||
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
|
||||
0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
|
||||
0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
|
||||
0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
|
||||
0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
|
||||
0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
|
||||
0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
|
||||
0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
|
||||
0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
|
||||
0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
|
||||
0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
|
||||
0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
|
||||
0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
|
||||
0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
|
||||
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
|
||||
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
|
||||
0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
|
||||
0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
|
||||
0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
|
||||
0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
|
||||
0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
|
||||
0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
|
||||
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
|
||||
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
|
||||
0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
|
||||
0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
|
||||
0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
|
||||
0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
|
||||
0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
|
||||
0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
|
||||
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
|
||||
0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
|
||||
0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
|
||||
0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
|
||||
0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
|
||||
0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
|
||||
0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
|
||||
0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
|
||||
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
|
||||
0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
|
||||
0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
|
||||
0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
|
||||
0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
|
||||
0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
|
||||
0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
|
||||
0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
|
||||
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
|
||||
0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
|
||||
0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
|
||||
0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
|
||||
0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
|
||||
0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
|
||||
0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
|
||||
0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
|
||||
0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
|
||||
0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
|
||||
0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
|
||||
0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
|
||||
0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
|
||||
0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
|
||||
0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
|
||||
0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
|
||||
0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
|
||||
0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
|
||||
0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
|
||||
0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
|
||||
0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
|
||||
0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
|
||||
0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
|
||||
0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
|
||||
0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
|
||||
0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
|
||||
0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
|
||||
0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
|
||||
0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
|
||||
0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
|
||||
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
|
||||
0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
|
||||
0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
|
||||
0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL,
|
||||
0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,
|
||||
0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL,
|
||||
0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL,
|
||||
0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL,
|
||||
0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,
|
||||
0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
|
||||
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
|
||||
};
|
||||
//registers,
|
||||
// input: (rbp + 8)
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 32-bit (non-_LP64) instruction sequence for cos(x).
//
// Input:  x is read from [rsp+128] after this function lowers rsp by 120.
// Output: every path ends with an fld_d / fmul_d, i.e. the result is left on
//         the x87 stack.
//
// Stack:  rsp is lowered by 120 bytes on entry and is NOT raised again in
//         this function; only `tmp` is restored from [rsp+56] at the end.
//         NOTE(review): presumably the emitting stub tears the frame down
//         (the header comment gives the input as rbp + 8) - confirm at the
//         call site.
//
// `tmp` holds the base of _static_const_table_cos for the whole body. The
// byte offsets used below are: 0..2047 lookup entries (32 bytes each),
// 2048/2064/2080/2096 polynomial coefficient pairs, 2112 P_2, 2128 P_1,
// 2144 P_3, 2160 32/pi, 2192 1.0, 2208 -0.0, 2224 sign mask, 2240 0.5 pair.
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
|
||||
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
|
||||
address static_const_table_cos = (address)_static_const_table_cos;
|
||||
|
||||
// Prologue: reserve the local frame, save tmp, point tmp at the table and
// load x.
bind(start);
|
||||
subl(rsp, 120);
|
||||
movl(Address(rsp, 56), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table_cos));
|
||||
movsd(xmm0, Address(rsp, 128));
|
||||
// Range check on the top 16 bits of x with the sign masked off (32767 ==
// 0x7fff): after subtracting 12336 (0x3030) an unsigned compare against 4293
// sends every input outside the main-path range to L_2TAG_PACKET_0_0_2.
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
subl(eax, 12336);
|
||||
cmpl(eax, 4293);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
|
||||
// Main path: N = integer(x * 32/pi), eax = table base + ((N + 1865232) & 63)
// * 32, followed by the reduction, polynomial and compensated summation.
movsd(xmm1, Address(tmp, 2160));
|
||||
mulsd(xmm1, xmm0);
|
||||
movdqu(xmm5, Address(tmp, 2240));
|
||||
movsd(xmm4, Address(tmp, 2224));
|
||||
pand(xmm4, xmm0);
|
||||
por(xmm5, xmm4);
|
||||
movsd(xmm3, Address(tmp, 2128));
|
||||
movdqu(xmm2, Address(tmp, 2112));
|
||||
addpd(xmm1, xmm5);
|
||||
cvttsd2sil(edx, xmm1);
|
||||
cvtsi2sdl(xmm1, edx);
|
||||
mulsd(xmm3, xmm1);
|
||||
unpcklpd(xmm1, xmm1);
|
||||
addl(edx, 1865232);
|
||||
movdqu(xmm4, xmm0);
|
||||
andl(edx, 63);
|
||||
movdqu(xmm5, Address(tmp, 2096));
|
||||
lea(eax, Address(tmp, 0));
|
||||
shll(edx, 5);
|
||||
addl(eax, edx);
|
||||
mulpd(xmm2, xmm1);
|
||||
subsd(xmm0, xmm3);
|
||||
mulsd(xmm1, Address(tmp, 2144));
|
||||
subsd(xmm4, xmm3);
|
||||
movsd(xmm7, Address(eax, 8));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movapd(xmm3, xmm4);
|
||||
subsd(xmm4, xmm2);
|
||||
mulpd(xmm5, xmm0);
|
||||
subpd(xmm0, xmm2);
|
||||
movdqu(xmm6, Address(tmp, 2064));
|
||||
mulsd(xmm7, xmm4);
|
||||
subsd(xmm3, xmm4);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
subsd(xmm3, xmm2);
|
||||
movdqu(xmm2, Address(eax, 0));
|
||||
subsd(xmm1, xmm3);
|
||||
movsd(xmm3, Address(eax, 24));
|
||||
addsd(xmm2, xmm3);
|
||||
subsd(xmm7, xmm2);
|
||||
mulsd(xmm2, xmm4);
|
||||
mulpd(xmm6, xmm0);
|
||||
mulsd(xmm3, xmm4);
|
||||
mulpd(xmm2, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
addpd(xmm5, Address(tmp, 2080));
|
||||
mulsd(xmm4, Address(eax, 0));
|
||||
addpd(xmm6, Address(tmp, 2048));
|
||||
mulpd(xmm5, xmm0);
|
||||
movapd(xmm0, xmm3);
|
||||
addsd(xmm3, Address(eax, 8));
|
||||
mulpd(xmm1, xmm7);
|
||||
movapd(xmm7, xmm4);
|
||||
addsd(xmm4, xmm3);
|
||||
addpd(xmm6, xmm5);
|
||||
movsd(xmm5, Address(eax, 8));
|
||||
subsd(xmm5, xmm3);
|
||||
subsd(xmm3, xmm4);
|
||||
addsd(xmm1, Address(eax, 16));
|
||||
mulpd(xmm6, xmm2);
|
||||
addsd(xmm5, xmm0);
|
||||
addsd(xmm3, xmm7);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm1, xmm3);
|
||||
addsd(xmm1, xmm6);
|
||||
unpckhpd(xmm6, xmm6);
|
||||
addsd(xmm1, xmm6);
|
||||
addsd(xmm4, xmm1);
|
||||
// Move the SSE result onto the x87 stack through memory.
movsd(Address(rsp, 0), xmm4);
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Out-of-range inputs. The flags are still those of the cmpl in the range
// check: signed "greater" means |x| is too large for the main path;
// otherwise x is tiny and the result is 1.0 - |x|.
bind(L_2TAG_PACKET_0_0_2);
|
||||
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
|
||||
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
movsd(xmm1, Address(tmp, 2192));
|
||||
subsd(xmm1, xmm0);
|
||||
movsd(Address(rsp, 0), xmm1);
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Large |x|. An exponent field of all ones in the high word (2146435072 ==
// 0x7ff00000) means Inf or NaN. Every other large input is handed to the
// dlibm_sin_cos_huge stub with a 32-byte outgoing area: x at [rsp+0], the
// address of a result slot at [rsp+8] and the integer 1 at [rsp+12].
// NOTE(review): the meaning of the integer argument is defined by the stub,
// which is not visible here - presumably it selects cos over sin; confirm.
// The result slot (rsp+40 inside the call frame) is the same address as
// [rsp+8] once the 32 bytes are released, which is where fld_d reads it.
bind(L_2TAG_PACKET_2_0_2);
|
||||
movl(eax, Address(rsp, 132));
|
||||
andl(eax, 2146435072);
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
|
||||
subl(rsp, 32);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
lea(eax, Address(rsp, 40));
|
||||
movl(Address(rsp, 8), eax);
|
||||
movl(eax, 1);
|
||||
movl(Address(rsp, 12), eax);
|
||||
call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
|
||||
addl(rsp, 32);
|
||||
fld_d(Address(rsp, 8));
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Inf / NaN input: multiply x by the -0.0 constant on the x87 stack, so Inf
// yields NaN and NaN propagates.
bind(L_2TAG_PACKET_3_0_2);
|
||||
fld_d(Address(rsp, 128));
|
||||
fmul_d(Address(tmp, 2208));
|
||||
|
||||
// Common exit: restore the caller's tmp register saved in the prologue.
bind(L_2TAG_PACKET_1_0_2);
|
||||
movl(tmp, Address(rsp, 56));
|
||||
}
|
||||
#endif
|
674
hotspot/src/cpu/x86/vm/macroAssembler_x86_exp.cpp
Normal file
674
hotspot/src/cpu/x86/vm/macroAssembler_x86_exp.cpp
Normal file
@ -0,0 +1,674 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
// ALIGNED_(x): prefix for a declaration that requests x-byte alignment.
// Expands to the MSVC __declspec form when _MSC_VER is defined and to the
// GCC-style __attribute__ form otherwise.
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - EXP()
|
||||
// ---------------------
|
||||
//
|
||||
// Description:
|
||||
// Let K = 64 (table size).
|
||||
// e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
|
||||
// where
|
||||
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
|
||||
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
|
||||
// values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
|
||||
//
|
||||
// P(y) is a minimax polynomial approximation of exp(y)-1
|
||||
// on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
|
||||
//
|
||||
// To avoid problems with arithmetic overflow and underflow,
|
||||
// the value of 2^n is safely computed as 2^n1 * 2^n2, where n1 in [-BIAS/2..BIAS/2]
|
||||
// and BIAS is the value of the exponent bias.
|
||||
//
|
||||
// Special cases:
|
||||
// exp(NaN) = NaN
|
||||
// exp(+INF) = +INF
|
||||
// exp(-INF) = 0
|
||||
// exp(x) = 1 for subnormals
|
||||
// for finite argument, only exp(0)=1 is exact
|
||||
// For IEEE double
|
||||
// if x > 709.782712893383973096 then exp(x) overflow
|
||||
// if x < -745.133219101941108420 then exp(x) underflow
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Packed double constants for the main path of the 64-bit fast_exp, written
// as 32-bit word pairs with the low word first. fast_exp loads 16 bytes at a
// time from these byte offsets:
//    0: 0x40571547652b82fe in both lanes (~92.33, i.e. 64/log(2))
//   16: 0x3f862e42fefa0000 in both lanes (presumably the leading part of log(2)/64)
//   32: 0x3d1cf79abc9e3b3a in both lanes (presumably the trailing part of log(2)/64)
//   48: 0x3fdffffffffffffe in both lanes (just below 0.5)
//   64: 0x3f56c15ce3289860, 0x3fa55555555b9e25 (close to 1/720 and 1/24)
//   80: 0x3f811115c090cf0f, 0x3fc5555555548ba1 (close to 1/120 and 1/6)
// NOTE(review): offsets 48..95 look like the coefficients of the polynomial
// P(y) from the algorithm description - confirm against the LIBM sources.
ALIGNED_(16) juint _cv[] =
|
||||
{
|
||||
0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
|
||||
0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
|
||||
0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
|
||||
0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
|
||||
0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
|
||||
};
|
||||
|
||||
// 0x4338000000000000 (= 1.5 * 2^52) in both lanes. fast_exp adds this to
// x * 64/log(2), copies the sum (its low 32 bits are then read as an integer)
// and subtracts it again, which leaves the product rounded to an integer.
ALIGNED_(16) juint _shifter[] =
|
||||
{
|
||||
0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
|
||||
};
|
||||
|
||||
// 0x00000000ffffffc0 in both 64-bit lanes. fast_exp ANDs it into the copy of
// the shifted sum to clear the low six bits (the table index) and everything
// above bit 31.
ALIGNED_(16) juint _mmask[] =
|
||||
{
|
||||
0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
|
||||
};
|
||||
|
||||
// 0x000000000000ffc0 (= 1023 << 6) in both 64-bit lanes. fast_exp adds it to
// the masked integer and then shifts left by 46, so the sum lands in the
// exponent field of a double.
ALIGNED_(16) juint _bias[] =
|
||||
{
|
||||
0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
|
||||
};
|
||||
|
||||
// Lookup table for the 64-bit fast_exp: 256 words = 64 entries of 16 bytes.
// fast_exp selects an entry with the byte offset (m & 63) << 4 and loads all
// 16 bytes; the low 8 bytes are added to the reduced argument and the high
// 8 bytes are OR-ed with the exponent bits of the scale factor.
// NOTE(review): per the algorithm description these are the tabulated
// T[j] = 2^(j/K); the low quadword of each entry looks like the small
// correction term and the high quadword like the mantissa bits of 2^(j/64)
// with a zero exponent field - confirm against the LIBM sources.
ALIGNED_(16) juint _Tbl_addr[] =
|
||||
{
|
||||
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
|
||||
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
|
||||
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
|
||||
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
|
||||
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
|
||||
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
|
||||
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
|
||||
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
|
||||
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
|
||||
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
|
||||
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
|
||||
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
|
||||
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
|
||||
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
|
||||
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
|
||||
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
|
||||
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
|
||||
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
|
||||
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
|
||||
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
|
||||
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
|
||||
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
|
||||
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
|
||||
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
|
||||
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
|
||||
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
|
||||
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
|
||||
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
|
||||
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
|
||||
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
|
||||
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
|
||||
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
|
||||
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
|
||||
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
|
||||
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
|
||||
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
|
||||
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
|
||||
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
|
||||
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
|
||||
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
|
||||
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
|
||||
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
|
||||
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
|
||||
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
|
||||
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
|
||||
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
|
||||
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
|
||||
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
|
||||
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
|
||||
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
|
||||
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
|
||||
0x000fa7c1UL
|
||||
};
|
||||
|
||||
// 128 bits of ones. fast_exp shifts each 64-bit lane left by a run-time count
// and uses the result as a bit mask in its out-of-range-exponent path.
ALIGNED_(16) juint _ALLONES[] =
|
||||
{
|
||||
0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
|
||||
};
|
||||
|
||||
// 0x3ff0000000000000 (the bit pattern of 1.0) in both lanes. fast_exp adds it
// with paddd to an exponent-field value in its out-of-range-exponent path.
ALIGNED_(16) juint _ebias[] =
|
||||
{
|
||||
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
|
||||
};
|
||||
|
||||
// 0x7fefffffffffffff, the largest finite double. fast_exp squares it to
// produce the result for a large positive finite argument.
ALIGNED_(4) juint _XMAX[] =
|
||||
{
|
||||
0xffffffffUL, 0x7fefffffUL
|
||||
};
|
||||
|
||||
// 0x0010000000000000, the smallest positive normal double. fast_exp squares
// it to produce the result for a large negative finite argument.
ALIGNED_(4) juint _XMIN[] =
|
||||
{
|
||||
0x00000000UL, 0x00100000UL
|
||||
};
|
||||
|
||||
// 0x7ff0000000000000, +infinity; returned by fast_exp for an argument of +INF.
ALIGNED_(4) juint _INF[] =
|
||||
{
|
||||
0x00000000UL, 0x7ff00000UL
|
||||
};
|
||||
|
||||
// +0.0; returned by fast_exp for an argument of -INF.
ALIGNED_(4) juint _ZERO[] =
|
||||
{
|
||||
0x00000000UL, 0x00000000UL
|
||||
};
|
||||
|
||||
// 0x3ff0000000000000, the double 1.0; fast_exp adds it to x when |x| is below
// the lower bound of its range check.
ALIGNED_(4) juint _ONE_val[] =
|
||||
{
|
||||
0x00000000UL, 0x3ff00000UL
|
||||
};
|
||||
|
||||
|
||||
// Registers:
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, tmp - r11
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 64-bit code sequence for exp(x), following the table-driven
// algorithm described at the top of this file.
//
//   xmm0               - holds x on entry; every exit path leaves its result
//                        in xmm0 before reaching B1_5
//   xmm1..xmm7         - scratch
//   eax, ecx, edx, tmp - integer scratch, must be four distinct registers
//                        (asserted below); tmp receives the address of
//                        _Tbl_addr
//
// The emitted code reserves 24 bytes of stack, keeps a copy of x at rsp+8 and
// releases the 24 bytes again at B1_5.
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
|
||||
Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
// NOTE(review): nothing is emitted between this jump and bind(start) - the
// declarations below are host-side locals - so the jump looks redundant.
jmp(start);
|
||||
// Host addresses of the constant tables, used below as ExternalAddress operands.
address cv = (address)_cv;
|
||||
address Shifter = (address)_shifter;
|
||||
address mmask = (address)_mmask;
|
||||
address bias = (address)_bias;
|
||||
address Tbl_addr = (address)_Tbl_addr;
|
||||
address ALLONES = (address)_ALLONES;
|
||||
address ebias = (address)_ebias;
|
||||
address XMAX = (address)_XMAX;
|
||||
address XMIN = (address)_XMIN;
|
||||
address INF = (address)_INF;
|
||||
address ZERO = (address)_ZERO;
|
||||
address ONE_val = (address)_ONE_val;
|
||||
|
||||
bind(start);
|
||||
// Reserve 24 bytes of stack, save x at rsp+8 and copy x into both lanes of xmm0.
subq(rsp, 24);
|
||||
movsd(Address(rsp, 8), xmm0);
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
|
||||
movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
|
||||
movdqu(xmm2, ExternalAddress(16 + cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
|
||||
movdqu(xmm3, ExternalAddress(32 + cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
|
||||
// Range check on the top 16 bits of x with the sign bit masked off: if the
// value lies outside [15504, 16527] one of the two differences below is
// negative, the OR has its sign bit set, and the unsigned compare against
// INT_MIN branches to the special-case code at L_2TAG_PACKET_0_0_2.
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
movl(edx, 16527);
|
||||
subl(edx, eax);
|
||||
subl(eax, 15504);
|
||||
orl(edx, eax);
|
||||
cmpl(edx, INT_MIN);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
// Main path. xmm1 = x * cv[0] + Shifter; xmm7 keeps that sum (its low 32 bits
// are read below as the integer m) and the shifter is subtracted again so
// xmm1 holds the rounded product as a double.
mulpd(xmm1, xmm0);
|
||||
addpd(xmm1, xmm6);
|
||||
movapd(xmm7, xmm1);
|
||||
subpd(xmm1, xmm6);
|
||||
mulpd(xmm2, xmm1);
|
||||
movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
|
||||
mulpd(xmm3, xmm1);
|
||||
movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
|
||||
subpd(xmm0, xmm2);
|
||||
// ecx = (m & 63) * 16, the byte offset of a 16-byte table entry;
// eax = edx = m >> 6 (arithmetic shift).
movdl(eax, xmm7);
|
||||
movl(ecx, eax);
|
||||
andl(ecx, 63);
|
||||
shll(ecx, 4);
|
||||
sarl(eax, 6);
|
||||
movl(edx, eax);
|
||||
// Build exponent bits from m: clear its low six bits, add 1023 << 6 and shift
// left by 46 so the sum lands in the exponent field.
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
|
||||
pand(xmm7, xmm6);
|
||||
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
|
||||
paddq(xmm7, xmm6);
|
||||
psllq(xmm7, 46);
|
||||
subpd(xmm0, xmm3);
|
||||
// xmm2 = the table entry selected by ecx.
lea(tmp, ExternalAddress(Tbl_addr));
|
||||
movdqu(xmm2, Address(ecx, tmp));
|
||||
mulpd(xmm4, xmm0);
|
||||
movapd(xmm6, xmm0);
|
||||
movapd(xmm1, xmm0);
|
||||
mulpd(xmm6, xmm6);
|
||||
mulpd(xmm0, xmm6);
|
||||
addpd(xmm5, xmm4);
|
||||
mulsd(xmm0, xmm6);
|
||||
mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
|
||||
addsd(xmm1, xmm2);
|
||||
unpckhpd(xmm2, xmm2);
|
||||
mulpd(xmm0, xmm5);
|
||||
addsd(xmm1, xmm0);
|
||||
// Merge the exponent bits built in xmm7 into the high half of the table entry.
por(xmm2, xmm7);
|
||||
unpckhpd(xmm0, xmm0);
|
||||
addsd(xmm0, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
// Unsigned check that (m >> 6) + 894 is at most 1916; otherwise take the
// path at L_2TAG_PACKET_1_0_2.
addl(edx, 894);
|
||||
cmpl(edx, 1916);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
|
||||
// Common case: result = xmm0 * xmm2 + xmm2.
mulsd(xmm0, xmm2);
|
||||
addsd(xmm0, xmm2);
|
||||
jmp(B1_5);
|
||||
|
||||
// Exponent outside the range accepted above (eax still holds m >> 6).
// NOTE(review): this appears to implement the "2^n = 2^n1 * 2^n2" splitting
// from the algorithm description - (eax >> 1) is subtracted from xmm2's
// exponent field and applied again through xmm3 at the end - confirm.
bind(L_2TAG_PACKET_1_0_2);
|
||||
xorpd(xmm3, xmm3);
|
||||
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
|
||||
movl(edx, -1022);
|
||||
subl(edx, eax);
|
||||
movdl(xmm5, edx);
|
||||
psllq(xmm4, xmm5);
|
||||
movl(ecx, eax);
|
||||
sarl(eax, 1);
|
||||
pinsrw(xmm3, eax, 3);
|
||||
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
|
||||
psllq(xmm3, 4);
|
||||
psubd(xmm2, xmm3);
|
||||
mulsd(xmm0, xmm2);
|
||||
cmpl(edx, 52);
|
||||
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
|
||||
pand(xmm4, xmm2);
|
||||
paddd(xmm3, xmm6);
|
||||
subsd(xmm2, xmm4);
|
||||
addsd(xmm0, xmm2);
|
||||
// ecx holds the unhalved m >> 6 here; values >= 1023 go to the overflow check.
cmpl(ecx, 1023);
|
||||
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
|
||||
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32768);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
|
||||
movapd(xmm6, xmm0);
|
||||
addsd(xmm0, xmm4);
|
||||
mulsd(xmm0, xmm3);
|
||||
// If the exponent field of the product is zero, redo the final step at
// L_2TAG_PACKET_5_0_2; otherwise xmm0 is the result.
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32752);
|
||||
cmpl(ecx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
|
||||
jmp(B1_5);
|
||||
|
||||
bind(L_2TAG_PACKET_5_0_2);
|
||||
mulsd(xmm6, xmm3);
|
||||
mulsd(xmm4, xmm3);
|
||||
movdqu(xmm0, xmm6);
|
||||
pxor(xmm6, xmm4);
|
||||
psrad(xmm6, 31);
|
||||
pshufd(xmm6, xmm6, 85);
|
||||
psllq(xmm0, 1);
|
||||
psrlq(xmm0, 1);
|
||||
pxor(xmm0, xmm6);
|
||||
psrlq(xmm6, 63);
|
||||
paddq(xmm0, xmm6);
|
||||
paddq(xmm0, xmm4);
|
||||
// NOTE(review): the values 14 and 15 stored at rsp+0 on these paths are never
// read back in this function - presumably error codes kept from the original
// LIBM source; confirm.
movl(Address(rsp, 0), 15);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_4_0_2);
|
||||
addsd(xmm0, xmm4);
|
||||
mulsd(xmm0, xmm3);
|
||||
jmp(B1_5);
|
||||
|
||||
bind(L_2TAG_PACKET_3_0_2);
|
||||
addsd(xmm0, xmm4);
|
||||
mulsd(xmm0, xmm3);
|
||||
// An all-ones exponent field (32752 = 0x7ff0) means the product overflowed.
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32752);
|
||||
cmpl(ecx, 32752);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
|
||||
jmp(B1_5);
|
||||
|
||||
bind(L_2TAG_PACKET_2_0_2);
|
||||
paddd(xmm3, xmm6);
|
||||
addpd(xmm0, xmm2);
|
||||
mulsd(xmm0, xmm3);
|
||||
movl(Address(rsp, 0), 15);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// Reached from L_2TAG_PACKET_0_0_2 with eax = high word of |x| >= 0x40900000
// (|x| >= 1024). 2146435072 = 0x7ff00000: at or above it x is INF or NaN.
bind(L_2TAG_PACKET_8_0_2);
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
|
||||
// Reload the signed high word of x; the unsigned compare against INT_MIN is
// taken exactly when the sign bit is set, i.e. for negative x.
movl(eax, Address(rsp, 12));
|
||||
cmpl(eax, INT_MIN);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
|
||||
// Large positive finite x: XMAX * XMAX.
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
|
||||
mulsd(xmm0, xmm0);
|
||||
|
||||
bind(L_2TAG_PACKET_7_0_2);
|
||||
movl(Address(rsp, 0), 14);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// Large negative finite x: XMIN * XMIN.
bind(L_2TAG_PACKET_10_0_2);
|
||||
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
|
||||
mulsd(xmm0, xmm0);
|
||||
movl(Address(rsp, 0), 15);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// x is INF or NaN. A non-zero mantissa (high word above 0x7ff00000 or a
// non-zero low word) means NaN; otherwise the sign selects +INF or -INF.
bind(L_2TAG_PACKET_9_0_2);
|
||||
movl(edx, Address(rsp, 8));
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
|
||||
movl(eax, Address(rsp, 12));
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
|
||||
// exp(+INF) = +INF
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
|
||||
jmp(B1_5);
|
||||
|
||||
// exp(-INF) = 0
bind(L_2TAG_PACKET_12_0_2);
|
||||
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
|
||||
jmp(B1_5);
|
||||
|
||||
// NaN input: return x + x.
bind(L_2TAG_PACKET_11_0_2);
|
||||
movsd(xmm0, Address(rsp, 8));
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(B1_5);
|
||||
|
||||
// x failed the range check. If the high word of |x| is >= 1083179008
// (0x40900000, |x| >= 1024) go to the large/special handling; otherwise x is
// on the small-magnitude side of the check and the result is x + 1.0.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movl(eax, Address(rsp, 12));
|
||||
andl(eax, 2147483647);
|
||||
cmpl(eax, 1083179008);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
|
||||
movsd(Address(rsp, 8), xmm0);
|
||||
addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
|
||||
jmp(B1_5);
|
||||
|
||||
// Common tail of the overflow/underflow paths: the result is stored to
// rsp+16 and immediately reloaded. B1_3 is bound here but no jump in this
// function targets it.
bind(L_2TAG_PACKET_6_0_2);
|
||||
movq(Address(rsp, 16), xmm0);
|
||||
|
||||
bind(B1_3);
|
||||
movq(xmm0, Address(rsp, 16));
|
||||
|
||||
// Exit: release the 24 bytes reserved at start.
bind(B1_5);
|
||||
addq(rsp, 24);
|
||||
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
// All constants for the 32-bit fast_exp in one table, written as 32-bit word
// pairs with the low word first. fast_exp addresses it through tmp at these
// byte offsets:
//      0: 0xfff0000000000000 in both lanes (sign + exponent mask, used with pandn)
//     16: 0x00000000ffffffc0 in both lanes (clears the low six bits of m)
//     32: 0x000000000000ffc0 in both lanes (1023 << 6)
//     48: 0x4338000000000000 in both lanes (1.5 * 2^52, the rounding shifter)
//     64: 0x40571547652b82fe in both lanes (~64/log(2))
//     80: 0x3f862e42fefa0000 in both lanes
//     96: 0x3d1cf79abc9e3b3a in both lanes
//    112: 0x3fdffffffffffffe in both lanes (just below 0.5)
//    128: two doubles close to 1/720 and 1/24
//    144: two doubles close to 1/120 and 1/6
//    160: 64 table entries of 16 bytes each, indexed by (m & 63) << 4
//   1184: 1.0
//   1192: +infinity
//   1200: +0.0
//   1208: 0x7fefffffffffffff (largest finite double)
//   1216: 0x0010000000000000 (smallest positive normal double)
// NOTE(review): offsets 80/96 look like the leading/trailing parts of
// log(2)/64 and offsets 112..159 like the coefficients of P(y) - confirm
// against the LIBM sources.
ALIGNED_(16) juint _static_const_table[] =
|
||||
{
|
||||
0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
|
||||
0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
|
||||
0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
|
||||
0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
|
||||
0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
|
||||
0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
|
||||
0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
|
||||
0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
|
||||
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
|
||||
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
|
||||
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
|
||||
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
|
||||
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
|
||||
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
|
||||
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
|
||||
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
|
||||
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
|
||||
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
|
||||
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
|
||||
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
|
||||
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
|
||||
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
|
||||
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
|
||||
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
|
||||
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
|
||||
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
|
||||
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
|
||||
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
|
||||
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
|
||||
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
|
||||
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
|
||||
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
|
||||
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
|
||||
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
|
||||
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
|
||||
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
|
||||
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
|
||||
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
|
||||
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
|
||||
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
|
||||
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
|
||||
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
|
||||
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
|
||||
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
|
||||
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
|
||||
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
|
||||
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
|
||||
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
|
||||
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
|
||||
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
|
||||
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
|
||||
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
|
||||
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
|
||||
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
|
||||
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
|
||||
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
|
||||
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
|
||||
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
|
||||
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
|
||||
0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
|
||||
0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
|
||||
0x00100000UL
|
||||
};
|
||||
|
||||
// Registers:
|
||||
// input: (rbp + 8)
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 32-bit (SSE2 + x87) code sequence for exp(x), following the
// table-driven algorithm described at the top of this file.
//
//   xmm0..xmm7         - XMM scratch; xmm0 is loaded from the stack argument
//   eax, ecx, edx, tmp - integer scratch, must be four distinct registers
//                        (asserted below). tmp is saved at rsp+64, used as
//                        the base of _static_const_table and restored before
//                        the sequence ends.
//
// Contract of the emitted code as far as this function shows:
//   - the double argument is read from rsp+128 after 120 bytes of stack have
//     been reserved, i.e. from rsp+8 on entry;
//   - every exit path ends with an fld_d, so the result is left on the x87
//     stack;
//   - NOTE(review): the 120 bytes reserved here are not released in this
//     function - presumably the caller restores rsp; confirm.
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label start;

  assert_different_registers(tmp, eax, ecx, edx);
  // NOTE(review): nothing is emitted between this jump and bind(start), so the
  // jump looks redundant; it is kept so the emitted code is unchanged.
  jmp(start);
  address static_const_table = (address)_static_const_table;

  bind(start);
  // Reserve 120 bytes, save tmp, point tmp at the constant table and load x
  // into both lanes of xmm0.
  subl(rsp, 120);
  movl(Address(rsp, 64), tmp);
  lea(tmp, ExternalAddress(static_const_table));
  movdqu(xmm0, Address(rsp, 128));
  unpcklpd(xmm0, xmm0);
  movdqu(xmm1, Address(tmp, 64));          // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, Address(tmp, 48));          // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, Address(tmp, 80));          // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, Address(tmp, 96));          // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL

  // Range check on the top 16 bits of x with the sign bit masked off: outside
  // [15504, 16527] one of the differences is negative, the OR has its sign bit
  // set, and the unsigned compare against INT_MIN branches to the special
  // cases at L_2TAG_PACKET_0_0_2.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  // Main path: round x * 64/log(2) to an integer m with the add/subtract
  // shifter trick; xmm7 keeps the shifted sum whose low 32 bits are m.
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);
  movdqu(xmm4, Address(tmp, 128));         // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, Address(tmp, 144));         // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  // ecx = (m & 63) * 16, the byte offset of a table entry; eax = edx = m >> 6.
  movdl(eax, xmm7);
  movl(ecx, eax);
  andl(ecx, 63);
  shll(ecx, 4);
  sarl(eax, 6);
  movl(edx, eax);
  // Build exponent bits from m: clear the low six bits, add 1023 << 6, shift
  // into the exponent field.
  movdqu(xmm6, Address(tmp, 16));          // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, Address(tmp, 32));          // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);
  // xmm2 = the table entry selected by ecx (the table starts at offset 160).
  movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, Address(tmp, 112));          // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  por(xmm2, xmm7);
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  // Unsigned check that (m >> 6) + 894 is at most 1916; otherwise take the
  // x87 path at L_2TAG_PACKET_1_0_2.
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
  // Common case: result = xmm0 * xmm2 + xmm2.
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp(L_2TAG_PACKET_2_0_2);

  // Exponent outside the range accepted above. The x87 control word is saved
  // at rsp+24 and a copy with bits 8-9 (the precision-control field) set is
  // loaded for the duration of the computation.
  bind(L_2TAG_PACKET_1_0_2);
  fnstcw(Address(rsp, 24));
  movzwl(edx, Address(rsp, 24));
  orl(edx, 768);
  movw(Address(rsp, 28), edx);
  fldcw(Address(rsp, 28));
  // Split n = m >> 6 into eax = n >> 1 and edx = n - (n >> 1); each half is
  // biased by 1023 and moved into an exponent field.
  movl(edx, eax);
  sarl(eax, 1);
  subl(edx, eax);
  movdqu(xmm6, Address(tmp, 0));           // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
  pandn(xmm6, xmm2);                       // keep only the mantissa bits of xmm2
  addl(eax, 1023);
  movdl(xmm3, eax);
  psllq(xmm3, 52);
  por(xmm6, xmm3);
  addl(edx, 1023);
  movdl(xmm4, edx);
  psllq(xmm4, 52);
  // NOTE(review): presumably evaluates (xmm0 * xmm6 + xmm6) * xmm4 on the x87
  // stack - confirm the operand order of fmula/faddp/fmulp.
  movsd(Address(rsp, 8), xmm0);
  fld_d(Address(rsp, 8));
  movsd(Address(rsp, 16), xmm6);
  fld_d(Address(rsp, 16));
  fmula(1);
  faddp(1);
  movsd(Address(rsp, 8), xmm4);
  fld_d(Address(rsp, 8));
  fmulp(1);
  fstp_d(Address(rsp, 8));
  movsd(xmm0, Address(rsp, 8));
  fldcw(Address(rsp, 24));                 // restore the saved control word
  // Classify the result by its exponent field: all ones -> overflow path,
  // zero -> underflow path, anything else is returned as is.
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_2_0_2);
  // NOTE(review): the instructions from here to the next bind() follow an
  // unconditional jump and carry no label, so they are unreachable. They are
  // kept so the emitted code layout matches the original.
  cmpl(ecx, INT_MIN);
  jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, -1064950997);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(edx, -17155601);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jmp(L_2TAG_PACKET_4_0_2);

  // NOTE(review): edx = 14 / 15 set on the paths below is never read in this
  // function - presumably an error code kept from the original LIBM source.
  bind(L_2TAG_PACKET_3_0_2);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  movl(edx, 15);

  // Common tail of the overflow/underflow paths: push the result onto the x87
  // stack and reload xmm0 with the original argument.
  bind(L_2TAG_PACKET_5_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 128));
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_6_0_2);

  // Reached from L_2TAG_PACKET_0_0_2 with eax = high word of |x| >= 0x40900000
  // (|x| >= 1024). 2146435072 = 0x7ff00000: at or above it x is INF or NaN.
  // (eax has its sign bit cleared here, so the signed compare is safe.)
  bind(L_2TAG_PACKET_7_0_2);
  cmpl(eax, 2146435072);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
  // Reload the raw high word of x and test its sign bit. This must be an
  // UNSIGNED compare: a signed "greaterEqual" against INT_MIN is true for every
  // value, which sent large positive arguments down the underflow path and
  // made the overflow code below unreachable. The 64-bit variant of this
  // function uses aboveEqual at the same point.
  movl(eax, Address(rsp, 132));
  cmpl(eax, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
  // Large positive finite x: XMAX * XMAX.
  movsd(xmm0, Address(tmp, 1208));         // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  // Large negative finite x: XMIN * XMIN.
  bind(L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1216));         // 0x00000000UL, 0x00100000UL
  mulsd(xmm0, xmm0);
  movl(edx, 15);
  jmp(L_2TAG_PACKET_5_0_2);

  // x is INF or NaN. A non-zero mantissa (high word above 0x7ff00000 or a
  // non-zero low word) means NaN; otherwise the sign selects +INF or -INF.
  bind(L_2TAG_PACKET_8_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
  movl(eax, Address(rsp, 132));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
  // exp(+INF) = +INF
  movsd(xmm0, Address(tmp, 1192));         // 0x00000000UL, 0x7ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // exp(-INF) = 0
  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1200));         // 0x00000000UL, 0x00000000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // NaN input: return x + x.
  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, xmm0);
  jmp(L_2TAG_PACKET_2_0_2);

  // x failed the range check. If the high word of |x| is >= 1083179008
  // (0x40900000, |x| >= 1024) go to the large/special handling; otherwise x is
  // on the small-magnitude side of the check and the result is x + 1.0.
  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 132));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, Address(tmp, 1184));         // 0x00000000UL, 0x3ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // Normal exit: move the SSE result onto the x87 stack.
  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 48), xmm0);
  fld_d(Address(rsp, 48));

  // All paths: restore the caller's tmp.
  bind(L_2TAG_PACKET_6_0_2);
  movl(tmp, Address(rsp, 64));
}
|
||||
#endif
|
655
hotspot/src/cpu/x86/vm/macroAssembler_x86_log.cpp
Normal file
655
hotspot/src/cpu/x86/vm/macroAssembler_x86_log.cpp
Normal file
@ -0,0 +1,655 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
// ALIGNED_(x): prefix for a declaration that requests x-byte alignment.
// Expands to the MSVC __declspec form when _MSC_VER is defined and to the
// GCC-style __attribute__ form otherwise.
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - LOG()
|
||||
// ---------------------
|
||||
//
|
||||
// x=2^k * mx, mx in [1,2)
|
||||
//
|
||||
// Get B~1/mx based on the output of rcpss instruction (B0)
|
||||
// B = int((B0*2^7+0.5))/2^7
|
||||
//
|
||||
// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
|
||||
//
|
||||
// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and
|
||||
// p(r) is a degree 7 polynomial
|
||||
// -log(B) read from data table (high, low parts)
|
||||
// Result is formed from high and low parts
|
||||
//
|
||||
// Special cases:
|
||||
// log(NaN) = quiet NaN, and raise invalid exception
|
||||
// log(+INF) = that INF
|
||||
// log(0) = -INF with divide-by-zero exception raised
|
||||
// log(1) = +0
|
||||
// log(x) = NaN with invalid exception raised if x < -0, including -INF
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Lookup table read by the 64-bit fast_log with a 16-byte load at a byte offset
// that is a multiple of 16 (the index is masked with 0xFF0000 and shifted right
// by 12). The table holds 516 32-bit words, i.e. 129 entries of 16 bytes each.
// Per the algorithm description above, an entry carries the high and low parts
// of -log(B); each part is written here as two 32-bit words, low word first.
ALIGNED_(16) juint _L_tbl[] =
|
||||
{
|
||||
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
|
||||
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
|
||||
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
|
||||
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
|
||||
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
|
||||
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
|
||||
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
|
||||
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
|
||||
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
|
||||
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
|
||||
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
|
||||
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
|
||||
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
|
||||
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
|
||||
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
|
||||
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
|
||||
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
|
||||
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
|
||||
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
|
||||
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
|
||||
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
|
||||
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
|
||||
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
|
||||
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
|
||||
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
|
||||
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
|
||||
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
|
||||
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
|
||||
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
|
||||
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
|
||||
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
|
||||
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
|
||||
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
|
||||
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
|
||||
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
|
||||
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
|
||||
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
|
||||
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
|
||||
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
|
||||
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
|
||||
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
|
||||
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
|
||||
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
|
||||
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
|
||||
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
|
||||
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
|
||||
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
|
||||
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
|
||||
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
|
||||
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
|
||||
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
|
||||
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
|
||||
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
|
||||
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
|
||||
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
|
||||
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
|
||||
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
|
||||
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
|
||||
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
|
||||
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
|
||||
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
|
||||
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
|
||||
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
|
||||
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
|
||||
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
|
||||
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
|
||||
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
|
||||
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
|
||||
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
|
||||
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
|
||||
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
|
||||
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
|
||||
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
|
||||
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
|
||||
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
|
||||
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
|
||||
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
|
||||
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
|
||||
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
|
||||
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
|
||||
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
|
||||
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
|
||||
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
|
||||
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
|
||||
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
|
||||
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
|
||||
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
|
||||
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
|
||||
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
|
||||
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
|
||||
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
|
||||
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
|
||||
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
|
||||
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
|
||||
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
|
||||
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
|
||||
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
|
||||
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
|
||||
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
|
||||
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
|
||||
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
|
||||
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
|
||||
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x80000000UL
|
||||
};
|
||||
|
||||
// Two 64-bit constants, each written as (low word, high word). The 64-bit
// fast_log multiplies both by the value it derives from the exponent field
// (which is still scaled by 16 at that point). Read as little-endian IEEE-754
// doubles, the first is approximately ln(2)/16 and the second is a small
// low-order correction to it.
ALIGNED_(16) juint _log2[] = {
  0xfefa3800UL, 0x3fa62e42UL,   // high part
  0x93c76730UL, 0x3ceef357UL    // low part
};
|
||||
|
||||
// Six 64-bit constants, each written as (low word, high word), loaded by the
// 64-bit fast_log as three 16-byte pairs at offsets 0, 16 and 32. These are the
// coefficients of the polynomial p(r) from the algorithm description. The
// approximate values shown assume the words are read as little-endian IEEE-754
// doubles.
ALIGNED_(16) juint _coeff[] = {
  0x92492492UL, 0x3fc24924UL,   // offset  0: ~  1/7
  0x00000000UL, 0xbfd00000UL,   // offset  8:   -1/4
  0x3d6fb175UL, 0xbfc5555eUL,   // offset 16: ~ -1/6
  0x55555555UL, 0x3fd55555UL,   // offset 24: ~  1/3
  0x9999999aUL, 0x3fc99999UL,   // offset 32: ~  1/5
  0x00000000UL, 0xbfe00000UL    // offset 40:   -1/2
};
|
||||
|
||||
//registers,
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, r8, r11
|
||||
|
||||
// Emits the instruction sequence that computes the natural logarithm of the
// double held in xmm0 and leaves the result in xmm0 (see the algorithm
// description earlier in this file).
//
//   xmm0           - input value; receives the result
//   xmm1 .. xmm7   - scratch
//   eax, ecx, edx  - scratch general-purpose registers
//   tmp1           - scratch; used to build the 0xffffe00000000000 mask
//   tmp2           - scratch; holds the address of _L_tbl
//
// All general-purpose scratch registers must be distinct (asserted below).
// The sequence reserves 24 bytes of stack and releases them before falling off
// the end:
//   [rsp + 0]  the original argument
//   [rsp + 8]  the result produced on the NaN / -INF paths
//   [rsp + 16] a code (2 or 3) written on those paths; not read back here
//
// The register parameters are used for every general-purpose access. Earlier
// revisions hard-coded rax/rdx/rcx for the initial constant setup, which was
// only correct when callers passed exactly those registers.
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2;
  Label B1_3, B1_5, start;

  assert_different_registers(tmp1, tmp2, eax, ecx, edx);
  jmp(start);
  // Host-side addresses of the constant tables; nothing is emitted for these.
  address L_tbl = (address)_L_tbl;
  address log2 = (address)_log2;
  address coeff = (address)_coeff;

  bind(start);
  subq(rsp, 24);
  movsd(Address(rsp, 0), xmm0);           // keep the original argument for the special paths
  mov64(eax, 0x3ff0000000000000);
  movdq(xmm2, eax);
  mov64(edx, 0x77f0000000000000);
  movdq(xmm3, edx);
  movl(ecx, 32768);
  movdl(xmm4, ecx);
  mov64(tmp1, 0xffffe00000000000);
  movdq(xmm5, tmp1);
  movdqu(xmm1, xmm0);
  pextrw(eax, xmm0, 3);                   // top 16 bits of x: sign, exponent, 4 mantissa bits
  por(xmm0, xmm2);
  movl(ecx, 16352);                       // exponent offset subtracted on the main path
  psrlq(xmm0, 27);
  lea(tmp2, ExternalAddress(L_tbl));
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  // Unsigned test: taken when the top word is below 16 (exponent field zero),
  // at least 0x7ff0 (exponent field all ones), or has the sign bit set.
  subl(eax, 16);
  cmpl(eax, 32736);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  // Main path; also re-entered from the denormal path after rescaling.
  bind(L_2TAG_PACKET_1_0_2);
  paddd(xmm0, xmm4);
  por(xmm1, xmm3);
  movdl(edx, xmm0);
  psllq(xmm0, 29);
  pand(xmm5, xmm1);
  pand(xmm0, xmm6);
  subsd(xmm1, xmm5);
  mulpd(xmm5, xmm0);
  andl(eax, 32752);
  subl(eax, ecx);
  cvtsi2sdl(xmm7, eax);
  mulsd(xmm1, xmm0);
  movq(xmm6, ExternalAddress(log2));    // 0xfefa3800UL, 0x3fa62e42UL
  movdqu(xmm3, ExternalAddress(coeff));    // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
  subsd(xmm5, xmm2);
  // Turn bits 16..23 of edx into a 16-byte-stride byte offset into _L_tbl.
  andl(edx, 16711680);
  shrl(edx, 12);
  movdqu(xmm0, Address(tmp2, edx));
  movdqu(xmm4, ExternalAddress(16 + coeff));    // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
  addsd(xmm1, xmm5);
  movdqu(xmm2, ExternalAddress(32 + coeff));    // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
  mulsd(xmm6, xmm7);
  // Duplicate the low double of xmm1 into both halves of xmm5.
  if (VM_Version::supports_sse3()) {
    movddup(xmm5, xmm1);
  } else {
    movdqu(xmm5, xmm1);
    movlhps(xmm5, xmm5);
  }
  mulsd(xmm7, ExternalAddress(8 + log2));    // 0x93c76730UL, 0x3ceef357UL
  mulsd(xmm3, xmm1);
  addsd(xmm0, xmm6);
  mulpd(xmm4, xmm5);
  mulpd(xmm5, xmm5);
  // Duplicate the low double of xmm0 into both halves of xmm6.
  if (VM_Version::supports_sse3()) {
    movddup(xmm6, xmm0);
  } else {
    movdqu(xmm6, xmm0);
    movlhps(xmm6, xmm6);
  }
  addsd(xmm0, xmm1);
  addpd(xmm4, xmm2);
  mulpd(xmm3, xmm5);
  subsd(xmm6, xmm0);
  mulsd(xmm4, xmm1);
  pshufd(xmm2, xmm0, 238);
  addsd(xmm1, xmm6);
  mulsd(xmm5, xmm5);
  addsd(xmm7, xmm2);
  addpd(xmm4, xmm3);
  addsd(xmm1, xmm7);
  mulpd(xmm4, xmm5);
  addsd(xmm1, xmm4);
  pshufd(xmm5, xmm4, 238);
  addsd(xmm1, xmm5);
  addsd(xmm0, xmm1);
  jmp(B1_5);

  // Special inputs: reload x and classify it from its top word.
  bind(L_2TAG_PACKET_0_0_2);
  movq(xmm0, Address(rsp, 0));
  movq(xmm1, Address(rsp, 0));
  addl(eax, 16);
  cmpl(eax, 32768);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);    // sign bit set
  cmpl(eax, 16);
  jcc(Assembler::below, L_2TAG_PACKET_3_0_2);         // exponent field zero

  // Exponent field all ones: the result is x + x.
  bind(L_2TAG_PACKET_4_0_2);
  addsd(xmm0, xmm0);
  jmp(B1_5);

  // Sign set and exponent all ones. The first jcc still tests the flags of the
  // cmpl(ecx, -2097152) that branched here; any nonzero mantissa bit goes to the
  // x + x path, otherwise fall through to the NaN path.
  bind(L_2TAG_PACKET_5_0_2);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
  cmpl(edx, 0);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_6_0_2);

  // Exponent field zero with the sign clear: zero or denormal.
  bind(L_2TAG_PACKET_3_0_2);
  xorpd(xmm1, xmm1);
  addsd(xmm1, xmm0);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);         // all bits zero
  // Nonzero: multiply by the double whose top word is 18416 (0x47f0), redo the
  // setup from the entry sequence, and rejoin the main path with a different
  // exponent offset in ecx.
  xorpd(xmm1, xmm1);
  movl(eax, 18416);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movdqu(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psrlq(xmm0, 27);
  movl(ecx, 18416);
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  jmp(L_2TAG_PACKET_1_0_2);

  // Sign bit set: split x into edx (low word) and ecx (high word shifted left
  // by one, which drops the sign bit).
  bind(L_2TAG_PACKET_2_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  addl(ecx, ecx);
  cmpl(ecx, -2097152);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);    // exponent field all ones
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);         // every bit but the sign is zero

  // Result is 0 * (double with top word 0x7ff0).
  bind(L_2TAG_PACKET_6_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 32752);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movl(Address(rsp, 16), 3);
  jmp(L_2TAG_PACKET_8_0_2);

  // Result is (double with top word 0xbff0) / 0.
  bind(L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 49136);
  pinsrw(xmm0, eax, 3);
  divsd(xmm0, xmm1);
  movl(Address(rsp, 16), 2);

  bind(L_2TAG_PACKET_8_0_2);
  movq(Address(rsp, 8), xmm0);

  // B1_3 is bound but has no jump targeting it in this routine.
  bind(B1_3);
  movq(xmm0, Address(rsp, 8));

  bind(B1_5);
  addq(rsp, 24);
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
// Single constant table for the 32-bit fast_log, which addresses it through one
// base register. It holds 536 32-bit words (2144 bytes), laid out by byte offset
// as the routine uses it:
//      0 .. 2063  129 entries of 16 bytes, selected by the computed table index
//   2064          8 bytes: 0xfefa3800, 0x3fa62e42
//   2072          8 bytes: 0x93c76730, 0x3ceef357
//   2080, 2096, 2112  three 16-byte pairs of polynomial coefficients
//   2128          16 bytes: the 0x00000000, 0xffffe000 mask, stored twice
ALIGNED_(16) juint _static_const_table_log[] =
|
||||
{
|
||||
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
|
||||
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
|
||||
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
|
||||
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
|
||||
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
|
||||
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
|
||||
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
|
||||
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
|
||||
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
|
||||
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
|
||||
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
|
||||
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
|
||||
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
|
||||
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
|
||||
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
|
||||
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
|
||||
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
|
||||
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
|
||||
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
|
||||
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
|
||||
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
|
||||
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
|
||||
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
|
||||
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
|
||||
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
|
||||
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
|
||||
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
|
||||
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
|
||||
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
|
||||
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
|
||||
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
|
||||
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
|
||||
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
|
||||
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
|
||||
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
|
||||
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
|
||||
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
|
||||
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
|
||||
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
|
||||
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
|
||||
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
|
||||
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
|
||||
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
|
||||
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
|
||||
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
|
||||
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
|
||||
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
|
||||
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
|
||||
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
|
||||
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
|
||||
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
|
||||
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
|
||||
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
|
||||
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
|
||||
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
|
||||
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
|
||||
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
|
||||
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
|
||||
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
|
||||
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
|
||||
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
|
||||
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
|
||||
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
|
||||
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
|
||||
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
|
||||
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
|
||||
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
|
||||
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
|
||||
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
|
||||
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
|
||||
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
|
||||
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
|
||||
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
|
||||
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
|
||||
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
|
||||
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
|
||||
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
|
||||
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
|
||||
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
|
||||
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
|
||||
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
|
||||
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
|
||||
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
|
||||
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
|
||||
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
|
||||
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
|
||||
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
|
||||
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
|
||||
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
|
||||
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
|
||||
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
|
||||
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
|
||||
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
|
||||
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
|
||||
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
|
||||
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
|
||||
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
|
||||
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
|
||||
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
|
||||
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
|
||||
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
|
||||
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
|
||||
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
|
||||
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
|
||||
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
|
||||
0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
|
||||
0xffffe000UL
|
||||
};
|
||||
//registers,
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// 32-bit variant. Emits the instruction sequence that computes the natural
// logarithm of the double read from [rsp + 112] (offset measured after this
// routine lowers rsp by 104) and loads the result with fld_d.
//
//   xmm0 .. xmm7   - scratch
//   eax, ecx, edx  - scratch general-purpose registers
//   tmp            - holds the address of _static_const_table_log while the
//                    sequence runs; its incoming value is saved at [rsp + 40]
//                    and reloaded at the end
//
// Stack slots used: [rsp + 0] result of the NaN / -INF paths, [rsp + 24] result
// of the other paths, [rsp + 40] saved tmp.
// NOTE(review): rsp is lowered by 104 at entry and is not raised again in this
// routine; presumably the caller's frame teardown restores it - confirm.
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_entry;
  Label L_core;                // main computation
  Label L_special_exponent;    // exponent field 0 or all ones, or sign bit set
  Label L_sign_bit_set;
  Label L_zero_or_denormal;
  Label L_propagate;           // result is x + x
  Label L_sign_set_exp_ones;   // sign set and exponent field all ones
  Label L_zero_times_inf;      // result is 0 * (double with top word 0x7ff0)
  Label L_minus_one_over_zero; // result is (double with top word 0xbff0) / 0
  Label L_load_special;        // fld_d of a result from the two paths above
  Label L_load_result;         // fld_d of any other result
  Label L_restore_tmp;

  assert_different_registers(tmp, eax, ecx, edx);
  jmp(L_entry);

  bind(L_entry);
  subl(rsp, 104);
  movl(Address(rsp, 40), tmp);
  lea(tmp, ExternalAddress((address)_static_const_table_log));
  xorpd(xmm2, xmm2);
  movl(eax, 16368);
  pinsrw(xmm2, eax, 3);
  xorpd(xmm3, xmm3);
  movl(edx, 30704);
  pinsrw(xmm3, edx, 3);
  movsd(xmm0, Address(rsp, 112));
  movapd(xmm1, xmm0);
  movl(ecx, 32768);
  movdl(xmm4, ecx);
  movsd(xmm5, Address(tmp, 2128));    // 0x00000000UL, 0xffffe000UL
  pextrw(eax, xmm0, 3);               // top 16 bits of x: sign, exponent, 4 mantissa bits
  por(xmm0, xmm2);
  psllq(xmm0, 5);
  movl(ecx, 16352);                   // exponent offset subtracted in the core
  psrlq(xmm0, 34);
  rcpss(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  // Unsigned test: taken when the top word is below 16, at least 0x7ff0, or has
  // the sign bit set.
  subl(eax, 16);
  cmpl(eax, 32736);
  jcc(Assembler::aboveEqual, L_special_exponent);

  // Core computation; the denormal path jumps back here after rescaling.
  bind(L_core);
  paddd(xmm0, xmm4);
  por(xmm1, xmm3);
  movdl(edx, xmm0);
  psllq(xmm0, 29);
  pand(xmm5, xmm1);
  pand(xmm0, xmm6);
  subsd(xmm1, xmm5);
  mulpd(xmm5, xmm0);
  andl(eax, 32752);
  subl(eax, ecx);
  cvtsi2sdl(xmm7, eax);
  mulsd(xmm1, xmm0);
  movsd(xmm6, Address(tmp, 2064));    // 0xfefa3800UL, 0x3fa62e42UL
  movdqu(xmm3, Address(tmp, 2080));   // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
  subsd(xmm5, xmm2);
  // Turn bits 16..23 of edx into a 16-byte-stride byte offset into the table.
  andl(edx, 16711680);
  shrl(edx, 12);
  movdqu(xmm0, Address(tmp, edx));
  movdqu(xmm4, Address(tmp, 2096));   // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
  addsd(xmm1, xmm5);
  movdqu(xmm2, Address(tmp, 2112));   // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
  mulsd(xmm6, xmm7);
  pshufd(xmm5, xmm1, 68);
  mulsd(xmm7, Address(tmp, 2072));    // 0x93c76730UL, 0x3ceef357UL
  mulsd(xmm3, xmm1);
  addsd(xmm0, xmm6);
  mulpd(xmm4, xmm5);
  mulpd(xmm5, xmm5);
  pshufd(xmm6, xmm0, 228);
  addsd(xmm0, xmm1);
  addpd(xmm4, xmm2);
  mulpd(xmm3, xmm5);
  subsd(xmm6, xmm0);
  mulsd(xmm4, xmm1);
  pshufd(xmm2, xmm0, 238);
  addsd(xmm1, xmm6);
  mulsd(xmm5, xmm5);
  addsd(xmm7, xmm2);
  addpd(xmm4, xmm3);
  addsd(xmm1, xmm7);
  mulpd(xmm4, xmm5);
  addsd(xmm1, xmm4);
  pshufd(xmm5, xmm4, 238);
  addsd(xmm1, xmm5);
  addsd(xmm0, xmm1);
  jmp(L_load_result);

  // Reload x and classify it from its top word.
  bind(L_special_exponent);
  movsd(xmm0, Address(rsp, 112));
  movdqu(xmm1, xmm0);
  addl(eax, 16);
  cmpl(eax, 32768);
  jcc(Assembler::aboveEqual, L_sign_bit_set);
  cmpl(eax, 16);
  jcc(Assembler::below, L_zero_or_denormal);

  // Exponent field all ones with the sign clear.
  bind(L_propagate);
  addsd(xmm0, xmm0);
  jmp(L_load_result);

  // The first jcc still tests the flags of the cmpl(ecx, -2097152) that branched
  // here; any nonzero mantissa bit selects x + x.
  bind(L_sign_set_exp_ones);
  jcc(Assembler::above, L_propagate);
  cmpl(edx, 0);
  jcc(Assembler::above, L_propagate);
  jmp(L_zero_times_inf);

  // Split x into edx (low word) and ecx (high word shifted left by one, which
  // drops the sign bit).
  bind(L_sign_bit_set);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  addl(ecx, ecx);
  cmpl(ecx, -2097152);
  jcc(Assembler::aboveEqual, L_sign_set_exp_ones);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_minus_one_over_zero);    // every bit but the sign is zero

  bind(L_zero_times_inf);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 32752);
  pinsrw(xmm1, eax, 3);
  movl(edx, 3);                       // code left in edx; not read in this routine
  mulsd(xmm0, xmm1);

  bind(L_load_special);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 112));
  fld_d(Address(rsp, 0));
  jmp(L_restore_tmp);

  bind(L_minus_one_over_zero);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 49136);
  pinsrw(xmm0, eax, 3);
  divsd(xmm0, xmm1);
  movl(edx, 2);                       // code left in edx; not read in this routine
  jmp(L_load_special);

  // Exponent field zero with the sign clear.
  bind(L_zero_or_denormal);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_minus_one_over_zero);    // all bits zero
  // Nonzero: multiply by the double whose top word is 18416 (0x47f0), redo the
  // setup from the entry sequence, and rejoin the core with a different
  // exponent offset in ecx.
  xorpd(xmm1, xmm1);
  movl(eax, 18416);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movapd(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psllq(xmm0, 5);
  movl(ecx, 18416);
  psrlq(xmm0, 34);
  rcpss(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  jmp(L_core);

  bind(L_load_result);
  movsd(Address(rsp, 24), xmm0);
  fld_d(Address(rsp, 24));

  bind(L_restore_tmp);
  movl(tmp, Address(rsp, 40));
}
|
||||
#endif
|
687
hotspot/src/cpu/x86/vm/macroAssembler_x86_log10.cpp
Normal file
687
hotspot/src/cpu/x86/vm/macroAssembler_x86_log10.cpp
Normal file
@ -0,0 +1,687 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
// ALIGNED_(x) expands to the compiler-specific spelling of an x-byte alignment
// request: __declspec(align(x)) when _MSC_VER is defined, otherwise the
// __attribute__((aligned(x))) form.
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - LOG10()
|
||||
// ---------------------
|
||||
//
|
||||
// Let x=2^k * mx, mx in [1,2)
|
||||
//
|
||||
// Get B~1/mx based on the output of rcpss instruction (B0)
|
||||
// B = int((B0*LH*2^7+0.5))/2^7
|
||||
// LH is a short approximation for log10(e)
|
||||
//
|
||||
// Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
|
||||
//
|
||||
// Result: k*log10(2) - log(B) + p(r)
|
||||
// p(r) is a degree 7 polynomial
|
||||
// -log(B) read from data table (high, low parts)
|
||||
// Result is formed from high and low parts
|
||||
//
|
||||
// Special cases:
|
||||
// log10(0) = -INF with divide-by-zero exception raised
|
||||
// log10(1) = +0
|
||||
// log10(x) = NaN with invalid exception raised if x < -0, including -INF
|
||||
// log10(+INF) = +INF
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Two 64-bit bit masks stored as little-endian 32-bit word pairs:
//   low  qword: 0xfffffffff8000000
//   high qword: 0xffffe00000000000
// fast_log10 loads the pair into xmm5 and ANDs it with the rebuilt mantissa
// operand; a pshufd(..., 78) copy with the two qwords swapped (xmm6) is ANDed
// with the shifted reciprocal estimate.
ALIGNED_(16) juint _HIGHSIGMASK_log10[] =
|
||||
{
|
||||
0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
|
||||
};
|
||||
|
||||
// Two doubles (little-endian 32-bit word pairs).
//   [0] 0x3fdbc00000000000 = 0.43359375, the short approximation LH of
//       log10(e) named in the algorithm description; it is subtracted from
//       B*mx to form the reduced argument.
//   [1] 0x3f5a7a6cbf2e4108, loaded separately through 8 + LOG10_E and
//       multiplied by the reduced argument.
//       NOTE(review): presumably log10(e)/LH - 1 -- confirm.
ALIGNED_(16) juint _LOG10_E[] =
|
||||
{
|
||||
0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
};
|
||||
|
||||
ALIGNED_(16) juint _L_tbl_log10[] =
|
||||
{
|
||||
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
|
||||
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
|
||||
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
|
||||
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
|
||||
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
|
||||
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
|
||||
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
|
||||
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
|
||||
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
|
||||
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
|
||||
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
|
||||
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
|
||||
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
|
||||
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
|
||||
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
|
||||
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
|
||||
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
|
||||
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
|
||||
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
|
||||
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
|
||||
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
|
||||
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
|
||||
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
|
||||
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
|
||||
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
|
||||
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
|
||||
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
|
||||
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
|
||||
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
|
||||
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
|
||||
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
|
||||
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
|
||||
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
|
||||
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
|
||||
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
|
||||
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
|
||||
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
|
||||
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
|
||||
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
|
||||
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
|
||||
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
|
||||
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
|
||||
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
|
||||
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
|
||||
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
|
||||
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
|
||||
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
|
||||
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
|
||||
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
|
||||
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
|
||||
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
|
||||
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
|
||||
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
|
||||
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
|
||||
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
|
||||
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
|
||||
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
|
||||
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
|
||||
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
|
||||
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
|
||||
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
|
||||
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
|
||||
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
|
||||
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
|
||||
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
|
||||
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
|
||||
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
|
||||
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
|
||||
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
|
||||
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
|
||||
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
|
||||
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
|
||||
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
|
||||
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
|
||||
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
|
||||
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
|
||||
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
|
||||
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
|
||||
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
|
||||
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
|
||||
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
|
||||
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
|
||||
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
|
||||
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
|
||||
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
|
||||
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
|
||||
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
|
||||
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
|
||||
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
|
||||
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
|
||||
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
|
||||
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
|
||||
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
|
||||
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
|
||||
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
|
||||
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
|
||||
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
|
||||
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
|
||||
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
|
||||
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
|
||||
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
|
||||
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
|
||||
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL
|
||||
};
|
||||
|
||||
// Two doubles (little-endian 32-bit word pairs), each multiplied by the
// exponent term that fast_log10 converts with cvtsi2sdl: the first through
// movq(xmm6, log2), the second through mulsd(xmm7, 8 + log2).
// NOTE(review): presumably the high and low parts of log10(2), pre-scaled by
// 2^-4 because the exponent field is kept shifted left by 4 bits -- confirm.
ALIGNED_(16) juint _log2_log10[] =
|
||||
{
|
||||
0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
|
||||
};
|
||||
|
||||
// Six doubles (little-endian 32-bit word pairs) that fast_log10 loads as three
// 16-byte pairs: coeff, 16 + coeff and 32 + coeff.
// NOTE(review): presumably the coefficients of the polynomial p(r) from the
// algorithm description -- confirm against the LIBM source.
ALIGNED_(16) juint _coeff_log10[] =
|
||||
{
|
||||
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
|
||||
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
|
||||
0xdc77b115UL, 0xbff27af2UL
|
||||
};
|
||||
|
||||
// Registers:
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, tmp - r11
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 64-bit log10(x) sequence described in the algorithm comment above.
//   xmm0        - input x on entry, result on exit
//   xmm1..xmm7  - scratch
//   eax/ecx/edx - scratch general purpose registers
//   r11         - scratch; holds the address of the L_tbl lookup table
// The emitted code reserves 24 bytes of stack (released again at B1_5) and
// does not emit a return instruction; the caller is expected to do that.
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_4, B1_5, start;
|
||||
|
||||
assert_different_registers(r11, eax, ecx, edx);
|
||||
|
||||
// Addresses of the constant tables defined above.
address HIGHSIGMASK = (address)_HIGHSIGMASK_log10;
|
||||
address LOG10_E = (address)_LOG10_E;
|
||||
address L_tbl = (address)_L_tbl_log10;
|
||||
address log2 = (address)_log2_log10;
|
||||
address coeff = (address)_coeff_log10;
|
||||
|
||||
bind(start);
|
||||
// Reserve 24 bytes of scratch stack and spill the input to [rsp + 0] so the
// special-case paths can reload it.
subq(rsp, 24);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
|
||||
bind(B1_2);
|
||||
// xmm2 = 1.0 (0x3ff0 inserted as the top 16-bit word of a zeroed register).
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
// xmm7 = 0x3ede0000, i.e. 0.43359375 (LH) as a single-precision value.
movl(ecx, 1054736384);
|
||||
movdl(xmm7, ecx);
|
||||
// xmm3 = exponent pattern 0x77f0 that is OR-ed onto the bare mantissa below.
xorpd(xmm3, xmm3);
|
||||
movl(edx, 30704);
|
||||
pinsrw(xmm3, edx, 3);
|
||||
movdqu(xmm1, xmm0);
|
||||
// xmm4 = 0x8000, the rounding increment added with paddd below.
movl(edx, 32768);
|
||||
movdl(xmm4, edx);
|
||||
movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
|
||||
// eax = top 16 bits of x: sign, 11 exponent bits, upper 4 mantissa bits.
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
// ecx = 0x3fe0, subtracted from the masked exponent field further down.
movl(ecx, 16352);
|
||||
psrlq(xmm0, 27);
|
||||
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
psrld(xmm0, 2);
|
||||
// Reciprocal estimate B0 of the mantissa.
rcpps(xmm0, xmm0);
|
||||
// Clear sign and exponent of the copy in xmm1, leaving only the mantissa bits.
psllq(xmm1, 12);
|
||||
// xmm6 = HIGHSIGMASK with its two 64-bit halves swapped.
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
// Unsigned range check on the sign/exponent word: taken when the word is
// < 16 (zero or denormal) or >= 0x7ff0 (Inf, NaN, or any negative input).
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
// Main path: positive, finite, normalised argument (denormals re-enter here
// after being scaled up).
bind(L_2TAG_PACKET_1_0_2);
|
||||
mulss(xmm0, xmm7);
|
||||
por(xmm1, xmm3);
|
||||
lea(r11, ExternalAddress(L_tbl));
|
||||
andpd(xmm5, xmm1);
|
||||
paddd(xmm0, xmm4);
|
||||
subsd(xmm1, xmm5);
|
||||
// edx keeps the rounded reciprocal bits; they select the L_tbl entry below.
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
andpd(xmm0, xmm6);
|
||||
// Isolate the exponent field (still shifted left by 4), remove the bias held
// in ecx and convert it to double in xmm7.
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulsd(xmm1, xmm0);
|
||||
movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
|
||||
movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
|
||||
subsd(xmm5, xmm2);
|
||||
// Table index: bits 16..23 of edx, turned into a byte offset by the shift;
// the -1504 displacement rebases it onto the start of L_tbl.
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504));
|
||||
movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
|
||||
mulsd(xmm6, xmm7);
|
||||
pshufd(xmm5, xmm1, 68);
|
||||
mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
mulpd(xmm5, xmm5);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
pshufd(xmm2, xmm0, 228);
|
||||
addsd(xmm0, xmm1);
|
||||
mulsd(xmm4, xmm1);
|
||||
subsd(xmm2, xmm0);
|
||||
mulsd(xmm6, xmm1);
|
||||
addsd(xmm1, xmm2);
|
||||
pshufd(xmm2, xmm0, 238);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addsd(xmm1, xmm6);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
// Final sum of the high and low parts; the result is left in xmm0.
addsd(xmm0, xmm1);
|
||||
jmp(B1_5);
|
||||
|
||||
// Special-case dispatch: reload x from the stack and undo the "- 16" applied
// to eax before the range check.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movq(xmm0, Address(rsp, 0));
|
||||
movq(xmm1, Address(rsp, 0));
|
||||
addl(eax, 16);
|
||||
// Sign bit set -> negative-input handling.
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
|
||||
// Exponent field is zero -> zero or denormal input.
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
|
||||
|
||||
// +Inf or NaN: x + x yields +Inf or a NaN, which is returned.
bind(L_2TAG_PACKET_4_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(B1_5);
|
||||
|
||||
// Reached from L_2TAG_PACKET_2_0_2 with the flags of its cmpl still live:
// sign bit set and exponent all ones. Any non-zero mantissa bit means NaN
// (returned as x + x); otherwise x is -Inf, handled as an invalid operand.
bind(L_2TAG_PACKET_5_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// Zero or denormal input.
bind(L_2TAG_PACKET_3_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
addsd(xmm1, xmm0);
|
||||
// OR both 32-bit halves together: all bits zero means x == 0.
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
|
||||
// Denormal: scale x by 2^128 (top word 0x47f0), then repeat the set-up done
// at B1_2. ecx receives 18416 instead of 16352 so that the exponent
// subtraction in the main path accounts for the scaling.
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
movdqu(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
movl(ecx, 18416);
|
||||
psrlq(xmm0, 27);
|
||||
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
psrld(xmm0, 2);
|
||||
rcpps(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Sign bit set. ecx = high dword shifted left by one (drops the sign bit);
// a value >= 0xffe00000 means the exponent is all ones (-Inf or NaN).
bind(L_2TAG_PACKET_2_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
|
||||
// Every remaining bit zero -> x is -0, handled like zero.
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
|
||||
|
||||
// Negative non-zero input (or -Inf): produce NaN by computing 0 * Inf.
bind(L_2TAG_PACKET_6_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
// NOTE(review): 9 is written to [rsp + 16] but never read in this function;
// presumably an error code left over from the LIBM original -- confirm.
movl(Address(rsp, 16), 9);
|
||||
jmp(L_2TAG_PACKET_8_0_2);
|
||||
|
||||
// Zero input: produce -Inf by computing -1.0 / 0.0.
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
// NOTE(review): like the 9 above, this value is stored but never read here.
movl(Address(rsp, 16), 8);
|
||||
|
||||
// Common tail of the two error paths: the result takes a round trip through
// [rsp + 8] and ends up in xmm0 again.
bind(L_2TAG_PACKET_8_0_2);
|
||||
movq(Address(rsp, 8), xmm0);
|
||||
|
||||
bind(B1_3);
|
||||
movq(xmm0, Address(rsp, 8));
|
||||
|
||||
bind(L_2TAG_PACKET_9_0_2);
|
||||
|
||||
// Common exit: release the 24 bytes reserved at 'start'.
bind(B1_5);
|
||||
addq(rsp, 24);
|
||||
|
||||
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
ALIGNED_(16) juint _static_const_table_log10[] =
|
||||
{
|
||||
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
|
||||
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
|
||||
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
|
||||
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
|
||||
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
|
||||
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
|
||||
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
|
||||
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
|
||||
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
|
||||
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
|
||||
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
|
||||
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
|
||||
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
|
||||
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
|
||||
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
|
||||
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
|
||||
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
|
||||
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
|
||||
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
|
||||
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
|
||||
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
|
||||
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
|
||||
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
|
||||
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
|
||||
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
|
||||
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
|
||||
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
|
||||
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
|
||||
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
|
||||
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
|
||||
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
|
||||
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
|
||||
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
|
||||
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
|
||||
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
|
||||
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
|
||||
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
|
||||
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
|
||||
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
|
||||
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
|
||||
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
|
||||
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
|
||||
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
|
||||
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
|
||||
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
|
||||
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
|
||||
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
|
||||
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
|
||||
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
|
||||
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
|
||||
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
|
||||
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
|
||||
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
|
||||
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
|
||||
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
|
||||
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
|
||||
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
|
||||
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
|
||||
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
|
||||
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
|
||||
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
|
||||
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
|
||||
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
|
||||
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
|
||||
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
|
||||
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
|
||||
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
|
||||
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
|
||||
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
|
||||
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
|
||||
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
|
||||
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
|
||||
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
|
||||
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
|
||||
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
|
||||
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
|
||||
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
|
||||
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
|
||||
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
|
||||
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
|
||||
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
|
||||
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
|
||||
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
|
||||
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
|
||||
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
|
||||
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
|
||||
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
|
||||
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
|
||||
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
|
||||
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
|
||||
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
|
||||
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
|
||||
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
|
||||
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
|
||||
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
|
||||
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
|
||||
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
|
||||
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
|
||||
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
|
||||
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
|
||||
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
|
||||
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
|
||||
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL,
|
||||
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
|
||||
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
|
||||
0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL,
|
||||
0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
};
|
||||
//registers,
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Emits the 32-bit log10(x) sequence described in the algorithm comment at the
// top of this file.
//   xmm0..xmm7  - scratch; the argument is loaded from the stack, not taken
//                 from xmm0
//   eax/ecx/edx - scratch general purpose registers
//   tmp         - saved to [rsp + 40] on entry and restored on exit; in
//                 between it holds the address of _static_const_table_log10
// The argument is read from [rsp + 112] after the 104-byte frame has been
// reserved, and the result is pushed onto the x87 stack with fld_d.
// NOTE(review): this function never adds the 104 bytes back to rsp and emits
// no return; it appears to rely on the caller's leave()/ret() pair -- confirm
// before calling it from anywhere else.
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
|
||||
// Single combined constant table. Byte offsets used below:
//   0    .. 2063  lookup table (indexed through edx)
//   2064 .. 2079  same words as _log2_log10 in the 64-bit variant
//   2080 .. 2127  same words as _coeff_log10
//   2128 .. 2143  same words as _HIGHSIGMASK_log10
//   2144 .. 2159  same words as _LOG10_E
address static_const_table_log10 = (address)_static_const_table_log10;
|
||||
|
||||
bind(start);
|
||||
// Reserve a 104-byte frame, save tmp in it and point tmp at the table.
subl(rsp, 104);
|
||||
movl(Address(rsp, 40), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table_log10));
|
||||
// xmm2 = 1.0 (0x3ff0 inserted as the top 16-bit word of a zeroed register).
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
// xmm7 = 0x3ede0000, i.e. 0.43359375 (LH) as a single-precision value.
movl(ecx, 1054736384);
|
||||
movdl(xmm7, ecx);
|
||||
// xmm3 = exponent pattern 0x77f0 that is OR-ed onto the bare mantissa below.
xorpd(xmm3, xmm3);
|
||||
movl(edx, 30704);
|
||||
pinsrw(xmm3, edx, 3);
|
||||
// Load the double argument from the caller's stack area.
movsd(xmm0, Address(rsp, 112));
|
||||
movdqu(xmm1, xmm0);
|
||||
// xmm4 = 0x8000, the rounding increment added with paddd below.
movl(edx, 32768);
|
||||
movdl(xmm4, edx);
|
||||
movdqu(xmm5, Address(tmp, 2128)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
|
||||
// eax = top 16 bits of x: sign, 11 exponent bits, upper 4 mantissa bits.
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
// ecx = 0x3fe0, subtracted from the masked exponent field further down.
movl(ecx, 16352);
|
||||
psllq(xmm0, 5);
|
||||
movsd(xmm2, Address(tmp, 2144)); //0x00000000UL, 0x3fdbc000UL
|
||||
psrlq(xmm0, 34);
|
||||
// Reciprocal estimate B0 of the mantissa.
rcpss(xmm0, xmm0);
|
||||
// Clear sign and exponent of the copy in xmm1, leaving only the mantissa bits.
psllq(xmm1, 12);
|
||||
// xmm6 = the mask pair from offset 2128 with its two 64-bit halves swapped.
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
// Unsigned range check on the sign/exponent word: taken when the word is
// < 16 (zero or denormal) or >= 0x7ff0 (Inf, NaN, or any negative input).
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
// Main path: positive, finite, normalised argument (denormals re-enter here
// after being scaled up).
bind(L_2TAG_PACKET_1_0_2);
|
||||
mulss(xmm0, xmm7);
|
||||
por(xmm1, xmm3);
|
||||
andpd(xmm5, xmm1);
|
||||
paddd(xmm0, xmm4);
|
||||
subsd(xmm1, xmm5);
|
||||
// edx keeps the rounded reciprocal bits; they select the table entry below.
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
andpd(xmm0, xmm6);
|
||||
// Isolate the exponent field (still shifted left by 4), remove the bias held
// in ecx and convert it to double in xmm7.
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulsd(xmm1, xmm0);
|
||||
movsd(xmm6, Address(tmp, 2064)); //0x509f7800UL, 0x3f934413UL
|
||||
movdqu(xmm3, Address(tmp, 2080)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
|
||||
subsd(xmm5, xmm2);
|
||||
// Table index: bits 16..23 of edx, turned into a byte offset by the shift;
// the -1504 displacement rebases it onto the start of the table.
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504));
|
||||
movdqu(xmm4, Address(tmp, 2096)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, Address(tmp, 2112)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
|
||||
mulsd(xmm6, xmm7);
|
||||
pshufd(xmm5, xmm1, 68);
|
||||
mulsd(xmm7, Address(tmp, 2072)); //0x1f12b358UL, 0x3cdfef31UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
movsd(xmm6, Address(tmp, 2152)); //0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
mulpd(xmm5, xmm5);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
pshufd(xmm2, xmm0, 228);
|
||||
addsd(xmm0, xmm1);
|
||||
mulsd(xmm4, xmm1);
|
||||
subsd(xmm2, xmm0);
|
||||
mulsd(xmm6, xmm1);
|
||||
addsd(xmm1, xmm2);
|
||||
pshufd(xmm2, xmm0, 238);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addsd(xmm1, xmm6);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
// Final sum of the high and low parts; the result is in xmm0.
addsd(xmm0, xmm1);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Special-case dispatch: reload x from the stack and undo the "- 16" applied
// to eax before the range check.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movsd(xmm0, Address(rsp, 112)); // reload the input argument from the stack
|
||||
movdqu(xmm1, xmm0);
|
||||
addl(eax, 16);
|
||||
// Sign bit set -> negative-input handling.
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
|
||||
// Exponent field is zero -> zero or denormal input.
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
|
||||
|
||||
// +Inf or NaN: x + x yields +Inf or a NaN, which is returned.
bind(L_2TAG_PACKET_5_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Reached from L_2TAG_PACKET_3_0_2 with the flags of its cmpl still live:
// sign bit set and exponent all ones. Any non-zero mantissa bit means NaN
// (returned as x + x); otherwise x is -Inf, handled as an invalid operand.
bind(L_2TAG_PACKET_6_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
jmp(L_2TAG_PACKET_7_0_2);
|
||||
|
||||
// Sign bit set. ecx = high dword shifted left by one (drops the sign bit);
// a value >= 0xffe00000 means the exponent is all ones (-Inf or NaN).
bind(L_2TAG_PACKET_3_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
|
||||
// Every remaining bit zero -> x is -0, handled like zero.
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
|
||||
// Negative non-zero input (or -Inf): produce NaN by computing 0 * Inf.
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
// NOTE(review): edx = 9 is not read again in this function; presumably an
// error code left over from the LIBM original -- confirm.
movl(edx, 9);
|
||||
mulsd(xmm0, xmm1);
|
||||
|
||||
// Common tail of the two error paths: spill the result and push it onto the
// x87 stack.
bind(L_2TAG_PACKET_9_0_2);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
movsd(xmm0, Address(rsp, 112)); // reload the input argument from the stack
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_10_0_2);
|
||||
|
||||
// Zero input: produce -Inf by computing -1.0 / 0.0.
bind(L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
// NOTE(review): like the 9 above, edx = 8 is not read again here.
movl(edx, 8);
|
||||
jmp(L_2TAG_PACKET_9_0_2);
|
||||
|
||||
// Zero or denormal input.
bind(L_2TAG_PACKET_4_0_2);
|
||||
// OR both 32-bit halves together: all bits zero means x == 0.
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
// Denormal: scale x by 2^128 (top word 0x47f0), then repeat the set-up done
// at the start. ecx receives 18416 instead of 16352 so that the exponent
// subtraction in the main path accounts for the scaling.
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
movdqu(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
movl(ecx, 18416);
|
||||
psllq(xmm0, 5);
|
||||
movsd(xmm2, Address(tmp, 2144)); //0x00000000UL, 0x3fdbc000UL
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Normal exit: move the SSE result onto the x87 stack through [rsp + 24].
bind(L_2TAG_PACKET_2_0_2);
|
||||
movsd(Address(rsp, 24), xmm0);
|
||||
fld_d(Address(rsp, 24));
|
||||
|
||||
// Common exit: restore the caller's tmp register saved at 'start'.
bind(L_2TAG_PACKET_10_0_2);
|
||||
movl(tmp, Address(rsp, 40));
|
||||
|
||||
}
|
||||
#endif
|
3592
hotspot/src/cpu/x86/vm/macroAssembler_x86_pow.cpp
Normal file
3592
hotspot/src/cpu/x86/vm/macroAssembler_x86_pow.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2448
hotspot/src/cpu/x86/vm/macroAssembler_x86_sin.cpp
Normal file
2448
hotspot/src/cpu/x86/vm/macroAssembler_x86_sin.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2144
hotspot/src/cpu/x86/vm/macroAssembler_x86_tan.cpp
Normal file
2144
hotspot/src/cpu/x86/vm/macroAssembler_x86_tan.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -2092,25 +2092,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
entry_checkcast_arraycopy);
|
||||
}
|
||||
|
||||
// Generates the x87-based math stubs for log10 and tan and publishes their
// entry points in StubRoutines. Each stub loads its double argument from
// [rsp + 4] onto the x87 stack, applies the operation and returns.
// NOTE(review): the result is presumably left in st(0) for the caller --
// confirm against the calling convention used for these stubs.
void generate_math_stubs() {
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "log10");
|
||||
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
|
||||
|
||||
__ fld_d(Address(rsp, 4));
|
||||
__ flog10();
|
||||
__ ret(0);
|
||||
}
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "tan");
|
||||
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
|
||||
|
||||
__ fld_d(Address(rsp, 4));
|
||||
// 't' selects the tangent variant of trigfunc.
__ trigfunc('t');
|
||||
__ ret(0);
|
||||
}
|
||||
}
|
||||
|
||||
// AES intrinsic stubs
|
||||
enum {AESBlockSize = 16};
|
||||
|
||||
@ -3533,6 +3514,31 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
}
|
||||
|
||||
// Emits the log10 stub: a frame set-up, the MacroAssembler::fast_log10
// instruction sequence, frame tear-down and a return.
// Returns the address of the first emitted instruction.
address generate_libmLog10() {
  // Capture the entry point before anything is emitted.
  address stub_entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // xmm0..xmm7 and rax/rcx/rdx are the working registers; rbx is the temp.
  __ fast_log10(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
                rax, rcx, rdx, rbx);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return stub_entry;
}
|
||||
|
||||
address generate_libmPow() {
|
||||
address start = __ pc();
|
||||
|
||||
@ -3627,6 +3633,44 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
// Emits the libm_tancot_huge helper sequence and returns the address of its
// first instruction. Unlike the other libm stubs here, no enter()/leave()/ret()
// is emitted around the MacroAssembler call.
address generate_libm_tan_cot_huge() {
  // Capture the entry point before anything is emitted.
  address stub_entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ libm_tancot_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);

  return stub_entry;
}
|
||||
|
||||
// Emits the tan stub and returns the address of its first instruction.
// The body is produced by fast_tan(), bracketed by enter()/leave() so the
// stub has a walkable frame, and is terminated by ret(0).
address generate_libmTan() {
  const address entry_pc = __ pc();

  // rbx is handed to fast_tan as its final register argument.
  const Register scratch = rbx;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ fast_tan(xmm0, xmm1, xmm2, xmm3,
              xmm4, xmm5, xmm6, xmm7,
              rax, rcx, rdx, scratch);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return entry_pc;
}
|
||||
|
||||
// Safefetch stubs.
|
||||
@ -3852,24 +3896,25 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||
}
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
|
||||
StubRoutines::x86::_L_2il0floatpacket_0_adr = (address)StubRoutines::x86::_L_2il0floatpacket_0;
|
||||
StubRoutines::x86::_Pi4Inv_adr = (address)StubRoutines::x86::_Pi4Inv;
|
||||
StubRoutines::x86::_Pi4x3_adr = (address)StubRoutines::x86::_Pi4x3;
|
||||
StubRoutines::x86::_Pi4x4_adr = (address)StubRoutines::x86::_Pi4x4;
|
||||
StubRoutines::x86::_ones_adr = (address)StubRoutines::x86::_ones;
|
||||
StubRoutines::_dexp = generate_libmExp();
|
||||
StubRoutines::_dlog = generate_libmLog();
|
||||
StubRoutines::_dlog10 = generate_libmLog10();
|
||||
StubRoutines::_dpow = generate_libmPow();
|
||||
if (UseLibmSinIntrinsic || UseLibmCosIntrinsic) {
|
||||
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
|
||||
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
|
||||
}
|
||||
if (UseLibmSinIntrinsic) {
|
||||
StubRoutines::_dsin = generate_libmSin();
|
||||
}
|
||||
if (UseLibmCosIntrinsic) {
|
||||
StubRoutines::_dcos = generate_libmCos();
|
||||
}
|
||||
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
|
||||
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
|
||||
StubRoutines::_dsin = generate_libmSin();
|
||||
StubRoutines::_dcos = generate_libmCos();
|
||||
StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge();
|
||||
StubRoutines::_dtan = generate_libmTan();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void generate_all() {
|
||||
// Generates all stubs and initializes the entry points
|
||||
|
||||
@ -3888,8 +3933,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// arraycopy stubs used by compilers
|
||||
generate_arraycopy_stubs();
|
||||
|
||||
generate_math_stubs();
|
||||
|
||||
// don't bother generating these AES intrinsic stubs unless global flag is set
|
||||
if (UseAESIntrinsics) {
|
||||
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
|
||||
|
@ -2971,35 +2971,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
|
||||
}
|
||||
|
||||
void generate_math_stubs() {
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "log10");
|
||||
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
|
||||
|
||||
__ subq(rsp, 8);
|
||||
__ movdbl(Address(rsp, 0), xmm0);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ flog10();
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
__ movdbl(xmm0, Address(rsp, 0));
|
||||
__ addq(rsp, 8);
|
||||
__ ret(0);
|
||||
}
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "tan");
|
||||
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
|
||||
|
||||
__ subq(rsp, 8);
|
||||
__ movdbl(Address(rsp, 0), xmm0);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ trigfunc('t');
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
__ movdbl(xmm0, Address(rsp, 0));
|
||||
__ addq(rsp, 8);
|
||||
__ ret(0);
|
||||
}
|
||||
}
|
||||
|
||||
// AES intrinsic stubs
|
||||
enum {AESBlockSize = 16};
|
||||
|
||||
@ -4730,6 +4701,46 @@ class StubGenerator: public StubCodeGenerator {
|
||||
#endif
|
||||
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
|
||||
|
||||
#ifdef _WIN64
|
||||
// restore xmm regs belonging to calling function
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
address generate_libmLog10() {
|
||||
address start = __ pc();
|
||||
|
||||
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
const XMMRegister x2 = xmm2;
|
||||
const XMMRegister x3 = xmm3;
|
||||
|
||||
const XMMRegister x4 = xmm4;
|
||||
const XMMRegister x5 = xmm5;
|
||||
const XMMRegister x6 = xmm6;
|
||||
const XMMRegister x7 = xmm7;
|
||||
|
||||
const Register tmp = r11;
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
|
||||
#endif
|
||||
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
|
||||
|
||||
#ifdef _WIN64
|
||||
// restore xmm regs belonging to calling function
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
@ -4809,6 +4820,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
__ push(rsi);
|
||||
__ push(rdi);
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
@ -4821,6 +4834,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
__ pop(rdi);
|
||||
__ pop(rsi);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
@ -4852,6 +4867,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
__ push(rsi);
|
||||
__ push(rdi);
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
@ -4864,6 +4881,55 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
__ pop(rdi);
|
||||
__ pop(rsi);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
address generate_libmTan() {
|
||||
address start = __ pc();
|
||||
|
||||
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
const XMMRegister x2 = xmm2;
|
||||
const XMMRegister x3 = xmm3;
|
||||
|
||||
const XMMRegister x4 = xmm4;
|
||||
const XMMRegister x5 = xmm5;
|
||||
const XMMRegister x6 = xmm6;
|
||||
const XMMRegister x7 = xmm7;
|
||||
|
||||
const Register tmp1 = r8;
|
||||
const Register tmp2 = r9;
|
||||
const Register tmp3 = r10;
|
||||
const Register tmp4 = r11;
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
__ push(rsi);
|
||||
__ push(rdi);
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
|
||||
#endif
|
||||
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
|
||||
|
||||
#ifdef _WIN64
|
||||
// restore xmm regs belonging to calling function
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
__ pop(rdi);
|
||||
__ pop(rsi);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
@ -5064,16 +5130,28 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||
}
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
|
||||
StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
|
||||
StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
|
||||
StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
|
||||
StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
|
||||
StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
|
||||
StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
|
||||
StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
|
||||
StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
|
||||
StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
|
||||
StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
|
||||
StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
|
||||
StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
|
||||
StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
|
||||
StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
|
||||
StubRoutines::_dexp = generate_libmExp();
|
||||
StubRoutines::_dlog = generate_libmLog();
|
||||
StubRoutines::_dlog10 = generate_libmLog10();
|
||||
StubRoutines::_dpow = generate_libmPow();
|
||||
if (UseLibmSinIntrinsic) {
|
||||
StubRoutines::_dsin = generate_libmSin();
|
||||
}
|
||||
if (UseLibmCosIntrinsic) {
|
||||
StubRoutines::_dcos = generate_libmCos();
|
||||
}
|
||||
StubRoutines::_dtan = generate_libmTan();
|
||||
StubRoutines::_dsin = generate_libmSin();
|
||||
StubRoutines::_dcos = generate_libmCos();
|
||||
}
|
||||
}
|
||||
|
||||
@ -5118,8 +5196,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// arraycopy stubs used by compilers
|
||||
generate_arraycopy_stubs();
|
||||
|
||||
generate_math_stubs();
|
||||
|
||||
// don't bother generating these AES intrinsic stubs unless global flag is set
|
||||
if (UseAESIntrinsics) {
|
||||
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
|
||||
|
@ -48,6 +48,29 @@ address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
|
||||
address StubRoutines::x86::_k256_adr = NULL;
|
||||
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
|
||||
|
||||
// Tables shared by sin and cos
|
||||
address StubRoutines::x86::_ONEHALF_adr = NULL;
|
||||
address StubRoutines::x86::_P_2_adr = NULL;
|
||||
address StubRoutines::x86::_SC_4_adr = NULL;
|
||||
address StubRoutines::x86::_Ctable_adr = NULL;
|
||||
address StubRoutines::x86::_SC_2_adr = NULL;
|
||||
address StubRoutines::x86::_SC_3_adr = NULL;
|
||||
address StubRoutines::x86::_SC_1_adr = NULL;
|
||||
address StubRoutines::x86::_PI_INV_TABLE_adr = NULL;
|
||||
address StubRoutines::x86::_PI_4_adr = NULL;
|
||||
address StubRoutines::x86::_PI32INV_adr = NULL;
|
||||
address StubRoutines::x86::_SIGN_MASK_adr = NULL;
|
||||
address StubRoutines::x86::_P_1_adr = NULL;
|
||||
address StubRoutines::x86::_P_3_adr = NULL;
|
||||
address StubRoutines::x86::_NEG_ZERO_adr = NULL;
|
||||
|
||||
// Tables shared by sincos and tancot
|
||||
address StubRoutines::x86::_L_2il0floatpacket_0_adr = NULL;
|
||||
address StubRoutines::x86::_Pi4Inv_adr = NULL;
|
||||
address StubRoutines::x86::_Pi4x3_adr = NULL;
|
||||
address StubRoutines::x86::_Pi4x4_adr = NULL;
|
||||
address StubRoutines::x86::_ones_adr = NULL;
|
||||
|
||||
uint64_t StubRoutines::x86::_crc_by128_masks[] =
|
||||
{
|
||||
/* The fields in this structure are arranged so that they can be
|
||||
|
@ -57,6 +57,48 @@
|
||||
// byte flip mask for sha256
|
||||
static address _pshuffle_byte_flip_mask_addr;
|
||||
|
||||
// Tables shared by LIBM sin and cos
|
||||
static juint _ONEHALF[];
|
||||
static address _ONEHALF_adr;
|
||||
static juint _P_2[];
|
||||
static address _P_2_adr;
|
||||
static juint _SC_4[];
|
||||
static address _SC_4_adr;
|
||||
static juint _Ctable[];
|
||||
static address _Ctable_adr;
|
||||
static juint _SC_2[];
|
||||
static address _SC_2_adr;
|
||||
static juint _SC_3[];
|
||||
static address _SC_3_adr;
|
||||
static juint _SC_1[];
|
||||
static address _SC_1_adr;
|
||||
static juint _PI_INV_TABLE[];
|
||||
static address _PI_INV_TABLE_adr;
|
||||
static juint _PI_4[];
|
||||
static address _PI_4_adr;
|
||||
static juint _PI32INV[];
|
||||
static address _PI32INV_adr;
|
||||
static juint _SIGN_MASK[];
|
||||
static address _SIGN_MASK_adr;
|
||||
static juint _P_1[];
|
||||
static address _P_1_adr;
|
||||
static juint _P_3[];
|
||||
static address _P_3_adr;
|
||||
static juint _NEG_ZERO[];
|
||||
static address _NEG_ZERO_adr;
|
||||
|
||||
// Tables shared by LIBM sincos and tancot
|
||||
static juint _L_2il0floatpacket_0[];
|
||||
static address _L_2il0floatpacket_0_adr;
|
||||
static juint _Pi4Inv[];
|
||||
static address _Pi4Inv_adr;
|
||||
static juint _Pi4x3[];
|
||||
static address _Pi4x3_adr;
|
||||
static juint _Pi4x4[];
|
||||
static address _Pi4x4_adr;
|
||||
static juint _ones[];
|
||||
static address _ones_adr;
|
||||
|
||||
public:
|
||||
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
|
||||
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
|
||||
@ -69,4 +111,24 @@
|
||||
static address k256_addr() { return _k256_adr; }
|
||||
static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; }
|
||||
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
|
||||
// Accessors for the LIBM constant-table addresses. Each one returns the
// like-named static `_<table>_adr` field unchanged and has no side effects.
// NOTE(review): the `_adr` fields are defined with a NULL initial value, so
// an accessor presumably yields NULL until the stub generator has assigned
// the field -- confirm before dereferencing.
static address _ONEHALF_addr() { return _ONEHALF_adr; }
|
||||
static address _P_2_addr() { return _P_2_adr; }
|
||||
static address _SC_4_addr() { return _SC_4_adr; }
|
||||
static address _Ctable_addr() { return _Ctable_adr; }
|
||||
static address _SC_2_addr() { return _SC_2_adr; }
|
||||
static address _SC_3_addr() { return _SC_3_adr; }
|
||||
static address _SC_1_addr() { return _SC_1_adr; }
|
||||
static address _PI_INV_TABLE_addr() { return _PI_INV_TABLE_adr; }
|
||||
static address _PI_4_addr() { return _PI_4_adr; }
|
||||
static address _PI32INV_addr() { return _PI32INV_adr; }
|
||||
static address _SIGN_MASK_addr() { return _SIGN_MASK_adr; }
|
||||
static address _P_1_addr() { return _P_1_adr; }
|
||||
static address _P_3_addr() { return _P_3_adr; }
|
||||
static address _NEG_ZERO_addr() { return _NEG_ZERO_adr; }
|
||||
static address _L_2il0floatpacket_0_addr() { return _L_2il0floatpacket_0_adr; }
|
||||
static address _Pi4Inv_addr() { return _Pi4Inv_adr; }
|
||||
static address _Pi4x3_addr() { return _Pi4x3_adr; }
|
||||
static address _Pi4x4_addr() { return _Pi4x4_adr; }
|
||||
static address _ones_addr() { return _ones_adr; }
|
||||
|
||||
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
|
||||
|
@ -608,18 +608,13 @@ void TemplateInterpreterGenerator::lock_method() {
|
||||
|
||||
// get synchronization object
|
||||
{
|
||||
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
||||
Label done;
|
||||
__ movl(rax, access_flags);
|
||||
__ testl(rax, JVM_ACC_STATIC);
|
||||
// get receiver (assume this is frequent case)
|
||||
__ movptr(rax, Address(rlocals, Interpreter::local_offset_in_bytes(0)));
|
||||
__ jcc(Assembler::zero, done);
|
||||
__ movptr(rax, Address(rbx, Method::const_offset()));
|
||||
__ movptr(rax, Address(rax, ConstMethod::constants_offset()));
|
||||
__ movptr(rax, Address(rax,
|
||||
ConstantPool::pool_holder_offset_in_bytes()));
|
||||
__ movptr(rax, Address(rax, mirror_offset));
|
||||
__ load_mirror(rax, rbx);
|
||||
|
||||
#ifdef ASSERT
|
||||
{
|
||||
@ -662,6 +657,9 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
|
||||
__ movptr(rbcp, Address(rbx, Method::const_offset())); // get ConstMethod*
|
||||
__ lea(rbcp, Address(rbcp, ConstMethod::codes_offset())); // get codebase
|
||||
__ push(rbx); // save Method*
|
||||
// Get mirror and store it in the frame as GC root for this Method*
|
||||
__ load_mirror(rdx, rbx);
|
||||
__ push(rdx);
|
||||
if (ProfileInterpreter) {
|
||||
Label method_data_continue;
|
||||
__ movptr(rdx, Address(rbx, in_bytes(Method::method_data_offset())));
|
||||
@ -999,15 +997,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
|
||||
// pass mirror handle if static call
|
||||
{
|
||||
Label L;
|
||||
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
||||
__ movl(t, Address(method, Method::access_flags_offset()));
|
||||
__ testl(t, JVM_ACC_STATIC);
|
||||
__ jcc(Assembler::zero, L);
|
||||
// get mirror
|
||||
__ movptr(t, Address(method, Method::const_offset()));
|
||||
__ movptr(t, Address(t, ConstMethod::constants_offset()));
|
||||
__ movptr(t, Address(t, ConstantPool::pool_holder_offset_in_bytes()));
|
||||
__ movptr(t, Address(t, mirror_offset));
|
||||
__ load_mirror(t, method);
|
||||
// copy mirror into activation frame
|
||||
__ movptr(Address(rbp, frame::interpreter_frame_oop_temp_offset * wordSize),
|
||||
t);
|
||||
|
@ -345,13 +345,34 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
||||
__ fld_d(Address(rsp, 1*wordSize));
|
||||
switch (kind) {
|
||||
case Interpreter::java_lang_math_sin :
|
||||
__ trigfunc('s');
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_cos :
|
||||
__ trigfunc('c');
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_tan :
|
||||
__ trigfunc('t');
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_sqrt:
|
||||
__ fsqrt();
|
||||
@ -362,26 +383,29 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
||||
case Interpreter::java_lang_math_log:
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_log10:
|
||||
__ flog10();
|
||||
// Store to stack to convert 80bit precision back to 64bits
|
||||
__ push_fTOS();
|
||||
__ pop_fTOS();
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_pow:
|
||||
__ fld_d(Address(rsp, 3*wordSize)); // second argument
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
__ fstp_d(Address(rsp, 2 * wordSize));
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
|
||||
@ -391,7 +415,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
||||
case Interpreter::java_lang_math_exp:
|
||||
__ subptr(rsp, 2*wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "interpreter/interpreterRuntime.hpp"
|
||||
#include "interpreter/templateInterpreterGenerator.hpp"
|
||||
#include "runtime/arguments.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
|
||||
#define __ _masm->
|
||||
|
||||
@ -373,32 +374,60 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
||||
__ sqrtsd(xmm0, Address(rsp, wordSize));
|
||||
} else if (kind == Interpreter::java_lang_math_exp) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_log) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_log10) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_sin) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dsin() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_cos) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dcos() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_pow) {
|
||||
__ movdbl(xmm1, Address(rsp, wordSize));
|
||||
__ movdbl(xmm0, Address(rsp, 3 * wordSize));
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_tan) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
|
||||
}
|
||||
} else {
|
||||
__ fld_d(Address(rsp, wordSize));
|
||||
switch (kind) {
|
||||
case Interpreter::java_lang_math_sin :
|
||||
__ trigfunc('s');
|
||||
break;
|
||||
case Interpreter::java_lang_math_cos :
|
||||
__ trigfunc('c');
|
||||
break;
|
||||
case Interpreter::java_lang_math_tan :
|
||||
__ trigfunc('t');
|
||||
break;
|
||||
case Interpreter::java_lang_math_abs:
|
||||
__ fabs();
|
||||
break;
|
||||
case Interpreter::java_lang_math_log10:
|
||||
__ flog10();
|
||||
break;
|
||||
default :
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
@ -844,6 +844,11 @@ public:
|
||||
static uint32_t get_xsave_header_upper_segment() {
|
||||
return _cpuid_info.xem_xcr0_edx;
|
||||
}
|
||||
|
||||
// SSE2 and later processors implement a 'pause' instruction
|
||||
// that can be used for efficient implementation of
|
||||
// the intrinsic for java.lang.Thread.onSpinWait()
|
||||
// Returns exactly what supports_sse2() returns; no separate feature bit
// is consulted here.
static bool supports_on_spin_wait() { return supports_sse2(); }
|
||||
};
|
||||
|
||||
#endif // CPU_X86_VM_VM_VERSION_X86_HPP
|
||||
|
@ -1719,6 +1719,10 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
if (!(UseSSE > 4))
|
||||
ret_value = false;
|
||||
break;
|
||||
case Op_OnSpinWait:
|
||||
if (VM_Version::supports_on_spin_wait() == false)
|
||||
ret_value = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret_value; // Per default match rules are supported.
|
||||
@ -1754,6 +1758,15 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
const bool Matcher::has_predicated_vectors(void) {
|
||||
bool ret_value = false;
|
||||
if (UseAVX > 2) {
|
||||
ret_value = VM_Version::supports_avx512vl();
|
||||
}
|
||||
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
const int Matcher::float_pressure(int default_pressure_threshold) {
|
||||
int float_pressure_threshold = default_pressure_threshold;
|
||||
#ifdef _LP64
|
||||
@ -1871,7 +1884,7 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
|
||||
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
|
||||
break;
|
||||
case Op_VecZ:
|
||||
__ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
|
||||
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -1926,7 +1939,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
|
||||
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
|
||||
break;
|
||||
case Op_VecZ:
|
||||
__ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
|
||||
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -1946,7 +1959,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
|
||||
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
|
||||
break;
|
||||
case Op_VecZ:
|
||||
__ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
|
||||
__ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -2172,6 +2185,19 @@ instruct ShouldNotReachHere() %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// =================================EVEX special===============================
|
||||
|
||||
// Match rule for SetVectMaskI. Only selectable when
// Matcher::has_predicated_vectors() is true. The encoding calls the macro
// assembler's setvectmask(dst, src); dst is declared TEMP in the effect list.
instruct setMask(rRegI dst, rRegI src) %{
|
||||
predicate(Matcher::has_predicated_vectors());
|
||||
match(Set dst (SetVectMaskI src));
|
||||
effect(TEMP dst);
|
||||
format %{ "setvectmask $dst, $src" %}
|
||||
ins_encode %{
|
||||
__ setvectmask($dst$$Register, $src$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ============================================================================
|
||||
|
||||
instruct addF_reg(regF dst, regF src) %{
|
||||
@ -2996,6 +3022,24 @@ instruct sqrtD_imm(regD dst, immD con) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Match rule for the OnSpinWait node (cost 200). The encoding emits a single
// pause() through the macro assembler.
// NOTE(review): the format template prints "(empty encoding)" when
// os::is_MP() is false, but ins_encode below emits pause() unconditionally --
// confirm whether the printed text or the encoding reflects the intent.
instruct onspinwait() %{
|
||||
match(OnSpinWait);
|
||||
ins_cost(200);
|
||||
|
||||
format %{
|
||||
$$template
|
||||
if (os::is_MP()) {
|
||||
$$emit$$"pause\t! membar_onspinwait"
|
||||
} else {
|
||||
$$emit$$"MEMBAR-onspinwait ! (empty encoding)"
|
||||
}
|
||||
%}
|
||||
ins_encode %{
|
||||
__ pause();
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ====================VECTOR INSTRUCTIONS=====================================
|
||||
|
||||
// Load vectors (4 bytes long)
|
||||
@ -3047,11 +3091,11 @@ instruct loadV32(vecY dst, memory mem) %{
|
||||
%}
|
||||
|
||||
// Load vectors (64 bytes long)
|
||||
instruct loadV64(vecZ dst, memory mem) %{
|
||||
predicate(n->as_LoadVector()->memory_size() == 64);
|
||||
instruct loadV64_dword(vecZ dst, memory mem) %{
|
||||
predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
|
||||
match(Set dst (LoadVector mem));
|
||||
ins_cost(125);
|
||||
format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
|
||||
format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
|
||||
ins_encode %{
|
||||
int vector_len = 2;
|
||||
__ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
@ -3059,6 +3103,19 @@ instruct loadV64(vecZ dst, memory mem) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Load vectors (64 bytes long)
|
||||
// 64-byte LoadVector rule for element sizes greater than 4 bytes (the
// predicate tests memory_size() == 64 && element_size() > 4). Emits
// evmovdquq from memory into the vecZ destination with vector_len = 2.
instruct loadV64_qword(vecZ dst, memory mem) %{
|
||||
predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
|
||||
match(Set dst (LoadVector mem));
|
||||
ins_cost(125);
|
||||
format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
|
||||
ins_encode %{
|
||||
// vector_len 2 is the value used throughout this file for Op_VecZ moves.
int vector_len = 2;
|
||||
__ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Store vectors
|
||||
instruct storeV4(memory mem, vecS src) %{
|
||||
predicate(n->as_StoreVector()->memory_size() == 4);
|
||||
@ -3104,11 +3161,11 @@ instruct storeV32(memory mem, vecY src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct storeV64(memory mem, vecZ src) %{
|
||||
predicate(n->as_StoreVector()->memory_size() == 64);
|
||||
instruct storeV64_dword(memory mem, vecZ src) %{
|
||||
predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
|
||||
match(Set mem (StoreVector mem src));
|
||||
ins_cost(145);
|
||||
format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
|
||||
format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
|
||||
ins_encode %{
|
||||
int vector_len = 2;
|
||||
__ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
|
||||
@ -3116,6 +3173,18 @@ instruct storeV64(memory mem, vecZ src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// 64-byte StoreVector rule for element sizes greater than 4 bytes (the
// predicate tests memory_size() == 64 && element_size() > 4). Emits
// evmovdquq from the vecZ source to memory with vector_len = 2.
instruct storeV64_qword(memory mem, vecZ src) %{
|
||||
predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
|
||||
match(Set mem (StoreVector mem src));
|
||||
ins_cost(145);
|
||||
format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
|
||||
ins_encode %{
|
||||
// vector_len 2 is the value used throughout this file for Op_VecZ moves.
int vector_len = 2;
|
||||
__ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// ====================LEGACY REPLICATE=======================================
|
||||
|
||||
instruct Repl4B_mem(vecS dst, memory mem) %{
|
||||
|
@ -1021,10 +1021,10 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
|
||||
__ vmovdqu(xmm0, Address(rsp, -32));
|
||||
break;
|
||||
case Op_VecZ:
|
||||
__ evmovdqul(Address(rsp, -64), xmm0, 2);
|
||||
__ evmovdqul(xmm0, Address(rsp, src_offset), 2);
|
||||
__ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
|
||||
__ evmovdqul(xmm0, Address(rsp, -64), 2);
|
||||
__ evmovdquq(Address(rsp, -64), xmm0, 2);
|
||||
__ evmovdquq(xmm0, Address(rsp, src_offset), 2);
|
||||
__ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
|
||||
__ evmovdquq(xmm0, Address(rsp, -64), 2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -9828,27 +9828,6 @@ instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Double tangent for UseSSE <= 1: matches TanD and emits the x87 fptan
// opcode followed by fstp st.
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE <= 1);
  match(Set dst (TanD src));
  format %{ "DTAN $dst" %}
  ins_encode(
    Opcode(0xD9), Opcode(0xF2),   // fptan
    Opcode(0xDD), Opcode(0xD8)    // fstp st
  );
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// Double tangent for UseSSE >= 2: the operand is bracketed by
// Push_SrcD / Push_ResultD around the x87 fptan sequence.
instruct tanD_reg(regD dst, eFlagsReg cr) %{
  predicate(UseSSE >= 2);
  match(Set dst (TanD dst));
  // Flags are killed because Push_{Src|Result}D() uses "{SUB|ADD} ESP,8".
  effect(KILL cr);
  format %{ "DTAN $dst" %}
  ins_encode(
    Push_SrcD(dst),
    Opcode(0xD9), Opcode(0xF2),   // fptan
    Opcode(0xDD), Opcode(0xD8),   // fstp st
    Push_ResultD(dst)
  );
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
instruct atanDPR_reg(regDPR dst, regDPR src) %{
|
||||
predicate (UseSSE<=1);
|
||||
match(Set dst(AtanD dst src));
|
||||
@ -9880,41 +9859,6 @@ instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Base-10 logarithm for UseSSE <= 1, with the source double on the FPU stack.
// Emitted sequence:
//   fldlg2  ; push log_10(2) on the FPU stack; full 80-bit number
//   fxch    ; swap ST(0) with ST(1)
//   fyl2x   ; compute log_10(2) * log_2(x)
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE <= 1);
  match(Set dst (Log10D src));
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode(
    Opcode(0xD9), Opcode(0xEC),   // fldlg2
    Opcode(0xD9), Opcode(0xC9),   // fxch
    Opcode(0xD9), Opcode(0xF1)    // fyl2x
  );

  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// Base-10 logarithm for UseSSE >= 2. Emitted sequence:
//   fldlg2  ; push log_10(2) on the FPU stack; full 80-bit number
//   (Push_SrcD places the source operand for the x87 unit)
//   fyl2x   ; compute log_10(2) * log_2(x)
//   (Push_ResultD delivers the result to dst)
instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate(UseSSE >= 2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode(
    Opcode(0xD9), Opcode(0xEC),   // fldlg2
    Push_SrcD(src),
    Opcode(0xD9), Opcode(0xF1),   // fyl2x
    Push_ResultD(dst)
  );

  ins_pipe( pipe_slow );
%}
|
||||
|
||||
//-------------Float Instructions-------------------------------
|
||||
// Float Math
|
||||
|
||||
@ -12103,6 +12047,7 @@ instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
|
||||
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cr);
|
||||
effect(USE labl);
|
||||
|
||||
@ -12118,6 +12063,7 @@ instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
|
||||
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
@ -12132,6 +12078,7 @@ instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
|
||||
%}
|
||||
|
||||
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
@ -12145,6 +12092,60 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
|
||||
ins_pipe( pipe_jcc );
|
||||
%}
|
||||
|
||||
// mask version
|
||||
// Counted-loop back-branch for loop ends that have a vector mask set:
// a conditional jump (label is relative to Jcc+1) followed by a restore
// of the vector mask.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    // The final 'false' argument requests the long jump form, always.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}
|
||||
|
||||
// Unsigned-compare variant of the masked counted-loop back-branch:
// a conditional jump (label is relative to Jcc+1) followed by a restore
// of the vector mask.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    // The final 'false' argument requests the long jump form, always.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}
|
||||
|
||||
// cmpOpUCF / eFlagsRegUCF variant of the masked counted-loop back-branch:
// a conditional jump followed by a restore of the vector mask.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    // The final 'false' argument requests the long jump form, always.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}
|
||||
|
||||
// Jump Direct Conditional - using unsigned comparison
|
||||
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
|
||||
match(If cop cmp);
|
||||
|
@ -1081,10 +1081,10 @@ static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
|
||||
__ vmovdqu(xmm0, Address(rsp, -32));
|
||||
break;
|
||||
case Op_VecZ:
|
||||
__ evmovdqul(Address(rsp, -64), xmm0, 2);
|
||||
__ evmovdqul(xmm0, Address(rsp, src_offset), 2);
|
||||
__ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
|
||||
__ evmovdqul(xmm0, Address(rsp, -64), 2);
|
||||
__ evmovdquq(Address(rsp, -64), xmm0, 2);
|
||||
__ evmovdquq(xmm0, Address(rsp, src_offset), 2);
|
||||
__ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
|
||||
__ evmovdquq(xmm0, Address(rsp, -64), 2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -9897,34 +9897,6 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// -----------Trig and Transcendental Instructions------------------------------
|
||||
// Double tangent computed in place on dst: the operand is bracketed by
// Push_SrcXD / Push_ResultXD around the x87 fptan sequence.
instruct tanD_reg(regD dst) %{
  match(Set dst (TanD dst));

  format %{ "dtan $dst\n\t" %}
  ins_encode(
    Push_SrcXD(dst),
    Opcode(0xD9), Opcode(0xF2),   // fptan
    Opcode(0xDD), Opcode(0xD8),   // fstp st
    Push_ResultXD(dst)
  );
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// Base-10 logarithm computed in place; source and result double operands
// live in XMM registers. Emitted sequence:
//   fldlg2  ; push log_10(2) on the FPU stack; full 80-bit number
//   fyl2x   ; compute log_10(2) * log_2(x)
instruct log10D_reg(regD dst) %{
  match(Set dst (Log10D dst));
  format %{ "fldlg2\t\t\t#Log10\n\t"
            "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
         %}
  ins_encode(
    Opcode(0xD9), Opcode(0xEC),   // fldlg2
    Push_SrcXD(dst),
    Opcode(0xD9), Opcode(0xF1),   // fyl2x
    Push_ResultXD(dst)
  );

  ins_pipe( pipe_slow );
%}
|
||||
|
||||
//----------Arithmetic Conversion Instructions---------------------------------
|
||||
|
||||
instruct roundFloat_nop(regF dst)
|
||||
@ -11471,6 +11443,7 @@ instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
|
||||
%{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cr);
|
||||
effect(USE labl);
|
||||
|
||||
@ -11486,6 +11459,7 @@ instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
|
||||
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
@ -11500,6 +11474,7 @@ instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
|
||||
%}
|
||||
|
||||
instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
@ -11513,6 +11488,61 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
|
||||
ins_pipe(pipe_jcc);
|
||||
%}
|
||||
|
||||
// mask version
|
||||
// Counted-loop back-branch for loop ends that have a vector mask set:
// a conditional jump (label is relative to Jcc+1) followed by a restore
// of the vector mask.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl)
%{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "j$cop $labl\t# loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    // The final 'false' argument requests the long jump form, always.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
    __ restorevectmask();
  %}
  ins_pipe(pipe_jcc);
%}
|
||||
|
||||
// Unsigned-compare variant of the masked counted-loop back-branch:
// a conditional jump (label is relative to Jcc+1) followed by a restore
// of the vector mask.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "j$cop,u $labl\t# loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    // The final 'false' argument requests the long jump form, always.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
    __ restorevectmask();
  %}
  ins_pipe(pipe_jcc);
%}
|
||||
|
||||
// cmpOpUCF / rFlagsRegUCF variant of the masked counted-loop back-branch:
// a conditional jump followed by a restore of the vector mask.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u $labl\t# loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    // The final 'false' argument requests the long jump form, always.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
    __ restorevectmask();
  %}
  ins_pipe(pipe_jcc);
%}
|
||||
|
||||
// Jump Direct Conditional - using unsigned comparison
|
||||
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
|
||||
match(If cop cmp);
|
||||
|
@ -49,6 +49,7 @@ const char *BytecodeInterpreter::name_of_field_at_address(address addr) {
|
||||
DO(_locals);
|
||||
DO(_constants);
|
||||
DO(_method);
|
||||
DO(_mirror);
|
||||
DO(_mdx);
|
||||
DO(_stack);
|
||||
DO(_msg);
|
||||
@ -77,6 +78,7 @@ void BytecodeInterpreter::layout_interpreterState(interpreterState istate,
|
||||
bool is_top_frame) {
|
||||
istate->set_locals(locals);
|
||||
istate->set_method(method);
|
||||
istate->set_mirror(method->method_holder()->java_mirror());
|
||||
istate->set_self_link(istate);
|
||||
istate->set_prev_link(NULL);
|
||||
// thread will be set by a hacky repurposing of frame::patch_pc()
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2007, 2008, 2011 Red Hat, Inc.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -44,6 +44,9 @@
|
||||
inline void set_method(Method* new_method) {
|
||||
_method = new_method;
|
||||
}
|
||||
// Stores new_mirror into this interpreter state's _mirror field.
inline void set_mirror(oop new_mirror) { _mirror = new_mirror; }
|
||||
inline interpreterState self_link() {
|
||||
return _self_link;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -42,22 +42,6 @@
|
||||
#include "utilities/events.hpp"
|
||||
|
||||
|
||||
// Release the CompiledICHolder* associated with this call site is there is one.
|
||||
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
|
||||
// This call site might have become stale so inspect it carefully.
|
||||
NativeCall* call = nativeCall_at(call_site->addr());
|
||||
if (is_icholder_entry(call->destination())) {
|
||||
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
|
||||
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns whether the call at this call site currently targets an icholder entry.
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
  // This call site might have become stale, so only the current destination
  // of the native call is examined.
  return is_icholder_entry(nativeCall_at(call_site->addr())->destination());
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
|
||||
|
@ -221,9 +221,16 @@ void CppInterpreter::main_loop(int recurse, TRAPS) {
|
||||
// Push our result
|
||||
for (int i = 0; i < result_slots; i++) {
|
||||
// Adjust result to smaller
|
||||
intptr_t res = result[-i];
|
||||
union {
|
||||
intptr_t res;
|
||||
jint res_jint;
|
||||
};
|
||||
res = result[-i];
|
||||
if (result_slots == 1) {
|
||||
res = narrow(method->result_type(), res);
|
||||
BasicType t = method->result_type();
|
||||
if (is_subword_type(t)) {
|
||||
res_jint = (jint)narrow(t, res_jint);
|
||||
}
|
||||
}
|
||||
stack->push(res);
|
||||
}
|
||||
@ -748,6 +755,7 @@ InterpreterFrame *InterpreterFrame::build(Method* const method, TRAPS) {
|
||||
|
||||
istate->set_locals(locals);
|
||||
istate->set_method(method);
|
||||
istate->set_mirror(method->method_holder()->java_mirror());
|
||||
istate->set_self_link(istate);
|
||||
istate->set_prev_link(NULL);
|
||||
istate->set_thread(thread);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -115,6 +115,10 @@ inline Method** frame::interpreter_frame_method_addr() const {
|
||||
return &(get_interpreterState()->_method);
|
||||
}
|
||||
|
||||
// Returns the address of the _mirror field inside this frame's interpreter state.
inline oop* frame::interpreter_frame_mirror_addr() const {
  return &get_interpreterState()->_mirror;
}
|
||||
|
||||
inline intptr_t* frame::interpreter_frame_mdp_addr() const {
|
||||
return (intptr_t*) &(get_interpreterState()->_mdx);
|
||||
}
|
||||
|
@ -43,7 +43,12 @@ define_pd_global(intx, CodeEntryAlignment, 32);
|
||||
define_pd_global(intx, OptoLoopAlignment, 16);
|
||||
define_pd_global(intx, InlineFrequencyCount, 100);
|
||||
define_pd_global(intx, InlineSmallCode, 1000);
|
||||
define_pd_global(intx, InitArrayShortSize, -1); // not used
|
||||
|
||||
// not used, but must satisfy following constraints:
|
||||
// 1.) <VALUE> must be in the allowed range for intx *and*
|
||||
// 2.) <VALUE> % BytesPerLong == 0 so as to not
|
||||
// violate the constraint verifier on JVM start-up.
|
||||
define_pd_global(intx, InitArrayShortSize, 0);
|
||||
|
||||
#define DEFAULT_STACK_YELLOW_PAGES (2)
|
||||
#define DEFAULT_STACK_RED_PAGES (1)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -85,6 +85,12 @@ public class SymbolTable extends sun.jvm.hotspot.utilities.Hashtable {
|
||||
tables. */
|
||||
public Symbol probe(byte[] name) {
|
||||
long hashValue = hashSymbol(name);
|
||||
|
||||
Symbol s = sharedTable.probe(name, hashValue);
|
||||
if (s != null) {
|
||||
return s;
|
||||
}
|
||||
|
||||
for (HashtableEntry e = (HashtableEntry) bucket(hashToIndex(hashValue)); e != null; e = (HashtableEntry) e.next()) {
|
||||
if (e.hash() == hashValue) {
|
||||
Symbol sym = Symbol.create(e.literalValue());
|
||||
@ -94,7 +100,7 @@ public class SymbolTable extends sun.jvm.hotspot.utilities.Hashtable {
|
||||
}
|
||||
}
|
||||
|
||||
return sharedTable.probe(name, hashValue);
|
||||
return null;
|
||||
}
|
||||
|
||||
public interface SymbolVisitor {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -46,6 +46,11 @@ public class BreakpointInfo extends VMObject {
|
||||
}
|
||||
|
||||
private static synchronized void initialize(TypeDataBase db) throws WrongTypeException {
|
||||
if (!VM.getVM().isJvmtiSupported()) {
|
||||
// no BreakpointInfo support without JVMTI
|
||||
return;
|
||||
}
|
||||
|
||||
Type type = db.lookupType("BreakpointInfo");
|
||||
|
||||
origBytecodeField = type.getCIntegerField("_orig_bytecode");
|
||||
|
@ -85,7 +85,9 @@ public class InstanceKlass extends Klass {
|
||||
isMarkedDependent = new CIntField(type.getCIntegerField("_is_marked_dependent"), 0);
|
||||
initState = new CIntField(type.getCIntegerField("_init_state"), 0);
|
||||
itableLen = new CIntField(type.getCIntegerField("_itable_len"), 0);
|
||||
breakpoints = type.getAddressField("_breakpoints");
|
||||
if (VM.getVM().isJvmtiSupported()) {
|
||||
breakpoints = type.getAddressField("_breakpoints");
|
||||
}
|
||||
genericSignatureIndex = new CIntField(type.getCIntegerField("_generic_signature_index"), 0);
|
||||
majorVersion = new CIntField(type.getCIntegerField("_major_version"), 0);
|
||||
minorVersion = new CIntField(type.getCIntegerField("_minor_version"), 0);
|
||||
@ -837,6 +839,9 @@ public class InstanceKlass extends Klass {
|
||||
|
||||
/** Breakpoint support (see methods on Method* for details) */
|
||||
public BreakpointInfo getBreakpoints() {
|
||||
if (!VM.getVM().isJvmtiSupported()) {
|
||||
return null;
|
||||
}
|
||||
Address addr = getAddress().getAddressAt(breakpoints.getOffset());
|
||||
return (BreakpointInfo) VMObjectFactory.newObject(BreakpointInfo.class, addr);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -87,6 +87,8 @@ public class VM {
|
||||
private StubRoutines stubRoutines;
|
||||
private Bytes bytes;
|
||||
|
||||
/** Flag indicating if JVMTI support is included in the build */
|
||||
private boolean isJvmtiSupported;
|
||||
/** Flags indicating whether we are attached to a core, C1, or C2 build */
|
||||
private boolean usingClientCompiler;
|
||||
private boolean usingServerCompiler;
|
||||
@ -336,6 +338,16 @@ public class VM {
|
||||
stackBias = db.lookupIntConstant("STACK_BIAS").intValue();
|
||||
invocationEntryBCI = db.lookupIntConstant("InvocationEntryBci").intValue();
|
||||
|
||||
// We infer the presence of JVMTI from the presence of the InstanceKlass::_breakpoints field.
|
||||
{
|
||||
Type type = db.lookupType("InstanceKlass");
|
||||
if (type.getField("_breakpoints", false, false) == null) {
|
||||
isJvmtiSupported = false;
|
||||
} else {
|
||||
isJvmtiSupported = true;
|
||||
}
|
||||
}
|
||||
|
||||
// We infer the presence of C1 or C2 from a couple of fields we
|
||||
// already have present in the type database
|
||||
{
|
||||
@ -701,6 +713,11 @@ public class VM {
|
||||
return isBigEndian;
|
||||
}
|
||||
|
||||
/** Returns true if JVMTI is supported, false otherwise */
|
||||
public boolean isJvmtiSupported() {
|
||||
return isJvmtiSupported;
|
||||
}
|
||||
|
||||
/** Returns true if this is a "core" build, false if either C1 or C2
|
||||
is present */
|
||||
public boolean isCore() {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -44,21 +44,23 @@ public class CompactHashTable extends VMObject {
|
||||
Type type = db.lookupType("SymbolCompactHashTable");
|
||||
baseAddressField = type.getAddressField("_base_address");
|
||||
bucketCountField = type.getCIntegerField("_bucket_count");
|
||||
tableEndOffsetField = type.getCIntegerField("_table_end_offset");
|
||||
entryCountField = type.getCIntegerField("_entry_count");
|
||||
bucketsField = type.getAddressField("_buckets");
|
||||
uintSize = db.lookupType("juint").getSize();
|
||||
entriesField = type.getAddressField("_entries");
|
||||
uintSize = db.lookupType("u4").getSize();
|
||||
}
|
||||
|
||||
// Fields
|
||||
private static CIntegerField bucketCountField;
|
||||
private static CIntegerField tableEndOffsetField;
|
||||
private static CIntegerField entryCountField;
|
||||
private static AddressField baseAddressField;
|
||||
private static AddressField bucketsField;
|
||||
private static AddressField entriesField;
|
||||
private static long uintSize;
|
||||
|
||||
private static int BUCKET_OFFSET_MASK = 0x3FFFFFFF;
|
||||
private static int BUCKET_TYPE_SHIFT = 30;
|
||||
private static int COMPACT_BUCKET_TYPE = 1;
|
||||
private static int VALUE_ONLY_BUCKET_TYPE = 1;
|
||||
|
||||
public CompactHashTable(Address addr) {
|
||||
super(addr);
|
||||
@ -68,12 +70,8 @@ public class CompactHashTable extends VMObject {
|
||||
return (int)bucketCountField.getValue(addr);
|
||||
}
|
||||
|
||||
private int tableEndOffset() {
|
||||
return (int)tableEndOffsetField.getValue(addr);
|
||||
}
|
||||
|
||||
private boolean isCompactBucket(int bucket_info) {
|
||||
return (bucket_info >> BUCKET_TYPE_SHIFT) == COMPACT_BUCKET_TYPE;
|
||||
private boolean isValueOnlyBucket(int bucket_info) {
|
||||
return (bucket_info >> BUCKET_TYPE_SHIFT) == VALUE_ONLY_BUCKET_TYPE;
|
||||
}
|
||||
|
||||
private int bucketOffset(int bucket_info) {
|
||||
@ -81,9 +79,8 @@ public class CompactHashTable extends VMObject {
|
||||
}
|
||||
|
||||
public Symbol probe(byte[] name, long hash) {
|
||||
|
||||
if (bucketCount() == 0) {
|
||||
// The table is invalid, so don't try to lookup
|
||||
if (bucketCount() <= 0) {
|
||||
// This CompactHashTable is not in use
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -91,34 +88,33 @@ public class CompactHashTable extends VMObject {
|
||||
Symbol sym;
|
||||
Address baseAddress = baseAddressField.getValue(addr);
|
||||
Address bucket = bucketsField.getValue(addr);
|
||||
Address bucketEnd = bucket;
|
||||
long index = hash % bucketCount();
|
||||
int bucketInfo = (int)bucket.getCIntegerAt(index * uintSize, uintSize, true);
|
||||
int bucketOffset = bucketOffset(bucketInfo);
|
||||
int nextBucketInfo = (int)bucket.getCIntegerAt((index+1) * uintSize, uintSize, true);
|
||||
int nextBucketOffset = bucketOffset(nextBucketInfo);
|
||||
|
||||
bucket = bucket.addOffsetTo(bucketOffset * uintSize);
|
||||
Address entry = entriesField.getValue(addr).addOffsetTo(bucketOffset * uintSize);
|
||||
|
||||
if (isCompactBucket(bucketInfo)) {
|
||||
symOffset = bucket.getCIntegerAt(0, uintSize, true);
|
||||
if (isValueOnlyBucket(bucketInfo)) {
|
||||
symOffset = entry.getCIntegerAt(0, uintSize, true);
|
||||
sym = Symbol.create(baseAddress.addOffsetTo(symOffset));
|
||||
if (sym.equals(name)) {
|
||||
return sym;
|
||||
}
|
||||
} else {
|
||||
bucketEnd = bucket.addOffsetTo(nextBucketOffset * uintSize);
|
||||
while (bucket.lessThan(bucketEnd)) {
|
||||
long symHash = bucket.getCIntegerAt(0, uintSize, true);
|
||||
Address entryMax = entriesField.getValue(addr).addOffsetTo(nextBucketOffset * uintSize);
|
||||
while (entry.lessThan(entryMax)) {
|
||||
long symHash = entry.getCIntegerAt(0, uintSize, true);
|
||||
if (symHash == hash) {
|
||||
symOffset = bucket.getCIntegerAt(uintSize, uintSize, true);
|
||||
symOffset = entry.getCIntegerAt(uintSize, uintSize, true);
|
||||
Address symAddr = baseAddress.addOffsetTo(symOffset);
|
||||
sym = Symbol.create(symAddr);
|
||||
if (sym.equals(name)) {
|
||||
return sym;
|
||||
}
|
||||
}
|
||||
bucket = bucket.addOffsetTo(2 * uintSize);
|
||||
entry = entry.addOffsetTo(2 * uintSize);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
@ -22,7 +22,7 @@
|
||||
*/
|
||||
package jdk.vm.ci.common;
|
||||
|
||||
import sun.misc.Unsafe;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
|
||||
/**
|
||||
* Utilities for operating on raw memory with {@link Unsafe}.
|
||||
|
@ -39,7 +39,7 @@ import jdk.vm.ci.inittimer.InitTimer;
|
||||
import jdk.vm.ci.meta.JavaType;
|
||||
import jdk.vm.ci.meta.ResolvedJavaMethod;
|
||||
import jdk.vm.ci.meta.ResolvedJavaType;
|
||||
import sun.misc.Unsafe;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
|
||||
/**
|
||||
* Calls from Java into HotSpot. The behavior of all the methods in this class that take a native
|
||||
|
@ -120,7 +120,9 @@ public class HotSpotCodeCacheProvider implements CodeCacheProvider {
|
||||
resultInstalledCode = installedCode;
|
||||
}
|
||||
|
||||
int result = runtime.getCompilerToVM().installCode(target, (HotSpotCompiledCode) compiledCode, resultInstalledCode, (HotSpotSpeculationLog) log);
|
||||
HotSpotSpeculationLog speculationLog = (log != null && log.hasSpeculations()) ? (HotSpotSpeculationLog) log : null;
|
||||
|
||||
int result = runtime.getCompilerToVM().installCode(target, (HotSpotCompiledCode) compiledCode, resultInstalledCode, speculationLog);
|
||||
if (result != config.codeInstallResultOk) {
|
||||
String resultDesc = config.getCodeInstallResultDescription(result);
|
||||
if (compiledCode instanceof HotSpotCompiledNmethod) {
|
||||
|
@ -25,7 +25,7 @@ package jdk.vm.ci.hotspot;
|
||||
import static jdk.vm.ci.hotspot.UnsafeAccess.UNSAFE;
|
||||
import jdk.vm.ci.code.InstalledCode;
|
||||
import jdk.vm.ci.inittimer.SuppressFBWarnings;
|
||||
import sun.misc.Unsafe;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
|
||||
/**
|
||||
* Implementation of {@link InstalledCode} for HotSpot.
|
||||
|
@ -30,7 +30,7 @@ import jdk.vm.ci.meta.JavaKind;
|
||||
import jdk.vm.ci.meta.JavaType;
|
||||
import jdk.vm.ci.meta.ResolvedJavaType;
|
||||
import jdk.vm.ci.runtime.JVMCIRuntime;
|
||||
import sun.misc.Unsafe;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
|
||||
//JaCoCo Exclude
|
||||
|
||||
|
@ -39,7 +39,7 @@ import jdk.vm.ci.meta.JavaTypeProfile.ProfiledType;
|
||||
import jdk.vm.ci.meta.ResolvedJavaMethod;
|
||||
import jdk.vm.ci.meta.ResolvedJavaType;
|
||||
import jdk.vm.ci.meta.TriState;
|
||||
import sun.misc.Unsafe;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
|
||||
/**
|
||||
* Access to a HotSpot MethodData structure (defined in methodData.hpp).
|
||||
|
@ -38,7 +38,7 @@ public class HotSpotSpeculationLog implements SpeculationLog {
|
||||
/** All speculations that have been a deoptimization reason. */
|
||||
private Set<SpeculationReason> failedSpeculations;
|
||||
|
||||
/** Strong references to all reasons embededded in the current nmethod. */
|
||||
/** Strong references to all reasons embedded in the current nmethod. */
|
||||
private volatile Collection<SpeculationReason> speculations;
|
||||
|
||||
@Override
|
||||
@ -81,4 +81,9 @@ public class HotSpotSpeculationLog implements SpeculationLog {
|
||||
|
||||
return HotSpotObjectConstantImpl.forObject(reason);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean hasSpeculations() {
|
||||
return speculations != null && !speculations.isEmpty();
|
||||
}
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ import jdk.vm.ci.hotspotvmconfig.HotSpotVMData;
|
||||
import jdk.vm.ci.hotspotvmconfig.HotSpotVMField;
|
||||
import jdk.vm.ci.hotspotvmconfig.HotSpotVMFlag;
|
||||
import jdk.vm.ci.hotspotvmconfig.HotSpotVMType;
|
||||
import sun.misc.Unsafe;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
|
||||
//JaCoCo Exclude
|
||||
|
||||
|
@ -40,7 +40,7 @@ import jdk.internal.org.objectweb.asm.Label;
|
||||
import jdk.internal.org.objectweb.asm.MethodVisitor;
|
||||
import jdk.internal.org.objectweb.asm.Opcodes;
|
||||
import jdk.internal.org.objectweb.asm.Type;
|
||||
import sun.misc.Unsafe;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
|
||||
/**
|
||||
* A {@link ClassVisitor} that verifies {@link HotSpotVMConfig} does not access {@link Unsafe} from
|
||||
|
@ -24,7 +24,7 @@ package jdk.vm.ci.hotspot;
|
||||
|
||||
import static jdk.vm.ci.hotspot.HotSpotJVMCIRuntime.runtime;
|
||||
import static jdk.vm.ci.hotspot.UnsafeAccess.UNSAFE;
|
||||
import sun.misc.Unsafe;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
|
||||
/**
|
||||
* Class to access the C++ {@code vmSymbols} table.
|
||||
|
@ -24,7 +24,7 @@ package jdk.vm.ci.hotspot;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
|
||||
import sun.misc.Unsafe;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
|
||||
/**
|
||||
* Package private access to the {@link Unsafe} capability.
|
||||
|
@ -56,4 +56,11 @@ public interface SpeculationLog {
|
||||
* argument to the deoptimization function.
|
||||
*/
|
||||
JavaConstant speculate(SpeculationReason reason);
|
||||
|
||||
/**
|
||||
* Returns whether this log has any speculations.
|
||||
*
|
||||
* @return true if there are speculations, false otherwise
|
||||
*/
|
||||
boolean hasSpeculations();
|
||||
}
|
||||
|
@ -24,9 +24,6 @@
|
||||
*/
|
||||
|
||||
module jdk.vm.ci {
|
||||
// 8153756
|
||||
requires jdk.unsupported;
|
||||
|
||||
uses jdk.vm.ci.hotspot.HotSpotVMEventListener;
|
||||
uses jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory;
|
||||
uses jdk.vm.ci.runtime.JVMCICompilerFactory;
|
||||
|
@ -4875,3 +4875,16 @@ bool os::start_debugging(char *buf, int buflen) {
|
||||
}
|
||||
return yes;
|
||||
}
|
||||
|
||||
static inline time_t get_mtime(const char* filename) {
|
||||
struct stat st;
|
||||
int ret = os::stat(filename, &st);
|
||||
assert(ret == 0, "failed to stat() file '%s': %s", filename, strerror(errno));
|
||||
return st.st_mtime;
|
||||
}
|
||||
|
||||
int os::compare_file_modified_times(const char* file1, const char* file2) {
|
||||
time_t t1 = get_mtime(file1);
|
||||
time_t t2 = get_mtime(file2);
|
||||
return t1 - t2;
|
||||
}
|
||||
|
@ -2013,8 +2013,8 @@ void os::print_os_info(outputStream* st) {
|
||||
// their own specific XXX-release file as well as a redhat-release file.
|
||||
// Because of this the XXX-release file needs to be searched for before the
|
||||
// redhat-release file.
|
||||
// Since Red Hat has a lsb-release file that is not very descriptive the
|
||||
// search for redhat-release needs to be before lsb-release.
|
||||
// Since Red Hat and SuSE have an lsb-release file that is not very descriptive the
|
||||
// search for redhat-release / SuSE-release needs to be before lsb-release.
|
||||
// Since the lsb-release file is the new standard it needs to be searched
|
||||
// before the older style release files.
|
||||
// Searching system-release (Red Hat) and os-release (other Linuxes) are a
|
||||
@ -2031,8 +2031,8 @@ const char* distro_files[] = {
|
||||
"/etc/mandrake-release",
|
||||
"/etc/sun-release",
|
||||
"/etc/redhat-release",
|
||||
"/etc/lsb-release",
|
||||
"/etc/SuSE-release",
|
||||
"/etc/lsb-release",
|
||||
"/etc/turbolinux-release",
|
||||
"/etc/gentoo-release",
|
||||
"/etc/ltib-release",
|
||||
@ -2062,14 +2062,11 @@ void os::Linux::print_distro_info(outputStream* st) {
|
||||
st->cr();
|
||||
}
|
||||
|
||||
static void parse_os_info(char* distro, size_t length, const char* file) {
|
||||
FILE* fp = fopen(file, "r");
|
||||
if (fp != NULL) {
|
||||
char buf[256];
|
||||
// get last line of the file.
|
||||
while (fgets(buf, sizeof(buf), fp)) { }
|
||||
// Edit out extra stuff in expected ubuntu format
|
||||
if (strstr(buf, "DISTRIB_DESCRIPTION=") != NULL) {
|
||||
static void parse_os_info_helper(FILE* fp, char* distro, size_t length, bool get_first_line) {
|
||||
char buf[256];
|
||||
while (fgets(buf, sizeof(buf), fp)) {
|
||||
// Edit out extra stuff in expected format
|
||||
if (strstr(buf, "DISTRIB_DESCRIPTION=") != NULL || strstr(buf, "PRETTY_NAME=") != NULL) {
|
||||
char* ptr = strstr(buf, "\""); // the name is in quotes
|
||||
if (ptr != NULL) {
|
||||
ptr++; // go beyond first quote
|
||||
@ -2083,13 +2080,26 @@ static void parse_os_info(char* distro, size_t length, const char* file) {
|
||||
if (nl != NULL) *nl = '\0';
|
||||
strncpy(distro, ptr, length);
|
||||
}
|
||||
} else {
|
||||
// if not in expected Ubuntu format, print out whole line minus \n
|
||||
return;
|
||||
} else if (get_first_line) {
|
||||
char* nl = strchr(buf, '\n');
|
||||
if (nl != NULL) *nl = '\0';
|
||||
strncpy(distro, buf, length);
|
||||
return;
|
||||
}
|
||||
// close distro file
|
||||
}
|
||||
// print last line and close
|
||||
char* nl = strchr(buf, '\n');
|
||||
if (nl != NULL) *nl = '\0';
|
||||
strncpy(distro, buf, length);
|
||||
}
|
||||
|
||||
static void parse_os_info(char* distro, size_t length, const char* file) {
|
||||
FILE* fp = fopen(file, "r");
|
||||
if (fp != NULL) {
|
||||
// if suse format, print out first line
|
||||
bool get_first_line = (strcmp(file, "/etc/SuSE-release") == 0);
|
||||
parse_os_info_helper(fp, distro, length, get_first_line);
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
@ -3041,6 +3051,48 @@ static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) {
|
||||
return addr == MAP_FAILED ? NULL : addr;
|
||||
}
|
||||
|
||||
// Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address
|
||||
// (req_addr != NULL) or with a given alignment.
|
||||
// - bytes shall be a multiple of alignment.
|
||||
// - req_addr can be NULL. If not NULL, it must be a multiple of alignment.
|
||||
// - alignment sets the alignment at which memory shall be allocated.
|
||||
// It must be a multiple of allocation granularity.
|
||||
// Returns address of memory or NULL. If req_addr was not NULL, will only return
|
||||
// req_addr or NULL.
|
||||
static char* anon_mmap_aligned(size_t bytes, size_t alignment, char* req_addr) {
|
||||
|
||||
size_t extra_size = bytes;
|
||||
if (req_addr == NULL && alignment > 0) {
|
||||
extra_size += alignment;
|
||||
}
|
||||
|
||||
char* start = (char*) ::mmap(req_addr, extra_size, PROT_NONE,
|
||||
MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE,
|
||||
-1, 0);
|
||||
if (start == MAP_FAILED) {
|
||||
start = NULL;
|
||||
} else {
|
||||
if (req_addr != NULL) {
|
||||
if (start != req_addr) {
|
||||
::munmap(start, extra_size);
|
||||
start = NULL;
|
||||
}
|
||||
} else {
|
||||
char* const start_aligned = (char*) align_ptr_up(start, alignment);
|
||||
char* const end_aligned = start_aligned + bytes;
|
||||
char* const end = start + extra_size;
|
||||
if (start_aligned > start) {
|
||||
::munmap(start, start_aligned - start);
|
||||
}
|
||||
if (end_aligned < end) {
|
||||
::munmap(end_aligned, end - end_aligned);
|
||||
}
|
||||
start = start_aligned;
|
||||
}
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
// Unmaps the range [addr, addr + size) with ::munmap().
// Returns 1 if ::munmap() reported success (0), and 0 otherwise.
static int anon_munmap(char * addr, size_t size) {
  const int rc = ::munmap(addr, size);
  return (rc == 0) ? 1 : 0;
}
|
||||
@ -3317,29 +3369,113 @@ void os::large_page_init() {
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
// Forwards to warning(format, ...) only when UseLargePages is set and at least
// one of UseLargePages, UseSHM or LargePageSizeInBytes is not reported as
// default by FLAG_IS_DEFAULT.
#define shm_warning_format(format, ...)                \
  do {                                                 \
    if (UseLargePages &&                               \
        !(FLAG_IS_DEFAULT(UseLargePages) &&            \
          FLAG_IS_DEFAULT(UseSHM) &&                   \
          FLAG_IS_DEFAULT(LargePageSizeInBytes))) {    \
      warning(format, __VA_ARGS__);                    \
    }                                                  \
  } while (0)

// Plain-string variant of shm_warning_format.
#define shm_warning(str) shm_warning_format("%s", str)

// Like shm_warning, but appends the current errno value. 'str' must be a
// string literal because it is concatenated with the format suffix. errno is
// read before anything else so later calls cannot overwrite it.
#define shm_warning_with_errno(str)                        \
  do {                                                     \
    const int saved_errno = errno;                         \
    shm_warning_format(str " (error = %d)", saved_errno);  \
  } while (0)
|
||||
|
||||
static char* shmat_with_alignment(int shmid, size_t bytes, size_t alignment) {
|
||||
assert(is_size_aligned(bytes, alignment), "Must be divisible by the alignment");
|
||||
|
||||
if (!is_size_aligned(alignment, SHMLBA)) {
|
||||
assert(false, "Code below assumes that alignment is at least SHMLBA aligned");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// To ensure that we get 'alignment' aligned memory from shmat,
|
||||
// we pre-reserve aligned virtual memory and then attach to that.
|
||||
|
||||
char* pre_reserved_addr = anon_mmap_aligned(bytes, alignment, NULL);
|
||||
if (pre_reserved_addr == NULL) {
|
||||
// Couldn't pre-reserve aligned memory.
|
||||
shm_warning("Failed to pre-reserve aligned memory for shmat.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// SHM_REMAP is needed to allow shmat to map over an existing mapping.
|
||||
char* addr = (char*)shmat(shmid, pre_reserved_addr, SHM_REMAP);
|
||||
|
||||
if ((intptr_t)addr == -1) {
|
||||
int err = errno;
|
||||
shm_warning_with_errno("Failed to attach shared memory.");
|
||||
|
||||
assert(err != EACCES, "Unexpected error");
|
||||
assert(err != EIDRM, "Unexpected error");
|
||||
assert(err != EINVAL, "Unexpected error");
|
||||
|
||||
// Since we don't know if the kernel unmapped the pre-reserved memory area
|
||||
// we can't unmap it, since that would potentially unmap memory that was
|
||||
// mapped from other threads.
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
static char* shmat_at_address(int shmid, char* req_addr) {
|
||||
if (!is_ptr_aligned(req_addr, SHMLBA)) {
|
||||
assert(false, "Requested address needs to be SHMLBA aligned");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char* addr = (char*)shmat(shmid, req_addr, 0);
|
||||
|
||||
if ((intptr_t)addr == -1) {
|
||||
shm_warning_with_errno("Failed to attach shared memory.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
static char* shmat_large_pages(int shmid, size_t bytes, size_t alignment, char* req_addr) {
|
||||
// If a req_addr has been provided, we assume that the caller has already aligned the address.
|
||||
if (req_addr != NULL) {
|
||||
assert(is_ptr_aligned(req_addr, os::large_page_size()), "Must be divisible by the large page size");
|
||||
assert(is_ptr_aligned(req_addr, alignment), "Must be divisible by given alignment");
|
||||
return shmat_at_address(shmid, req_addr);
|
||||
}
|
||||
|
||||
// Since shmid has been setup with SHM_HUGETLB, shmat will automatically
|
||||
// return large page size aligned memory addresses when req_addr == NULL.
|
||||
// However, if the alignment is larger than the large page size, we have
|
||||
// to manually ensure that the memory returned is 'alignment' aligned.
|
||||
if (alignment > os::large_page_size()) {
|
||||
assert(is_size_aligned(alignment, os::large_page_size()), "Must be divisible by the large page size");
|
||||
return shmat_with_alignment(shmid, bytes, alignment);
|
||||
} else {
|
||||
return shmat_at_address(shmid, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment,
|
||||
char* req_addr, bool exec) {
|
||||
// "exec" is passed in but not used. Creating the shared image for
|
||||
// the code cache doesn't have an SHM_X executable permission to check.
|
||||
assert(UseLargePages && UseSHM, "only for SHM large pages");
|
||||
assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
|
||||
assert(is_ptr_aligned(req_addr, alignment), "Unaligned address");
|
||||
|
||||
if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) {
|
||||
if (!is_size_aligned(bytes, os::large_page_size())) {
|
||||
return NULL; // Fallback to small pages.
|
||||
}
|
||||
|
||||
key_t key = IPC_PRIVATE;
|
||||
char *addr;
|
||||
|
||||
bool warn_on_failure = UseLargePages &&
|
||||
(!FLAG_IS_DEFAULT(UseLargePages) ||
|
||||
!FLAG_IS_DEFAULT(UseSHM) ||
|
||||
!FLAG_IS_DEFAULT(LargePageSizeInBytes));
|
||||
char msg[128];
|
||||
|
||||
// Create a large shared memory region to attach to based on size.
|
||||
// Currently, size is the total size of the heap
|
||||
int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
|
||||
// Currently, size is the total size of the heap.
|
||||
int shmid = shmget(IPC_PRIVATE, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
|
||||
if (shmid == -1) {
|
||||
// Possible reasons for shmget failure:
|
||||
// 1. shmmax is too small for Java heap.
|
||||
@ -3355,16 +3491,12 @@ char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment,
|
||||
// they are so fragmented after a long run that they can't
|
||||
// coalesce into large pages. Try to reserve large pages when
|
||||
// the system is still "fresh".
|
||||
if (warn_on_failure) {
|
||||
jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno);
|
||||
warning("%s", msg);
|
||||
}
|
||||
shm_warning_with_errno("Failed to reserve shared memory.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// attach to the region
|
||||
addr = (char*)shmat(shmid, req_addr, 0);
|
||||
int err = errno;
|
||||
// Attach to the region.
|
||||
char* addr = shmat_large_pages(shmid, bytes, alignment, req_addr);
|
||||
|
||||
// Remove shmid. If shmat() is successful, the actual shared memory segment
|
||||
// will be deleted when it's detached by shmdt() or when the process
|
||||
@ -3372,14 +3504,6 @@ char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment,
|
||||
// segment immediately.
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
|
||||
if ((intptr_t)addr == -1) {
|
||||
if (warn_on_failure) {
|
||||
jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err);
|
||||
warning("%s", msg);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
@ -3422,50 +3546,6 @@ char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes,
|
||||
return addr;
|
||||
}
|
||||
|
||||
// Helper for os::Linux::reserve_memory_special_huge_tlbfs_mixed().
|
||||
// Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address
|
||||
// (req_addr != NULL) or with a given alignment.
|
||||
// - bytes shall be a multiple of alignment.
|
||||
// - req_addr can be NULL. If not NULL, it must be a multiple of alignment.
|
||||
// - alignment sets the alignment at which memory shall be allocated.
|
||||
// It must be a multiple of allocation granularity.
|
||||
// Returns address of memory or NULL. If req_addr was not NULL, will only return
|
||||
// req_addr or NULL.
|
||||
static char* anon_mmap_aligned(size_t bytes, size_t alignment, char* req_addr) {
|
||||
|
||||
size_t extra_size = bytes;
|
||||
if (req_addr == NULL && alignment > 0) {
|
||||
extra_size += alignment;
|
||||
}
|
||||
|
||||
char* start = (char*) ::mmap(req_addr, extra_size, PROT_NONE,
|
||||
MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE,
|
||||
-1, 0);
|
||||
if (start == MAP_FAILED) {
|
||||
start = NULL;
|
||||
} else {
|
||||
if (req_addr != NULL) {
|
||||
if (start != req_addr) {
|
||||
::munmap(start, extra_size);
|
||||
start = NULL;
|
||||
}
|
||||
} else {
|
||||
char* const start_aligned = (char*) align_ptr_up(start, alignment);
|
||||
char* const end_aligned = start_aligned + bytes;
|
||||
char* const end = start + extra_size;
|
||||
if (start_aligned > start) {
|
||||
::munmap(start, start_aligned - start);
|
||||
}
|
||||
if (end_aligned < end) {
|
||||
::munmap(end_aligned, end - end_aligned);
|
||||
}
|
||||
start = start_aligned;
|
||||
}
|
||||
}
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
// Reserve memory using mmap(MAP_HUGETLB).
|
||||
// - bytes shall be a multiple of alignment.
|
||||
// - req_addr can be NULL. If not NULL, it must be a multiple of alignment.
|
||||
|
@ -181,8 +181,8 @@ int os::log_vsnprintf(char* buf, size_t len, const char* fmt, va_list args) {
|
||||
return vsnprintf(buf, len, fmt, args);
|
||||
}
|
||||
|
||||
int os::fileno(FILE* fp) {
|
||||
return ::fileno(fp);
|
||||
int os::get_fileno(FILE* fp) {
|
||||
return NOT_AIX(::)fileno(fp);
|
||||
}
|
||||
|
||||
void os::Posix::print_load_average(outputStream* st) {
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user