Merge
This commit is contained in:
commit
b6ac98452d
@ -3425,9 +3425,6 @@ const bool Matcher::misaligned_vectors_ok() {
|
|||||||
// false => size gets scaled to BytesPerLong, ok.
|
// false => size gets scaled to BytesPerLong, ok.
|
||||||
const bool Matcher::init_array_count_is_in_bytes = false;
|
const bool Matcher::init_array_count_is_in_bytes = false;
|
||||||
|
|
||||||
// Threshold size for cleararray.
|
|
||||||
const int Matcher::init_array_short_size = 18 * BytesPerLong;
|
|
||||||
|
|
||||||
// Use conditional move (CMOVL)
|
// Use conditional move (CMOVL)
|
||||||
const int Matcher::long_cmove_cost() {
|
const int Matcher::long_cmove_cost() {
|
||||||
// long cmoves are no more expensive than int cmoves
|
// long cmoves are no more expensive than int cmoves
|
||||||
@ -4135,14 +4132,14 @@ encode %{
|
|||||||
MacroAssembler _masm(&cbuf);
|
MacroAssembler _masm(&cbuf);
|
||||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||||
&Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
|
Assembler::xword, /*acquire*/ false, /*release*/ true);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
||||||
MacroAssembler _masm(&cbuf);
|
MacroAssembler _masm(&cbuf);
|
||||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||||
&Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
|
Assembler::word, /*acquire*/ false, /*release*/ true);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
|
||||||
@ -4154,14 +4151,14 @@ encode %{
|
|||||||
MacroAssembler _masm(&cbuf);
|
MacroAssembler _masm(&cbuf);
|
||||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||||
&Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
|
Assembler::xword, /*acquire*/ true, /*release*/ true);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
||||||
MacroAssembler _masm(&cbuf);
|
MacroAssembler _masm(&cbuf);
|
||||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||||
&Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
|
Assembler::word, /*acquire*/ true, /*release*/ true);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
|
||||||
@ -4679,8 +4676,14 @@ encode %{
|
|||||||
|
|
||||||
// Compare object markOop with mark and if equal exchange scratch1
|
// Compare object markOop with mark and if equal exchange scratch1
|
||||||
// with object markOop.
|
// with object markOop.
|
||||||
{
|
if (UseLSE) {
|
||||||
|
__ mov(tmp, disp_hdr);
|
||||||
|
__ casal(Assembler::xword, tmp, box, oop);
|
||||||
|
__ cmp(tmp, disp_hdr);
|
||||||
|
__ br(Assembler::EQ, cont);
|
||||||
|
} else {
|
||||||
Label retry_load;
|
Label retry_load;
|
||||||
|
__ prfm(Address(oop), PSTL1STRM);
|
||||||
__ bind(retry_load);
|
__ bind(retry_load);
|
||||||
__ ldaxr(tmp, oop);
|
__ ldaxr(tmp, oop);
|
||||||
__ cmp(tmp, disp_hdr);
|
__ cmp(tmp, disp_hdr);
|
||||||
@ -4729,8 +4732,13 @@ encode %{
|
|||||||
__ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
|
__ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
|
||||||
__ mov(disp_hdr, zr);
|
__ mov(disp_hdr, zr);
|
||||||
|
|
||||||
{
|
if (UseLSE) {
|
||||||
|
__ mov(rscratch1, disp_hdr);
|
||||||
|
__ casal(Assembler::xword, rscratch1, rthread, tmp);
|
||||||
|
__ cmp(rscratch1, disp_hdr);
|
||||||
|
} else {
|
||||||
Label retry_load, fail;
|
Label retry_load, fail;
|
||||||
|
__ prfm(Address(tmp), PSTL1STRM);
|
||||||
__ bind(retry_load);
|
__ bind(retry_load);
|
||||||
__ ldaxr(rscratch1, tmp);
|
__ ldaxr(rscratch1, tmp);
|
||||||
__ cmp(disp_hdr, rscratch1);
|
__ cmp(disp_hdr, rscratch1);
|
||||||
@ -4818,8 +4826,13 @@ encode %{
|
|||||||
// see the stack address of the basicLock in the markOop of the
|
// see the stack address of the basicLock in the markOop of the
|
||||||
// object.
|
// object.
|
||||||
|
|
||||||
{
|
if (UseLSE) {
|
||||||
|
__ mov(tmp, box);
|
||||||
|
__ casl(Assembler::xword, tmp, disp_hdr, oop);
|
||||||
|
__ cmp(tmp, box);
|
||||||
|
} else {
|
||||||
Label retry_load;
|
Label retry_load;
|
||||||
|
__ prfm(Address(oop), PSTL1STRM);
|
||||||
__ bind(retry_load);
|
__ bind(retry_load);
|
||||||
__ ldxr(tmp, oop);
|
__ ldxr(tmp, oop);
|
||||||
__ cmp(box, tmp);
|
__ cmp(box, tmp);
|
||||||
|
@ -972,7 +972,7 @@ public:
|
|||||||
|
|
||||||
// System
|
// System
|
||||||
void system(int op0, int op1, int CRn, int CRm, int op2,
|
void system(int op0, int op1, int CRn, int CRm, int op2,
|
||||||
Register rt = (Register)0b11111)
|
Register rt = dummy_reg)
|
||||||
{
|
{
|
||||||
starti;
|
starti;
|
||||||
f(0b11010101000, 31, 21);
|
f(0b11010101000, 31, 21);
|
||||||
@ -1082,7 +1082,7 @@ public:
|
|||||||
|
|
||||||
#define INSN(NAME, opc) \
|
#define INSN(NAME, opc) \
|
||||||
void NAME() { \
|
void NAME() { \
|
||||||
branch_reg((Register)0b11111, opc); \
|
branch_reg(dummy_reg, opc); \
|
||||||
}
|
}
|
||||||
|
|
||||||
INSN(eret, 0b0100);
|
INSN(eret, 0b0100);
|
||||||
@ -1094,10 +1094,22 @@ public:
|
|||||||
enum operand_size { byte, halfword, word, xword };
|
enum operand_size { byte, halfword, word, xword };
|
||||||
|
|
||||||
void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
|
void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
|
||||||
Register Rn, enum operand_size sz, int op, int o0) {
|
Register Rn, enum operand_size sz, int op, bool ordered) {
|
||||||
starti;
|
starti;
|
||||||
f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
|
f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
|
||||||
rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
|
rf(Rs, 16), f(ordered, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit a load-exclusive of width sz from the address held in addr into dst.
// The ordered flag is handed to load_store_exclusive unchanged, and both
// register fields this form does not use are filled with dummy_reg.
void load_exclusive(Register dst, Register addr,
                    enum operand_size sz, bool ordered) {
  const int load_op = 0b010;  // value for the op field (bits 23..21)
  load_store_exclusive(/*Rs*/ dummy_reg, /*Rt1*/ dst, /*Rt2*/ dummy_reg,
                       /*Rn*/ addr, sz, load_op, ordered);
}
|
||||||
|
|
||||||
|
void store_exclusive(Register status, Register new_val, Register addr,
|
||||||
|
enum operand_size sz, bool ordered) {
|
||||||
|
load_store_exclusive(status, new_val, dummy_reg, addr,
|
||||||
|
sz, 0b000, ordered);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define INSN4(NAME, sz, op, o0) /* Four registers */ \
|
#define INSN4(NAME, sz, op, o0) /* Four registers */ \
|
||||||
@ -1109,19 +1121,19 @@ public:
|
|||||||
#define INSN3(NAME, sz, op, o0) /* Three registers */ \
|
#define INSN3(NAME, sz, op, o0) /* Three registers */ \
|
||||||
void NAME(Register Rs, Register Rt, Register Rn) { \
|
void NAME(Register Rs, Register Rt, Register Rn) { \
|
||||||
guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \
|
guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \
|
||||||
load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \
|
load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define INSN2(NAME, sz, op, o0) /* Two registers */ \
|
#define INSN2(NAME, sz, op, o0) /* Two registers */ \
|
||||||
void NAME(Register Rt, Register Rn) { \
|
void NAME(Register Rt, Register Rn) { \
|
||||||
load_store_exclusive((Register)0b11111, Rt, (Register)0b11111, \
|
load_store_exclusive(dummy_reg, Rt, dummy_reg, \
|
||||||
Rn, sz, op, o0); \
|
Rn, sz, op, o0); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
|
#define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
|
||||||
void NAME(Register Rt1, Register Rt2, Register Rn) { \
|
void NAME(Register Rt1, Register Rt2, Register Rn) { \
|
||||||
guarantee(Rt1 != Rt2, "unpredictable instruction"); \
|
guarantee(Rt1 != Rt2, "unpredictable instruction"); \
|
||||||
load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0); \
|
load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0); \
|
||||||
}
|
}
|
||||||
|
|
||||||
// bytes
|
// bytes
|
||||||
@ -1169,6 +1181,46 @@ public:
|
|||||||
#undef INSN4
|
#undef INSN4
|
||||||
#undef INSN_FOO
|
#undef INSN_FOO
|
||||||
|
|
||||||
|
// 8.1 Compare and swap extensions
|
||||||
|
// Emit one instruction of the compare-and-swap family.
//
//   Rs       - holds the value to compare against; callers copy the expected
//              value here and read the result back from it afterwards
//   Rt       - holds the new value to store
//   Rn       - holds the address of the memory location
//   sz       - operand size; the pair form accepts only word or xword
//   a        - acquire flag, encoded in bit 22
//   r        - release flag, encoded in bit 15
//   not_pair - true for the single-register form (CAS*), false for the
//              register-pair form (CASP*)
void lse_cas(Register Rs, Register Rt, Register Rn,
             enum operand_size sz, bool a, bool r, bool not_pair) {
  starti;
  if (! not_pair) { // Pair
    assert(sz == word || sz == xword, "invalid size");
    /* The size bit is in bit 30, not 31 */
    sz = (operand_size)(sz == word ? 0b00:0b01);
  }
  // Bit 23 is what tells the two forms apart: 1 for the single-register
  // form, 0 for the pair form.  Leaving it set for a pair would turn the
  // rewritten size field (00/01) into a byte/halfword CAS encoding.
  f(sz, 31, 30), f(0b001000, 29, 24), f(not_pair ? 1 : 0, 23), f(a, 22), f(1, 21);
  rf(Rs, 16), f(r, 15), f(0b11111, 14, 10), rf(Rn, 5), rf(Rt, 0);
}
|
||||||
|
|
||||||
|
// CAS
|
||||||
|
#define INSN(NAME, a, r) \
|
||||||
|
void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) { \
|
||||||
|
assert(Rs != Rn && Rs != Rt, "unpredictable instruction"); \
|
||||||
|
lse_cas(Rs, Rt, Rn, sz, a, r, true); \
|
||||||
|
}
|
||||||
|
INSN(cas, false, false)
|
||||||
|
INSN(casa, true, false)
|
||||||
|
INSN(casl, false, true)
|
||||||
|
INSN(casal, true, true)
|
||||||
|
#undef INSN
|
||||||
|
|
||||||
|
// CASP
|
||||||
|
#define INSN(NAME, a, r) \
|
||||||
|
void NAME(operand_size sz, Register Rs, Register Rs1, \
|
||||||
|
Register Rt, Register Rt1, Register Rn) { \
|
||||||
|
assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 && \
|
||||||
|
Rs->successor() == Rs1 && Rt->successor() == Rt1 && \
|
||||||
|
Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers"); \
|
||||||
|
lse_cas(Rs, Rt, Rn, sz, a, r, false); \
|
||||||
|
}
|
||||||
|
INSN(casp, false, false)
|
||||||
|
INSN(caspa, true, false)
|
||||||
|
INSN(caspl, false, true)
|
||||||
|
INSN(caspal, true, true)
|
||||||
|
#undef INSN
|
||||||
|
|
||||||
// Load register (literal)
|
// Load register (literal)
|
||||||
#define INSN(NAME, opc, V) \
|
#define INSN(NAME, opc, V) \
|
||||||
void NAME(Register Rt, address dest) { \
|
void NAME(Register Rt, address dest) { \
|
||||||
|
@ -1556,38 +1556,54 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
|
void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
|
||||||
Label retry_load, nope;
|
if (UseLSE) {
|
||||||
// flush and load exclusive from the memory location
|
__ mov(rscratch1, cmpval);
|
||||||
// and fail if it is not what we expect
|
__ casal(Assembler::word, rscratch1, newval, addr);
|
||||||
__ bind(retry_load);
|
__ cmpw(rscratch1, cmpval);
|
||||||
__ ldaxrw(rscratch1, addr);
|
__ cset(rscratch1, Assembler::NE);
|
||||||
__ cmpw(rscratch1, cmpval);
|
} else {
|
||||||
__ cset(rscratch1, Assembler::NE);
|
Label retry_load, nope;
|
||||||
__ br(Assembler::NE, nope);
|
// flush and load exclusive from the memory location
|
||||||
// if we store+flush with no intervening write rscratch1 will be zero
|
// and fail if it is not what we expect
|
||||||
__ stlxrw(rscratch1, newval, addr);
|
__ prfm(Address(addr), PSTL1STRM);
|
||||||
// retry so we only ever return after a load fails to compare
|
__ bind(retry_load);
|
||||||
// ensures we don't return a stale value after a failed write.
|
__ ldaxrw(rscratch1, addr);
|
||||||
__ cbnzw(rscratch1, retry_load);
|
__ cmpw(rscratch1, cmpval);
|
||||||
__ bind(nope);
|
__ cset(rscratch1, Assembler::NE);
|
||||||
|
__ br(Assembler::NE, nope);
|
||||||
|
// if we store+flush with no intervening write rscratch1 will be zero
|
||||||
|
__ stlxrw(rscratch1, newval, addr);
|
||||||
|
// retry so we only ever return after a load fails to compare
|
||||||
|
// ensures we don't return a stale value after a failed write.
|
||||||
|
__ cbnzw(rscratch1, retry_load);
|
||||||
|
__ bind(nope);
|
||||||
|
}
|
||||||
__ membar(__ AnyAny);
|
__ membar(__ AnyAny);
|
||||||
}
|
}
|
||||||
|
|
||||||
void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
|
void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
|
||||||
Label retry_load, nope;
|
if (UseLSE) {
|
||||||
// flush and load exclusive from the memory location
|
__ mov(rscratch1, cmpval);
|
||||||
// and fail if it is not what we expect
|
__ casal(Assembler::xword, rscratch1, newval, addr);
|
||||||
__ bind(retry_load);
|
__ cmp(rscratch1, cmpval);
|
||||||
__ ldaxr(rscratch1, addr);
|
__ cset(rscratch1, Assembler::NE);
|
||||||
__ cmp(rscratch1, cmpval);
|
} else {
|
||||||
__ cset(rscratch1, Assembler::NE);
|
Label retry_load, nope;
|
||||||
__ br(Assembler::NE, nope);
|
// flush and load exclusive from the memory location
|
||||||
// if we store+flush with no intervening write rscratch1 will be zero
|
// and fail if it is not what we expect
|
||||||
__ stlxr(rscratch1, newval, addr);
|
__ prfm(Address(addr), PSTL1STRM);
|
||||||
// retry so we only ever return after a load fails to compare
|
__ bind(retry_load);
|
||||||
// ensures we don't return a stale value after a failed write.
|
__ ldaxr(rscratch1, addr);
|
||||||
__ cbnz(rscratch1, retry_load);
|
__ cmp(rscratch1, cmpval);
|
||||||
__ bind(nope);
|
__ cset(rscratch1, Assembler::NE);
|
||||||
|
__ br(Assembler::NE, nope);
|
||||||
|
// if we store+flush with no intervening write rscratch1 will be zero
|
||||||
|
__ stlxr(rscratch1, newval, addr);
|
||||||
|
// retry so we only ever return after a load fails to compare
|
||||||
|
// ensures we don't return a stale value after a failed write.
|
||||||
|
__ cbnz(rscratch1, retry_load);
|
||||||
|
__ bind(nope);
|
||||||
|
}
|
||||||
__ membar(__ AnyAny);
|
__ membar(__ AnyAny);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3156,6 +3172,7 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr
|
|||||||
}
|
}
|
||||||
Label again;
|
Label again;
|
||||||
__ lea(tmp, addr);
|
__ lea(tmp, addr);
|
||||||
|
__ prfm(Address(tmp), PSTL1STRM);
|
||||||
__ bind(again);
|
__ bind(again);
|
||||||
(_masm->*lda)(dst, tmp);
|
(_masm->*lda)(dst, tmp);
|
||||||
(_masm->*add)(rscratch1, dst, inc);
|
(_masm->*add)(rscratch1, dst, inc);
|
||||||
@ -3175,6 +3192,7 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr
|
|||||||
assert_different_registers(obj, addr.base(), tmp, rscratch2, dst);
|
assert_different_registers(obj, addr.base(), tmp, rscratch2, dst);
|
||||||
Label again;
|
Label again;
|
||||||
__ lea(tmp, addr);
|
__ lea(tmp, addr);
|
||||||
|
__ prfm(Address(tmp), PSTL1STRM);
|
||||||
__ bind(again);
|
__ bind(again);
|
||||||
(_masm->*lda)(dst, tmp);
|
(_masm->*lda)(dst, tmp);
|
||||||
(_masm->*stl)(rscratch2, obj, tmp);
|
(_masm->*stl)(rscratch2, obj, tmp);
|
||||||
|
@ -76,6 +76,8 @@ define_pd_global(bool, CompactStrings, false);
|
|||||||
// avoid biased locking while we are bootstrapping the aarch64 build
|
// avoid biased locking while we are bootstrapping the aarch64 build
|
||||||
define_pd_global(bool, UseBiasedLocking, false);
|
define_pd_global(bool, UseBiasedLocking, false);
|
||||||
|
|
||||||
|
define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong);
|
||||||
|
|
||||||
#if defined(COMPILER1) || defined(COMPILER2)
|
#if defined(COMPILER1) || defined(COMPILER2)
|
||||||
define_pd_global(intx, InlineSmallCode, 1000);
|
define_pd_global(intx, InlineSmallCode, 1000);
|
||||||
#endif
|
#endif
|
||||||
@ -101,9 +103,13 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
|||||||
\
|
\
|
||||||
product(bool, UseCRC32, false, \
|
product(bool, UseCRC32, false, \
|
||||||
"Use CRC32 instructions for CRC32 computation") \
|
"Use CRC32 instructions for CRC32 computation") \
|
||||||
|
\
|
||||||
|
product(bool, UseLSE, false, \
|
||||||
|
"Use LSE instructions") \
|
||||||
|
|
||||||
// Don't attempt to use Neon on builtin sim until builtin sim supports it
|
// Don't attempt to use Neon on builtin sim until builtin sim supports it
|
||||||
#define UseCRC32 false
|
#define UseCRC32 false
|
||||||
|
#define UseSIMDForMemoryOps false
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define UseBuiltinSim false
|
#define UseBuiltinSim false
|
||||||
@ -121,6 +127,10 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
|||||||
"Use Neon for CRC32 computation") \
|
"Use Neon for CRC32 computation") \
|
||||||
product(bool, UseCRC32, false, \
|
product(bool, UseCRC32, false, \
|
||||||
"Use CRC32 instructions for CRC32 computation") \
|
"Use CRC32 instructions for CRC32 computation") \
|
||||||
|
product(bool, UseSIMDForMemoryOps, false, \
|
||||||
|
"Use SIMD instructions in generated memory move code") \
|
||||||
|
product(bool, UseLSE, false, \
|
||||||
|
"Use LSE instructions") \
|
||||||
product(bool, TraceTraps, false, "Trace all traps the signal handler")
|
product(bool, TraceTraps, false, "Trace all traps the signal handler")
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1638,6 +1638,7 @@ Address MacroAssembler::form_address(Register Rd, Register base, long byte_offse
|
|||||||
|
|
||||||
void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
|
void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
|
||||||
Label retry_load;
|
Label retry_load;
|
||||||
|
prfm(Address(counter_addr), PSTL1STRM);
|
||||||
bind(retry_load);
|
bind(retry_load);
|
||||||
// flush and load exclusive from the memory location
|
// flush and load exclusive from the memory location
|
||||||
ldxrw(tmp, counter_addr);
|
ldxrw(tmp, counter_addr);
|
||||||
@ -2070,25 +2071,32 @@ void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Reg
|
|||||||
// oldv holds comparison value
|
// oldv holds comparison value
|
||||||
// newv holds value to write in exchange
|
// newv holds value to write in exchange
|
||||||
// addr identifies memory word to compare against/update
|
// addr identifies memory word to compare against/update
|
||||||
// tmp returns 0/1 for success/failure
|
if (UseLSE) {
|
||||||
Label retry_load, nope;
|
mov(tmp, oldv);
|
||||||
|
casal(Assembler::xword, oldv, newv, addr);
|
||||||
bind(retry_load);
|
cmp(tmp, oldv);
|
||||||
// flush and load exclusive from the memory location
|
br(Assembler::EQ, succeed);
|
||||||
// and fail if it is not what we expect
|
membar(AnyAny);
|
||||||
ldaxr(tmp, addr);
|
} else {
|
||||||
cmp(tmp, oldv);
|
Label retry_load, nope;
|
||||||
br(Assembler::NE, nope);
|
prfm(Address(addr), PSTL1STRM);
|
||||||
// if we store+flush with no intervening write tmp will be zero
|
bind(retry_load);
|
||||||
stlxr(tmp, newv, addr);
|
// flush and load exclusive from the memory location
|
||||||
cbzw(tmp, succeed);
|
// and fail if it is not what we expect
|
||||||
// retry so we only ever return after a load fails to compare
|
ldaxr(tmp, addr);
|
||||||
// ensures we don't return a stale value after a failed write.
|
cmp(tmp, oldv);
|
||||||
b(retry_load);
|
br(Assembler::NE, nope);
|
||||||
// if the memory word differs we return it in oldv and signal a fail
|
// if we store+flush with no intervening write tmp will be zero
|
||||||
bind(nope);
|
stlxr(tmp, newv, addr);
|
||||||
membar(AnyAny);
|
cbzw(tmp, succeed);
|
||||||
mov(oldv, tmp);
|
// retry so we only ever return after a load fails to compare
|
||||||
|
// ensures we don't return a stale value after a failed write.
|
||||||
|
b(retry_load);
|
||||||
|
// if the memory word differs we return it in oldv and signal a fail
|
||||||
|
bind(nope);
|
||||||
|
membar(AnyAny);
|
||||||
|
mov(oldv, tmp);
|
||||||
|
}
|
||||||
if (fail)
|
if (fail)
|
||||||
b(*fail);
|
b(*fail);
|
||||||
}
|
}
|
||||||
@ -2099,28 +2107,64 @@ void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Regis
|
|||||||
// newv holds value to write in exchange
|
// newv holds value to write in exchange
|
||||||
// addr identifies memory word to compare against/update
|
// addr identifies memory word to compare against/update
|
||||||
// tmp returns 0/1 for success/failure
|
// tmp returns 0/1 for success/failure
|
||||||
Label retry_load, nope;
|
if (UseLSE) {
|
||||||
|
mov(tmp, oldv);
|
||||||
bind(retry_load);
|
casal(Assembler::word, oldv, newv, addr);
|
||||||
// flush and load exclusive from the memory location
|
cmp(tmp, oldv);
|
||||||
// and fail if it is not what we expect
|
br(Assembler::EQ, succeed);
|
||||||
ldaxrw(tmp, addr);
|
membar(AnyAny);
|
||||||
cmp(tmp, oldv);
|
} else {
|
||||||
br(Assembler::NE, nope);
|
Label retry_load, nope;
|
||||||
// if we store+flush with no intervening write tmp will be zero
|
prfm(Address(addr), PSTL1STRM);
|
||||||
stlxrw(tmp, newv, addr);
|
bind(retry_load);
|
||||||
cbzw(tmp, succeed);
|
// flush and load exclusive from the memory location
|
||||||
// retry so we only ever return after a load fails to compare
|
// and fail if it is not what we expect
|
||||||
// ensures we don't return a stale value after a failed write.
|
ldaxrw(tmp, addr);
|
||||||
b(retry_load);
|
cmp(tmp, oldv);
|
||||||
// if the memory word differs we return it in oldv and signal a fail
|
br(Assembler::NE, nope);
|
||||||
bind(nope);
|
// if we store+flush with no intervening write tmp will be zero
|
||||||
membar(AnyAny);
|
stlxrw(tmp, newv, addr);
|
||||||
mov(oldv, tmp);
|
cbzw(tmp, succeed);
|
||||||
|
// retry so we only ever return after a load fails to compare
|
||||||
|
// ensures we don't return a stale value after a failed write.
|
||||||
|
b(retry_load);
|
||||||
|
// if the memory word differs we return it in oldv and signal a fail
|
||||||
|
bind(nope);
|
||||||
|
membar(AnyAny);
|
||||||
|
mov(oldv, tmp);
|
||||||
|
}
|
||||||
if (fail)
|
if (fail)
|
||||||
b(*fail);
|
b(*fail);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A generic CAS; success or failure is in the EQ flag.
|
||||||
|
void MacroAssembler::cmpxchg(Register addr, Register expected,
|
||||||
|
Register new_val,
|
||||||
|
enum operand_size size,
|
||||||
|
bool acquire, bool release,
|
||||||
|
Register tmp) {
|
||||||
|
if (UseLSE) {
|
||||||
|
mov(tmp, expected);
|
||||||
|
lse_cas(tmp, new_val, addr, size, acquire, release, /*not_pair*/ true);
|
||||||
|
cmp(tmp, expected);
|
||||||
|
} else {
|
||||||
|
BLOCK_COMMENT("cmpxchg {");
|
||||||
|
Label retry_load, done;
|
||||||
|
prfm(Address(addr), PSTL1STRM);
|
||||||
|
bind(retry_load);
|
||||||
|
load_exclusive(tmp, addr, size, acquire);
|
||||||
|
if (size == xword)
|
||||||
|
cmp(tmp, expected);
|
||||||
|
else
|
||||||
|
cmpw(tmp, expected);
|
||||||
|
br(Assembler::NE, done);
|
||||||
|
store_exclusive(tmp, new_val, addr, size, release);
|
||||||
|
cbnzw(tmp, retry_load);
|
||||||
|
bind(done);
|
||||||
|
BLOCK_COMMENT("} cmpxchg");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static bool different(Register a, RegisterOrConstant b, Register c) {
|
static bool different(Register a, RegisterOrConstant b, Register c) {
|
||||||
if (b.is_constant())
|
if (b.is_constant())
|
||||||
return a != c;
|
return a != c;
|
||||||
@ -2135,6 +2179,7 @@ void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Registe
|
|||||||
result = different(prev, incr, addr) ? prev : rscratch2; \
|
result = different(prev, incr, addr) ? prev : rscratch2; \
|
||||||
\
|
\
|
||||||
Label retry_load; \
|
Label retry_load; \
|
||||||
|
prfm(Address(addr), PSTL1STRM); \
|
||||||
bind(retry_load); \
|
bind(retry_load); \
|
||||||
LDXR(result, addr); \
|
LDXR(result, addr); \
|
||||||
OP(rscratch1, result, incr); \
|
OP(rscratch1, result, incr); \
|
||||||
@ -2157,6 +2202,7 @@ void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) {
|
|||||||
result = different(prev, newv, addr) ? prev : rscratch2; \
|
result = different(prev, newv, addr) ? prev : rscratch2; \
|
||||||
\
|
\
|
||||||
Label retry_load; \
|
Label retry_load; \
|
||||||
|
prfm(Address(addr), PSTL1STRM); \
|
||||||
bind(retry_load); \
|
bind(retry_load); \
|
||||||
LDXR(result, addr); \
|
LDXR(result, addr); \
|
||||||
STXR(rscratch1, newv, addr); \
|
STXR(rscratch1, newv, addr); \
|
||||||
|
@ -971,21 +971,10 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// A generic CAS; success or failure is in the EQ flag.
|
// A generic CAS; success or failure is in the EQ flag.
|
||||||
template <typename T1, typename T2>
|
|
||||||
void cmpxchg(Register addr, Register expected, Register new_val,
|
void cmpxchg(Register addr, Register expected, Register new_val,
|
||||||
T1 load_insn,
|
enum operand_size size,
|
||||||
void (MacroAssembler::*cmp_insn)(Register, Register),
|
bool acquire, bool release,
|
||||||
T2 store_insn,
|
Register tmp = rscratch1);
|
||||||
Register tmp = rscratch1) {
|
|
||||||
Label retry_load, done;
|
|
||||||
bind(retry_load);
|
|
||||||
(this->*load_insn)(tmp, addr);
|
|
||||||
(this->*cmp_insn)(tmp, expected);
|
|
||||||
br(Assembler::NE, done);
|
|
||||||
(this->*store_insn)(tmp, new_val, addr);
|
|
||||||
cbnzw(tmp, retry_load);
|
|
||||||
bind(done);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calls
|
// Calls
|
||||||
|
|
||||||
|
@ -107,6 +107,9 @@ CONSTANT_REGISTER_DECLARATION(Register, r31_sp, (31));
|
|||||||
CONSTANT_REGISTER_DECLARATION(Register, zr, (32));
|
CONSTANT_REGISTER_DECLARATION(Register, zr, (32));
|
||||||
CONSTANT_REGISTER_DECLARATION(Register, sp, (33));
|
CONSTANT_REGISTER_DECLARATION(Register, sp, (33));
|
||||||
|
|
||||||
|
// Used as a filler in instructions where a register field is unused.
|
||||||
|
const Register dummy_reg = r31_sp;
|
||||||
|
|
||||||
// Use FloatRegister as shortcut
|
// Use FloatRegister as shortcut
|
||||||
class FloatRegisterImpl;
|
class FloatRegisterImpl;
|
||||||
typedef FloatRegisterImpl* FloatRegister;
|
typedef FloatRegisterImpl* FloatRegister;
|
||||||
|
@ -729,7 +729,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
//
|
//
|
||||||
// count is a count of words.
|
// count is a count of words.
|
||||||
//
|
//
|
||||||
// Precondition: count >= 2
|
// Precondition: count >= 8
|
||||||
//
|
//
|
||||||
// Postconditions:
|
// Postconditions:
|
||||||
//
|
//
|
||||||
@ -741,6 +741,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
void generate_copy_longs(Label &start, Register s, Register d, Register count,
|
void generate_copy_longs(Label &start, Register s, Register d, Register count,
|
||||||
copy_direction direction) {
|
copy_direction direction) {
|
||||||
int unit = wordSize * direction;
|
int unit = wordSize * direction;
|
||||||
|
int bias = (UseSIMDForMemoryOps ? 4:2) * wordSize;
|
||||||
|
|
||||||
int offset;
|
int offset;
|
||||||
const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6,
|
const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6,
|
||||||
@ -750,7 +751,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7);
|
assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7);
|
||||||
assert_different_registers(s, d, count, rscratch1);
|
assert_different_registers(s, d, count, rscratch1);
|
||||||
|
|
||||||
Label again, large, small;
|
Label again, drain;
|
||||||
const char *stub_name;
|
const char *stub_name;
|
||||||
if (direction == copy_forwards)
|
if (direction == copy_forwards)
|
||||||
stub_name = "foward_copy_longs";
|
stub_name = "foward_copy_longs";
|
||||||
@ -759,57 +760,35 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||||
__ align(CodeEntryAlignment);
|
__ align(CodeEntryAlignment);
|
||||||
__ bind(start);
|
__ bind(start);
|
||||||
__ cmp(count, 8);
|
|
||||||
__ br(Assembler::LO, small);
|
|
||||||
if (direction == copy_forwards) {
|
if (direction == copy_forwards) {
|
||||||
__ sub(s, s, 2 * wordSize);
|
__ sub(s, s, bias);
|
||||||
__ sub(d, d, 2 * wordSize);
|
__ sub(d, d, bias);
|
||||||
}
|
|
||||||
__ subs(count, count, 16);
|
|
||||||
__ br(Assembler::GE, large);
|
|
||||||
|
|
||||||
// 8 <= count < 16 words. Copy 8.
|
|
||||||
__ ldp(t0, t1, Address(s, 2 * unit));
|
|
||||||
__ ldp(t2, t3, Address(s, 4 * unit));
|
|
||||||
__ ldp(t4, t5, Address(s, 6 * unit));
|
|
||||||
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
|
|
||||||
|
|
||||||
__ stp(t0, t1, Address(d, 2 * unit));
|
|
||||||
__ stp(t2, t3, Address(d, 4 * unit));
|
|
||||||
__ stp(t4, t5, Address(d, 6 * unit));
|
|
||||||
__ stp(t6, t7, Address(__ pre(d, 8 * unit)));
|
|
||||||
|
|
||||||
if (direction == copy_forwards) {
|
|
||||||
__ add(s, s, 2 * wordSize);
|
|
||||||
__ add(d, d, 2 * wordSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ASSERT
|
||||||
|
// Make sure we are never given < 8 words
|
||||||
{
|
{
|
||||||
Label L1, L2;
|
Label L;
|
||||||
__ bind(small);
|
__ cmp(count, 8);
|
||||||
__ tbz(count, exact_log2(4), L1);
|
__ br(Assembler::GE, L);
|
||||||
__ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
__ stop("genrate_copy_longs called with < 8 words");
|
||||||
__ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
__ bind(L);
|
||||||
__ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
|
||||||
__ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
|
||||||
__ bind(L1);
|
|
||||||
|
|
||||||
__ tbz(count, 1, L2);
|
|
||||||
__ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
|
||||||
__ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
|
||||||
__ bind(L2);
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
__ ret(lr);
|
|
||||||
|
|
||||||
__ align(CodeEntryAlignment);
|
|
||||||
__ bind(large);
|
|
||||||
|
|
||||||
// Fill 8 registers
|
// Fill 8 registers
|
||||||
__ ldp(t0, t1, Address(s, 2 * unit));
|
if (UseSIMDForMemoryOps) {
|
||||||
__ ldp(t2, t3, Address(s, 4 * unit));
|
__ ldpq(v0, v1, Address(s, 4 * unit));
|
||||||
__ ldp(t4, t5, Address(s, 6 * unit));
|
__ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
|
||||||
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
|
} else {
|
||||||
|
__ ldp(t0, t1, Address(s, 2 * unit));
|
||||||
|
__ ldp(t2, t3, Address(s, 4 * unit));
|
||||||
|
__ ldp(t4, t5, Address(s, 6 * unit));
|
||||||
|
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__ subs(count, count, 16);
|
||||||
|
__ br(Assembler::LO, drain);
|
||||||
|
|
||||||
int prefetch = PrefetchCopyIntervalInBytes;
|
int prefetch = PrefetchCopyIntervalInBytes;
|
||||||
bool use_stride = false;
|
bool use_stride = false;
|
||||||
@ -824,38 +803,56 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
if (PrefetchCopyIntervalInBytes > 0)
|
if (PrefetchCopyIntervalInBytes > 0)
|
||||||
__ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);
|
__ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);
|
||||||
|
|
||||||
__ stp(t0, t1, Address(d, 2 * unit));
|
if (UseSIMDForMemoryOps) {
|
||||||
__ ldp(t0, t1, Address(s, 2 * unit));
|
__ stpq(v0, v1, Address(d, 4 * unit));
|
||||||
__ stp(t2, t3, Address(d, 4 * unit));
|
__ ldpq(v0, v1, Address(s, 4 * unit));
|
||||||
__ ldp(t2, t3, Address(s, 4 * unit));
|
__ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
|
||||||
__ stp(t4, t5, Address(d, 6 * unit));
|
__ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
|
||||||
__ ldp(t4, t5, Address(s, 6 * unit));
|
} else {
|
||||||
__ stp(t6, t7, Address(__ pre(d, 8 * unit)));
|
__ stp(t0, t1, Address(d, 2 * unit));
|
||||||
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
|
__ ldp(t0, t1, Address(s, 2 * unit));
|
||||||
|
__ stp(t2, t3, Address(d, 4 * unit));
|
||||||
|
__ ldp(t2, t3, Address(s, 4 * unit));
|
||||||
|
__ stp(t4, t5, Address(d, 6 * unit));
|
||||||
|
__ ldp(t4, t5, Address(s, 6 * unit));
|
||||||
|
__ stp(t6, t7, Address(__ pre(d, 8 * unit)));
|
||||||
|
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
|
||||||
|
}
|
||||||
|
|
||||||
__ subs(count, count, 8);
|
__ subs(count, count, 8);
|
||||||
__ br(Assembler::HS, again);
|
__ br(Assembler::HS, again);
|
||||||
|
|
||||||
// Drain
|
// Drain
|
||||||
__ stp(t0, t1, Address(d, 2 * unit));
|
__ bind(drain);
|
||||||
__ stp(t2, t3, Address(d, 4 * unit));
|
if (UseSIMDForMemoryOps) {
|
||||||
__ stp(t4, t5, Address(d, 6 * unit));
|
__ stpq(v0, v1, Address(d, 4 * unit));
|
||||||
__ stp(t6, t7, Address(__ pre(d, 8 * unit)));
|
__ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
|
||||||
|
} else {
|
||||||
if (direction == copy_forwards) {
|
__ stp(t0, t1, Address(d, 2 * unit));
|
||||||
__ add(s, s, 2 * wordSize);
|
__ stp(t2, t3, Address(d, 4 * unit));
|
||||||
__ add(d, d, 2 * wordSize);
|
__ stp(t4, t5, Address(d, 6 * unit));
|
||||||
|
__ stp(t6, t7, Address(__ pre(d, 8 * unit)));
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
Label L1, L2;
|
Label L1, L2;
|
||||||
__ tbz(count, exact_log2(4), L1);
|
__ tbz(count, exact_log2(4), L1);
|
||||||
__ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
if (UseSIMDForMemoryOps) {
|
||||||
__ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
__ ldpq(v0, v1, Address(__ pre(s, 4 * unit)));
|
||||||
__ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
__ stpq(v0, v1, Address(__ pre(d, 4 * unit)));
|
||||||
__ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
} else {
|
||||||
|
__ ldp(t0, t1, Address(s, 2 * unit));
|
||||||
|
__ ldp(t2, t3, Address(__ pre(s, 4 * unit)));
|
||||||
|
__ stp(t0, t1, Address(d, 2 * unit));
|
||||||
|
__ stp(t2, t3, Address(__ pre(d, 4 * unit)));
|
||||||
|
}
|
||||||
__ bind(L1);
|
__ bind(L1);
|
||||||
|
|
||||||
|
if (direction == copy_forwards) {
|
||||||
|
__ add(s, s, 2 * wordSize);
|
||||||
|
__ add(d, d, 2 * wordSize);
|
||||||
|
}
|
||||||
|
|
||||||
__ tbz(count, 1, L2);
|
__ tbz(count, 1, L2);
|
||||||
__ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
__ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
||||||
__ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
__ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
||||||
@ -931,16 +928,135 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
int granularity = uabs(step);
|
int granularity = uabs(step);
|
||||||
const Register t0 = r3, t1 = r4;
|
const Register t0 = r3, t1 = r4;
|
||||||
|
|
||||||
|
// <= 96 bytes do inline. Direction doesn't matter because we always
|
||||||
|
// load all the data before writing anything
|
||||||
|
Label copy4, copy8, copy16, copy32, copy80, copy128, copy_big, finish;
|
||||||
|
const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
|
||||||
|
const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
|
||||||
|
const Register send = r17, dend = r18;
|
||||||
|
|
||||||
|
if (PrefetchCopyIntervalInBytes > 0)
|
||||||
|
__ prfm(Address(s, 0), PLDL1KEEP);
|
||||||
|
__ cmp(count, (UseSIMDForMemoryOps ? 96:80)/granularity);
|
||||||
|
__ br(Assembler::HI, copy_big);
|
||||||
|
|
||||||
|
__ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
|
||||||
|
__ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
|
||||||
|
|
||||||
|
__ cmp(count, 16/granularity);
|
||||||
|
__ br(Assembler::LS, copy16);
|
||||||
|
|
||||||
|
__ cmp(count, 64/granularity);
|
||||||
|
__ br(Assembler::HI, copy80);
|
||||||
|
|
||||||
|
__ cmp(count, 32/granularity);
|
||||||
|
__ br(Assembler::LS, copy32);
|
||||||
|
|
||||||
|
// 33..64 bytes
|
||||||
|
if (UseSIMDForMemoryOps) {
|
||||||
|
__ ldpq(v0, v1, Address(s, 0));
|
||||||
|
__ ldpq(v2, v3, Address(send, -32));
|
||||||
|
__ stpq(v0, v1, Address(d, 0));
|
||||||
|
__ stpq(v2, v3, Address(dend, -32));
|
||||||
|
} else {
|
||||||
|
__ ldp(t0, t1, Address(s, 0));
|
||||||
|
__ ldp(t2, t3, Address(s, 16));
|
||||||
|
__ ldp(t4, t5, Address(send, -32));
|
||||||
|
__ ldp(t6, t7, Address(send, -16));
|
||||||
|
|
||||||
|
__ stp(t0, t1, Address(d, 0));
|
||||||
|
__ stp(t2, t3, Address(d, 16));
|
||||||
|
__ stp(t4, t5, Address(dend, -32));
|
||||||
|
__ stp(t6, t7, Address(dend, -16));
|
||||||
|
}
|
||||||
|
__ b(finish);
|
||||||
|
|
||||||
|
// 17..32 bytes
|
||||||
|
__ bind(copy32);
|
||||||
|
__ ldp(t0, t1, Address(s, 0));
|
||||||
|
__ ldp(t2, t3, Address(send, -16));
|
||||||
|
__ stp(t0, t1, Address(d, 0));
|
||||||
|
__ stp(t2, t3, Address(dend, -16));
|
||||||
|
__ b(finish);
|
||||||
|
|
||||||
|
// 65..80/96 bytes
|
||||||
|
// (96 bytes if SIMD because we do 32 bytes per instruction)
|
||||||
|
__ bind(copy80);
|
||||||
|
if (UseSIMDForMemoryOps) {
|
||||||
|
__ ldpq(v0, v1, Address(s, 0));
|
||||||
|
__ ldpq(v2, v3, Address(s, 32));
|
||||||
|
__ ldpq(v4, v5, Address(send, -32));
|
||||||
|
__ stpq(v0, v1, Address(d, 0));
|
||||||
|
__ stpq(v2, v3, Address(d, 32));
|
||||||
|
__ stpq(v4, v5, Address(dend, -32));
|
||||||
|
} else {
|
||||||
|
__ ldp(t0, t1, Address(s, 0));
|
||||||
|
__ ldp(t2, t3, Address(s, 16));
|
||||||
|
__ ldp(t4, t5, Address(s, 32));
|
||||||
|
__ ldp(t6, t7, Address(s, 48));
|
||||||
|
__ ldp(t8, t9, Address(send, -16));
|
||||||
|
|
||||||
|
__ stp(t0, t1, Address(d, 0));
|
||||||
|
__ stp(t2, t3, Address(d, 16));
|
||||||
|
__ stp(t4, t5, Address(d, 32));
|
||||||
|
__ stp(t6, t7, Address(d, 48));
|
||||||
|
__ stp(t8, t9, Address(dend, -16));
|
||||||
|
}
|
||||||
|
__ b(finish);
|
||||||
|
|
||||||
|
// 0..16 bytes
|
||||||
|
__ bind(copy16);
|
||||||
|
__ cmp(count, 8/granularity);
|
||||||
|
__ br(Assembler::LO, copy8);
|
||||||
|
|
||||||
|
// 8..16 bytes
|
||||||
|
__ ldr(t0, Address(s, 0));
|
||||||
|
__ ldr(t1, Address(send, -8));
|
||||||
|
__ str(t0, Address(d, 0));
|
||||||
|
__ str(t1, Address(dend, -8));
|
||||||
|
__ b(finish);
|
||||||
|
|
||||||
|
if (granularity < 8) {
|
||||||
|
// 4..7 bytes
|
||||||
|
__ bind(copy8);
|
||||||
|
__ tbz(count, 2 - exact_log2(granularity), copy4);
|
||||||
|
__ ldrw(t0, Address(s, 0));
|
||||||
|
__ ldrw(t1, Address(send, -4));
|
||||||
|
__ strw(t0, Address(d, 0));
|
||||||
|
__ strw(t1, Address(dend, -4));
|
||||||
|
__ b(finish);
|
||||||
|
if (granularity < 4) {
|
||||||
|
// 0..3 bytes
|
||||||
|
__ bind(copy4);
|
||||||
|
__ cbz(count, finish); // get rid of 0 case
|
||||||
|
if (granularity == 2) {
|
||||||
|
__ ldrh(t0, Address(s, 0));
|
||||||
|
__ strh(t0, Address(d, 0));
|
||||||
|
} else { // granularity == 1
|
||||||
|
// Now 1..3 bytes. Handle the 1 and 2 byte case by copying
|
||||||
|
// the first and last byte.
|
||||||
|
// Handle the 3 byte case by loading and storing base + count/2
|
||||||
|
// (count == 1 (s+0)->(d+0), count == 2,3 (s+1) -> (d+1))
|
||||||
|
// This does mean that in the 1 byte case we load/store the same
|
||||||
|
// byte 3 times.
|
||||||
|
__ lsr(count, count, 1);
|
||||||
|
__ ldrb(t0, Address(s, 0));
|
||||||
|
__ ldrb(t1, Address(send, -1));
|
||||||
|
__ ldrb(t2, Address(s, count));
|
||||||
|
__ strb(t0, Address(d, 0));
|
||||||
|
__ strb(t1, Address(dend, -1));
|
||||||
|
__ strb(t2, Address(d, count));
|
||||||
|
}
|
||||||
|
__ b(finish);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__ bind(copy_big);
|
||||||
if (is_backwards) {
|
if (is_backwards) {
|
||||||
__ lea(s, Address(s, count, Address::lsl(exact_log2(-step))));
|
__ lea(s, Address(s, count, Address::lsl(exact_log2(-step))));
|
||||||
__ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
|
__ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
|
||||||
}
|
}
|
||||||
|
|
||||||
Label tail;
|
|
||||||
|
|
||||||
__ cmp(count, 16/granularity);
|
|
||||||
__ br(Assembler::LO, tail);
|
|
||||||
|
|
||||||
// Now we've got the small case out of the way we can align the
|
// Now we've got the small case out of the way we can align the
|
||||||
// source address on a 2-word boundary.
|
// source address on a 2-word boundary.
|
||||||
|
|
||||||
@ -986,8 +1102,6 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__ cmp(count, 16/granularity);
|
|
||||||
__ br(Assembler::LT, tail);
|
|
||||||
__ bind(aligned);
|
__ bind(aligned);
|
||||||
|
|
||||||
// s is now 2-word-aligned.
|
// s is now 2-word-aligned.
|
||||||
@ -1001,9 +1115,11 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
__ bl(copy_b);
|
__ bl(copy_b);
|
||||||
|
|
||||||
// And the tail.
|
// And the tail.
|
||||||
|
|
||||||
__ bind(tail);
|
|
||||||
copy_memory_small(s, d, count, tmp, step);
|
copy_memory_small(s, d, count, tmp, step);
|
||||||
|
|
||||||
|
if (granularity >= 8) __ bind(copy8);
|
||||||
|
if (granularity >= 4) __ bind(copy4);
|
||||||
|
__ bind(finish);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1984,6 +1984,7 @@ void TemplateInterpreterGenerator::count_bytecode() {
|
|||||||
__ push(rscratch3);
|
__ push(rscratch3);
|
||||||
Label L;
|
Label L;
|
||||||
__ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
|
__ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
|
||||||
|
__ prfm(Address(rscratch2), PSTL1STRM);
|
||||||
__ bind(L);
|
__ bind(L);
|
||||||
__ ldxr(rscratch1, rscratch2);
|
__ ldxr(rscratch1, rscratch2);
|
||||||
__ add(rscratch1, rscratch1, 1);
|
__ add(rscratch1, rscratch1, 1);
|
||||||
|
@ -61,6 +61,10 @@
|
|||||||
#define HWCAP_CRC32 (1<<7)
|
#define HWCAP_CRC32 (1<<7)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef HWCAP_ATOMICS
|
||||||
|
#define HWCAP_ATOMICS (1<<8)
|
||||||
|
#endif
|
||||||
|
|
||||||
int VM_Version::_cpu;
|
int VM_Version::_cpu;
|
||||||
int VM_Version::_model;
|
int VM_Version::_model;
|
||||||
int VM_Version::_model2;
|
int VM_Version::_model2;
|
||||||
@ -172,6 +176,7 @@ void VM_Version::get_processor_features() {
|
|||||||
if (auxv & HWCAP_AES) strcat(buf, ", aes");
|
if (auxv & HWCAP_AES) strcat(buf, ", aes");
|
||||||
if (auxv & HWCAP_SHA1) strcat(buf, ", sha1");
|
if (auxv & HWCAP_SHA1) strcat(buf, ", sha1");
|
||||||
if (auxv & HWCAP_SHA2) strcat(buf, ", sha256");
|
if (auxv & HWCAP_SHA2) strcat(buf, ", sha256");
|
||||||
|
if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");
|
||||||
|
|
||||||
_features_string = os::strdup(buf);
|
_features_string = os::strdup(buf);
|
||||||
|
|
||||||
@ -191,6 +196,15 @@ void VM_Version::get_processor_features() {
|
|||||||
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
|
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (auxv & HWCAP_ATOMICS) {
|
||||||
|
if (FLAG_IS_DEFAULT(UseLSE))
|
||||||
|
FLAG_SET_DEFAULT(UseLSE, true);
|
||||||
|
} else {
|
||||||
|
if (UseLSE) {
|
||||||
|
warning("UseLSE specified, but not supported on this CPU");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (auxv & HWCAP_AES) {
|
if (auxv & HWCAP_AES) {
|
||||||
UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
|
UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
|
||||||
UseAESIntrinsics =
|
UseAESIntrinsics =
|
||||||
|
@ -47,7 +47,7 @@ const bool CCallingConventionRequiresIntsAsLongs = true;
|
|||||||
// The expected size in bytes of a cache line, used to pad data structures.
|
// The expected size in bytes of a cache line, used to pad data structures.
|
||||||
#define DEFAULT_CACHE_LINE_SIZE 128
|
#define DEFAULT_CACHE_LINE_SIZE 128
|
||||||
|
|
||||||
#if defined(COMPILER2) && defined(AIX)
|
#if defined(COMPILER2) && (defined(AIX) || defined(linux))
|
||||||
// Include Transactional Memory lock eliding optimization
|
// Include Transactional Memory lock eliding optimization
|
||||||
#define INCLUDE_RTM_OPT 1
|
#define INCLUDE_RTM_OPT 1
|
||||||
#endif
|
#endif
|
||||||
|
@ -76,6 +76,8 @@ define_pd_global(uintx, TypeProfileLevel, 111);
|
|||||||
|
|
||||||
define_pd_global(bool, CompactStrings, true);
|
define_pd_global(bool, CompactStrings, true);
|
||||||
|
|
||||||
|
define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||||
|
|
||||||
// Platform dependent flag handling: flags only defined on this platform.
|
// Platform dependent flag handling: flags only defined on this platform.
|
||||||
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
|
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
|
||||||
\
|
\
|
||||||
|
@ -2137,8 +2137,6 @@ MachTypeNode *Matcher::make_decode_node() {
|
|||||||
return decode;
|
return decode;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
// Threshold size for cleararray.
|
|
||||||
const int Matcher::init_array_short_size = 8 * BytesPerLong;
|
|
||||||
|
|
||||||
// false => size gets scaled to BytesPerLong, ok.
|
// false => size gets scaled to BytesPerLong, ok.
|
||||||
const bool Matcher::init_array_count_is_in_bytes = false;
|
const bool Matcher::init_array_count_is_in_bytes = false;
|
||||||
|
@ -255,7 +255,16 @@ void VM_Version::initialize() {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#ifdef linux
|
#ifdef linux
|
||||||
// TODO: check kernel version (we currently have too old versions only)
|
// At least Linux kernel 4.2, as the problematic behavior of syscalls
|
||||||
|
// being called in the middle of a transaction has been addressed.
|
||||||
|
// Please, refer to commit b4b56f9ecab40f3b4ef53e130c9f6663be491894
|
||||||
|
// in Linux kernel source tree: https://goo.gl/Kc5i7A
|
||||||
|
if (os::Linux::os_version_is_known()) {
|
||||||
|
if (os::Linux::os_version() >= 0x040200)
|
||||||
|
os_too_old = false;
|
||||||
|
} else {
|
||||||
|
vm_exit_during_initialization("RTM can not be enabled: kernel version is unknown.");
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
if (os_too_old) {
|
if (os_too_old) {
|
||||||
vm_exit_during_initialization("RTM is not supported on this OS version.");
|
vm_exit_during_initialization("RTM is not supported on this OS version.");
|
||||||
|
@ -90,6 +90,8 @@ define_pd_global(uintx, TypeProfileLevel, 111);
|
|||||||
|
|
||||||
define_pd_global(bool, CompactStrings, true);
|
define_pd_global(bool, CompactStrings, true);
|
||||||
|
|
||||||
|
define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||||
|
|
||||||
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
|
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
|
||||||
\
|
\
|
||||||
product(intx, UseVIS, 99, \
|
product(intx, UseVIS, 99, \
|
||||||
|
@ -1980,9 +1980,6 @@ const bool Matcher::isSimpleConstant64(jlong value) {
|
|||||||
// No scaling for the parameter of the ClearArray node.
|
// No scaling for the parameter of the ClearArray node.
|
||||||
const bool Matcher::init_array_count_is_in_bytes = true;
|
const bool Matcher::init_array_count_is_in_bytes = true;
|
||||||
|
|
||||||
// Threshold size for cleararray.
|
|
||||||
const int Matcher::init_array_short_size = 8 * BytesPerLong;
|
|
||||||
|
|
||||||
// No additional cost for CMOVL.
|
// No additional cost for CMOVL.
|
||||||
const int Matcher::long_cmove_cost() { return 0; }
|
const int Matcher::long_cmove_cost() { return 0; }
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -777,6 +777,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
|
|||||||
case 0x6E: // movd
|
case 0x6E: // movd
|
||||||
case 0x7E: // movd
|
case 0x7E: // movd
|
||||||
case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
|
case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
|
||||||
|
case 0xFE: // paddd
|
||||||
debug_only(has_disp32 = true);
|
debug_only(has_disp32 = true);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -926,6 +927,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
|
|||||||
ip++; // skip P2, move to opcode
|
ip++; // skip P2, move to opcode
|
||||||
// To find the end of instruction (which == end_pc_operand).
|
// To find the end of instruction (which == end_pc_operand).
|
||||||
switch (0xFF & *ip) {
|
switch (0xFF & *ip) {
|
||||||
|
case 0x22: // pinsrd r, r/a, #8
|
||||||
case 0x61: // pcmpestri r, r/a, #8
|
case 0x61: // pcmpestri r, r/a, #8
|
||||||
case 0x70: // pshufd r, r/a, #8
|
case 0x70: // pshufd r, r/a, #8
|
||||||
case 0x73: // psrldq r, #8
|
case 0x73: // psrldq r, #8
|
||||||
@ -3953,6 +3955,83 @@ void Assembler::setb(Condition cc, Register dst) {
|
|||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits PALIGNR in its register-register form: 66-prefixed opcode 0x0F in the
// 0F 3A opcode map, followed by a ModRM byte and an 8-bit immediate.
//   dst  - destination XMM register; also passed as the second register
//          argument of simd_prefix_and_encode (so it doubles as first source)
//   src  - source XMM register
//   imm8 - immediate operand, emitted as the last byte of the instruction
// Requires SSSE3 (asserted in debug builds).
void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
|
||||||
|
assert(VM_Version::supports_ssse3(), "");
|
||||||
|
// legacy_mode is taken from _legacy_mode_bw here rather than being hard-coded.
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
|
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
|
emit_int8((unsigned char)0x0F); // opcode byte
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode)); // ModRM with mod=11: register-direct operands
|
||||||
|
emit_int8(imm8); // trailing immediate
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits PBLENDW in its register-register form: 66-prefixed opcode 0x0E in the
// 0F 3A opcode map, followed by a ModRM byte and an 8-bit immediate.
//   dst  - destination XMM register
//   src  - source XMM register
//   imm8 - immediate operand, emitted as the last byte of the instruction
// Requires SSE4.1 (asserted in debug builds). The attributes request
// legacy_mode, and no second source register is supplied (xnoreg).
void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
|
||||||
|
assert(VM_Version::supports_sse4_1(), "");
|
||||||
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
|
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
|
emit_int8((unsigned char)0x0E); // opcode byte
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode)); // ModRM with mod=11: register-direct operands
|
||||||
|
emit_int8(imm8); // trailing immediate
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits SHA1RNDS4 in its register-register form: unprefixed (VEX_SIMD_NONE)
// opcode 0xCC in the 0F 3A opcode map, followed by a ModRM byte and an 8-bit
// immediate.
//   dst  - destination XMM register
//   src  - source XMM register
//   imm8 - immediate operand; truncated to one byte before being emitted
// Requires the SHA extensions (asserted in debug builds). The attributes
// request legacy_mode, and no second source register is supplied (xnoreg).
void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
|
||||||
|
assert(VM_Version::supports_sha(), "");
|
||||||
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
|
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
|
||||||
|
emit_int8((unsigned char)0xCC); // opcode byte
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode)); // ModRM with mod=11: register-direct operands
|
||||||
|
emit_int8((unsigned char)imm8); // trailing immediate
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits SHA1NEXTE in its register-register form: unprefixed (VEX_SIMD_NONE)
// opcode 0xC8 in the 0F 38 opcode map, followed by a ModRM byte.
//   dst - destination XMM register
//   src - source XMM register
// Requires the SHA extensions (asserted in debug builds). The attributes
// request legacy_mode, and no second source register is supplied (xnoreg).
void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
|
||||||
|
assert(VM_Version::supports_sha(), "");
|
||||||
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
|
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||||
|
emit_int8((unsigned char)0xC8); // opcode byte
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode)); // ModRM with mod=11: register-direct operands
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits SHA1MSG1 in its register-register form: unprefixed (VEX_SIMD_NONE)
// opcode 0xC9 in the 0F 38 opcode map, followed by a ModRM byte.
//   dst - destination XMM register
//   src - source XMM register
// Requires the SHA extensions (asserted in debug builds). The attributes
// request legacy_mode, and no second source register is supplied (xnoreg).
void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
|
||||||
|
assert(VM_Version::supports_sha(), "");
|
||||||
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
|
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||||
|
emit_int8((unsigned char)0xC9); // opcode byte
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode)); // ModRM with mod=11: register-direct operands
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits SHA1MSG2 in its register-register form: unprefixed (VEX_SIMD_NONE)
// opcode 0xCA in the 0F 38 opcode map, followed by a ModRM byte.
//   dst - destination XMM register
//   src - source XMM register
// Requires the SHA extensions (asserted in debug builds). The attributes
// request legacy_mode, and no second source register is supplied (xnoreg).
void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
|
||||||
|
assert(VM_Version::supports_sha(), "");
|
||||||
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
|
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||||
|
emit_int8((unsigned char)0xCA); // opcode byte
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode)); // ModRM with mod=11: register-direct operands
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits SHA256RNDS2 in its register-register form: unprefixed (VEX_SIMD_NONE)
// opcode 0xCB in the 0F 38 opcode map, followed by a ModRM byte.
//   dst - destination XMM register
//   src - source XMM register
// xmm0 is an implicit additional source to this instruction; it is not encoded
// here, so the caller must have set it up beforehand.
// Requires the SHA extensions (asserted in debug builds).
|
||||||
|
void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
|
||||||
|
assert(VM_Version::supports_sha(), "");
|
||||||
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
|
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||||
|
emit_int8((unsigned char)0xCB); // opcode byte
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode)); // ModRM with mod=11: register-direct operands
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits SHA256MSG1 in its register-register form: unprefixed (VEX_SIMD_NONE)
// opcode 0xCC in the 0F 38 opcode map, followed by a ModRM byte.
// (Same opcode byte as sha1rnds4, but in the 0F 38 map instead of 0F 3A and
// with no immediate.)
//   dst - destination XMM register
//   src - source XMM register
// Requires the SHA extensions (asserted in debug builds).
void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
|
||||||
|
assert(VM_Version::supports_sha(), "");
|
||||||
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
|
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||||
|
emit_int8((unsigned char)0xCC); // opcode byte
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode)); // ModRM with mod=11: register-direct operands
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits SHA256MSG2 in its register-register form: unprefixed (VEX_SIMD_NONE)
// opcode 0xCD in the 0F 38 opcode map, followed by a ModRM byte.
//   dst - destination XMM register
//   src - source XMM register
// Requires the SHA extensions (asserted in debug builds). The attributes
// request legacy_mode, and no second source register is supplied (xnoreg).
void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
|
||||||
|
assert(VM_Version::supports_sha(), "");
|
||||||
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
|
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||||
|
emit_int8((unsigned char)0xCD); // opcode byte
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode)); // ModRM with mod=11: register-direct operands
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void Assembler::shll(Register dst, int imm8) {
|
void Assembler::shll(Register dst, int imm8) {
|
||||||
assert(isShiftCount(imm8), "illegal shift count");
|
assert(isShiftCount(imm8), "illegal shift count");
|
||||||
int encode = prefix_and_encode(dst->encoding());
|
int encode = prefix_and_encode(dst->encoding());
|
||||||
@ -4931,6 +5010,15 @@ void Assembler::paddd(XMMRegister dst, XMMRegister src) {
|
|||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits PADDD with a memory source operand: 66-prefixed opcode 0xFE in the
// 0F opcode map, followed by the operand bytes for (dst, src).
//   dst - destination XMM register
//   src - memory operand
// Unlike the register-register emitters, this uses simd_prefix (no returned
// encoding) plus emit_operand to encode the address, and opens an
// InstructionMark before emitting.
// NOTE(review): the SSE2 assert is wrapped in NOT_LP64, so it is presumably
// only checked on 32-bit builds -- confirm against the macro definition.
void Assembler::paddd(XMMRegister dst, Address src) {
|
||||||
|
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||||
|
InstructionMark im(this);
|
||||||
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||||
|
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||||
|
emit_int8((unsigned char)0xFE); // opcode byte
|
||||||
|
emit_operand(dst, src); // ModRM (and any SIB/displacement) for the memory operand
|
||||||
|
}
|
||||||
|
|
||||||
void Assembler::paddq(XMMRegister dst, XMMRegister src) {
|
void Assembler::paddq(XMMRegister dst, XMMRegister src) {
|
||||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||||
@ -5611,8 +5699,9 @@ void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_avx(), "");
|
assert(VM_Version::supports_avx(), "");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||||
@ -5621,11 +5710,12 @@ void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
|
|||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
// 0x00 - insert into lower 128 bits
|
// 0x00 - insert into lower 128 bits
|
||||||
// 0x01 - insert into upper 128 bits
|
// 0x01 - insert into upper 128 bits
|
||||||
emit_int8(0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
|
void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_evex(), "");
|
assert(VM_Version::supports_evex(), "");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||||
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
@ -5633,26 +5723,29 @@ void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
|||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
// 0x00 - insert into lower 256 bits
|
// 0x00 - insert into lower 256 bits
|
||||||
// 0x01 - insert into upper 256 bits
|
// 0x01 - insert into upper 256 bits
|
||||||
emit_int8(value & 0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) {
|
void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_evex(), "");
|
assert(VM_Version::supports_evex(), "");
|
||||||
assert(dst != xnoreg, "sanity");
|
assert(dst != xnoreg, "sanity");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
InstructionMark im(this);
|
InstructionMark im(this);
|
||||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
|
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
|
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
|
||||||
// swap src<->dst for encoding
|
// swap src<->dst for encoding
|
||||||
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x1A);
|
emit_int8(0x1A);
|
||||||
emit_operand(dst, src);
|
emit_operand(dst, src);
|
||||||
// 0x00 - insert into lower 256 bits
|
// 0x00 - insert into lower 256 bits
|
||||||
// 0x01 - insert into upper 128 bits
|
// 0x01 - insert into upper 256 bits
|
||||||
emit_int8(value & 0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
|
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_evex(), "");
|
assert(VM_Version::supports_evex(), "");
|
||||||
|
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||||
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
@ -5662,57 +5755,64 @@ void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
|||||||
// 0x01 - insert into q1 128 bits (128..255)
|
// 0x01 - insert into q1 128 bits (128..255)
|
||||||
// 0x02 - insert into q2 128 bits (256..383)
|
// 0x02 - insert into q2 128 bits (256..383)
|
||||||
// 0x03 - insert into q3 128 bits (384..511)
|
// 0x03 - insert into q3 128 bits (384..511)
|
||||||
emit_int8(value & 0x3);
|
emit_int8(imm8 & 0x03);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) {
|
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_avx(), "");
|
assert(VM_Version::supports_avx(), "");
|
||||||
assert(dst != xnoreg, "sanity");
|
assert(dst != xnoreg, "sanity");
|
||||||
|
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||||
|
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||||
InstructionMark im(this);
|
InstructionMark im(this);
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
||||||
// swap src<->dst for encoding
|
// swap src<->dst for encoding
|
||||||
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x18);
|
emit_int8(0x18);
|
||||||
emit_operand(dst, src);
|
emit_operand(dst, src);
|
||||||
// 0x00 - insert into q0 128 bits (0..127)
|
// 0x00 - insert into q0 128 bits (0..127)
|
||||||
// 0x01 - insert into q1 128 bits (128..255)
|
// 0x01 - insert into q1 128 bits (128..255)
|
||||||
// 0x02 - insert into q2 128 bits (256..383)
|
// 0x02 - insert into q2 128 bits (256..383)
|
||||||
// 0x03 - insert into q3 128 bits (384..511)
|
// 0x03 - insert into q3 128 bits (384..511)
|
||||||
emit_int8(value & 0x3);
|
emit_int8(imm8 & 0x03);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vinsertf128h(XMMRegister dst, Address src) {
|
void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_avx(), "");
|
assert(VM_Version::supports_avx(), "");
|
||||||
assert(dst != xnoreg, "sanity");
|
assert(dst != xnoreg, "sanity");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||||
|
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||||
InstructionMark im(this);
|
InstructionMark im(this);
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
||||||
// swap src<->dst for encoding
|
// swap src<->dst for encoding
|
||||||
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x18);
|
emit_int8(0x18);
|
||||||
emit_operand(dst, src);
|
emit_operand(dst, src);
|
||||||
|
// 0x00 - insert into lower 128 bits
|
||||||
// 0x01 - insert into upper 128 bits
|
// 0x01 - insert into upper 128 bits
|
||||||
emit_int8(0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
|
void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_avx(), "");
|
assert(VM_Version::supports_avx(), "");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x19);
|
emit_int8(0x19);
|
||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
// 0x00 - insert into lower 128 bits
|
// 0x00 - extract from lower 128 bits
|
||||||
// 0x01 - insert into upper 128 bits
|
// 0x01 - extract from upper 128 bits
|
||||||
emit_int8(0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextractf128h(Address dst, XMMRegister src) {
|
void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_avx(), "");
|
assert(VM_Version::supports_avx(), "");
|
||||||
assert(src != xnoreg, "sanity");
|
assert(src != xnoreg, "sanity");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||||
InstructionMark im(this);
|
InstructionMark im(this);
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
@ -5720,12 +5820,14 @@ void Assembler::vextractf128h(Address dst, XMMRegister src) {
|
|||||||
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x19);
|
emit_int8(0x19);
|
||||||
emit_operand(src, dst);
|
emit_operand(src, dst);
|
||||||
|
// 0x00 - extract from lower 128 bits
|
||||||
// 0x01 - extract from upper 128 bits
|
// 0x01 - extract from upper 128 bits
|
||||||
emit_int8(0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_avx2(), "");
|
assert(VM_Version::supports_avx2(), "");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||||
@ -5734,11 +5836,12 @@ void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
|
|||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
// 0x00 - insert into lower 128 bits
|
// 0x00 - insert into lower 128 bits
|
||||||
// 0x01 - insert into upper 128 bits
|
// 0x01 - insert into upper 128 bits
|
||||||
emit_int8(0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
|
void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_evex(), "");
|
assert(VM_Version::supports_evex(), "");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||||
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
@ -5746,39 +5849,44 @@ void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
|||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
// 0x00 - insert into lower 256 bits
|
// 0x00 - insert into lower 256 bits
|
||||||
// 0x01 - insert into upper 256 bits
|
// 0x01 - insert into upper 256 bits
|
||||||
emit_int8(value & 0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vinserti128h(XMMRegister dst, Address src) {
|
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_avx2(), "");
|
assert(VM_Version::supports_avx2(), "");
|
||||||
assert(dst != xnoreg, "sanity");
|
assert(dst != xnoreg, "sanity");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||||
|
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||||
InstructionMark im(this);
|
InstructionMark im(this);
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
||||||
// swap src<->dst for encoding
|
// swap src<->dst for encoding
|
||||||
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x38);
|
emit_int8(0x38);
|
||||||
emit_operand(dst, src);
|
emit_operand(dst, src);
|
||||||
|
// 0x00 - insert into lower 128 bits
|
||||||
// 0x01 - insert into upper 128 bits
|
// 0x01 - insert into upper 128 bits
|
||||||
emit_int8(0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
|
void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_avx(), "");
|
assert(VM_Version::supports_avx(), "");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x39);
|
emit_int8(0x39);
|
||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
// 0x00 - insert into lower 128 bits
|
// 0x00 - extract from lower 128 bits
|
||||||
// 0x01 - insert into upper 128 bits
|
// 0x01 - extract from upper 128 bits
|
||||||
emit_int8(0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextracti128h(Address dst, XMMRegister src) {
|
void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_avx2(), "");
|
assert(VM_Version::supports_avx2(), "");
|
||||||
assert(src != xnoreg, "sanity");
|
assert(src != xnoreg, "sanity");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||||
InstructionMark im(this);
|
InstructionMark im(this);
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
@ -5786,47 +5894,53 @@ void Assembler::vextracti128h(Address dst, XMMRegister src) {
|
|||||||
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x39);
|
emit_int8(0x39);
|
||||||
emit_operand(src, dst);
|
emit_operand(src, dst);
|
||||||
|
// 0x00 - extract from lower 128 bits
|
||||||
// 0x01 - extract from upper 128 bits
|
// 0x01 - extract from upper 128 bits
|
||||||
emit_int8(0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) {
|
void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_evex(), "");
|
assert(VM_Version::supports_evex(), "");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x3B);
|
emit_int8(0x3B);
|
||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
// 0x00 - extract from lower 256 bits
|
// 0x00 - extract from lower 256 bits
|
||||||
// 0x01 - extract from upper 256 bits
|
// 0x01 - extract from upper 256 bits
|
||||||
emit_int8(value & 0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
|
void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_evex(), "");
|
assert(VM_Version::supports_evex(), "");
|
||||||
|
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x39);
|
emit_int8(0x39);
|
||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
|
// 0x00 - extract from bits 127:0
|
||||||
// 0x01 - extract from bits 255:128
|
// 0x01 - extract from bits 255:128
|
||||||
// 0x02 - extract from bits 383:256
|
// 0x02 - extract from bits 383:256
|
||||||
// 0x03 - extract from bits 511:384
|
// 0x03 - extract from bits 511:384
|
||||||
emit_int8(value & 0x3);
|
emit_int8(imm8 & 0x03);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) {
|
void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_evex(), "");
|
assert(VM_Version::supports_evex(), "");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x1B);
|
emit_int8(0x1B);
|
||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
// 0x00 - extract from lower 256 bits
|
// 0x00 - extract from lower 256 bits
|
||||||
// 0x01 - extract from upper 256 bits
|
// 0x01 - extract from upper 256 bits
|
||||||
emit_int8(value & 0x1);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) {
|
void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_evex(), "");
|
assert(VM_Version::supports_evex(), "");
|
||||||
assert(src != xnoreg, "sanity");
|
assert(src != xnoreg, "sanity");
|
||||||
|
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||||
InstructionMark im(this);
|
InstructionMark im(this);
|
||||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit);
|
attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit);
|
||||||
@ -5835,11 +5949,12 @@ void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) {
|
|||||||
emit_operand(src, dst);
|
emit_operand(src, dst);
|
||||||
// 0x00 - extract from lower 256 bits
|
// 0x00 - extract from lower 256 bits
|
||||||
// 0x01 - extract from upper 256 bits
|
// 0x01 - extract from upper 256 bits
|
||||||
emit_int8(value & 0x01);
|
emit_int8(imm8 & 0x01);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
|
void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_avx(), "");
|
assert(VM_Version::supports_avx(), "");
|
||||||
|
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
@ -5849,12 +5964,13 @@ void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
|
|||||||
// 0x01 - extract from bits 255:128
|
// 0x01 - extract from bits 255:128
|
||||||
// 0x02 - extract from bits 383:256
|
// 0x02 - extract from bits 383:256
|
||||||
// 0x03 - extract from bits 511:384
|
// 0x03 - extract from bits 511:384
|
||||||
emit_int8(value & 0x3);
|
emit_int8(imm8 & 0x03);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
|
void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_evex(), "");
|
assert(VM_Version::supports_evex(), "");
|
||||||
assert(src != xnoreg, "sanity");
|
assert(src != xnoreg, "sanity");
|
||||||
|
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||||
InstructionMark im(this);
|
InstructionMark im(this);
|
||||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
||||||
@ -5865,19 +5981,21 @@ void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
|
|||||||
// 0x01 - extract from bits 255:128
|
// 0x01 - extract from bits 255:128
|
||||||
// 0x02 - extract from bits 383:256
|
// 0x02 - extract from bits 383:256
|
||||||
// 0x03 - extract from bits 511:384
|
// 0x03 - extract from bits 511:384
|
||||||
emit_int8(value & 0x3);
|
emit_int8(imm8 & 0x03);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
|
void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||||
assert(VM_Version::supports_evex(), "");
|
assert(VM_Version::supports_evex(), "");
|
||||||
|
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
emit_int8(0x19);
|
emit_int8(0x19);
|
||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
|
// 0x00 - extract from bits 127:0
|
||||||
// 0x01 - extract from bits 255:128
|
// 0x01 - extract from bits 255:128
|
||||||
// 0x02 - extract from bits 383:256
|
// 0x02 - extract from bits 383:256
|
||||||
// 0x03 - extract from bits 511:384
|
// 0x03 - extract from bits 511:384
|
||||||
emit_int8(value & 0x3);
|
emit_int8(imm8 & 0x03);
|
||||||
}
|
}
|
||||||
|
|
||||||
// duplicate 4-bytes integer data from src into 8 locations in dest
|
// duplicate 4-bytes integer data from src into 8 locations in dest
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -1672,6 +1672,18 @@ private:
|
|||||||
|
|
||||||
void setb(Condition cc, Register dst);
|
void setb(Condition cc, Register dst);
|
||||||
|
|
||||||
|
void palignr(XMMRegister dst, XMMRegister src, int imm8);
|
||||||
|
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
|
||||||
|
|
||||||
|
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
|
||||||
|
void sha1nexte(XMMRegister dst, XMMRegister src);
|
||||||
|
void sha1msg1(XMMRegister dst, XMMRegister src);
|
||||||
|
void sha1msg2(XMMRegister dst, XMMRegister src);
|
||||||
|
// xmm0 is implicit additional source to the following instruction.
|
||||||
|
void sha256rnds2(XMMRegister dst, XMMRegister src);
|
||||||
|
void sha256msg1(XMMRegister dst, XMMRegister src);
|
||||||
|
void sha256msg2(XMMRegister dst, XMMRegister src);
|
||||||
|
|
||||||
void shldl(Register dst, Register src);
|
void shldl(Register dst, Register src);
|
||||||
void shldl(Register dst, Register src, int8_t imm8);
|
void shldl(Register dst, Register src, int8_t imm8);
|
||||||
|
|
||||||
@ -1868,6 +1880,7 @@ private:
|
|||||||
void paddb(XMMRegister dst, XMMRegister src);
|
void paddb(XMMRegister dst, XMMRegister src);
|
||||||
void paddw(XMMRegister dst, XMMRegister src);
|
void paddw(XMMRegister dst, XMMRegister src);
|
||||||
void paddd(XMMRegister dst, XMMRegister src);
|
void paddd(XMMRegister dst, XMMRegister src);
|
||||||
|
void paddd(XMMRegister dst, Address src);
|
||||||
void paddq(XMMRegister dst, XMMRegister src);
|
void paddq(XMMRegister dst, XMMRegister src);
|
||||||
void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
@ -1958,33 +1971,31 @@ private:
|
|||||||
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||||
|
|
||||||
// Copy low 128bit into high 128bit of YMM registers.
|
// 128bit copy from/to 256bit (YMM) vector registers
|
||||||
void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||||
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||||
void vextractf128h(XMMRegister dst, XMMRegister src);
|
void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||||
void vextracti128h(XMMRegister dst, XMMRegister src);
|
void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||||
|
void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
|
||||||
|
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
|
||||||
|
void vextractf128(Address dst, XMMRegister src, uint8_t imm8);
|
||||||
|
void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
|
||||||
|
|
||||||
// Load/store high 128bit of YMM registers which does not destroy other half.
|
// 256bit copy from/to 512bit (ZMM) vector registers
|
||||||
void vinsertf128h(XMMRegister dst, Address src);
|
void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||||
void vinserti128h(XMMRegister dst, Address src);
|
void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||||
void vextractf128h(Address dst, XMMRegister src);
|
void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||||
void vextracti128h(Address dst, XMMRegister src);
|
void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||||
|
void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
|
||||||
|
void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
|
||||||
|
|
||||||
// Copy low 256bit into high 256bit of ZMM registers.
|
// 128bit copy from/to 256bit (YMM) or 512bit (ZMM) vector registers
|
||||||
void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
|
void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||||
void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
|
void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||||
void vextracti64x4h(XMMRegister dst, XMMRegister src, int value);
|
void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||||
void vextractf64x4h(XMMRegister dst, XMMRegister src, int value);
|
void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);
|
||||||
void vextractf64x4h(Address dst, XMMRegister src, int value);
|
void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||||
void vinsertf64x4h(XMMRegister dst, Address src, int value);
|
void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
|
||||||
|
|
||||||
// Copy targeted 128bit segments of the ZMM registers
|
|
||||||
void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
|
|
||||||
void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
|
|
||||||
void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
|
|
||||||
void vextractf32x4h(Address dst, XMMRegister src, int value);
|
|
||||||
void vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
|
|
||||||
void vinsertf32x4h(XMMRegister dst, Address src, int value);
|
|
||||||
|
|
||||||
// duplicate 4-bytes integer data from src into 8 locations in dest
|
// duplicate 4-bytes integer data from src into 8 locations in dest
|
||||||
void vpbroadcastd(XMMRegister dst, XMMRegister src);
|
void vpbroadcastd(XMMRegister dst, XMMRegister src);
|
||||||
|
@ -97,6 +97,8 @@ define_pd_global(bool, CompactStrings, true);
|
|||||||
|
|
||||||
define_pd_global(bool, PreserveFramePointer, false);
|
define_pd_global(bool, PreserveFramePointer, false);
|
||||||
|
|
||||||
|
define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||||
|
|
||||||
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
|
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
|
||||||
\
|
\
|
||||||
develop(bool, IEEEPrecision, true, \
|
develop(bool, IEEEPrecision, true, \
|
||||||
|
@ -3445,7 +3445,7 @@ void MacroAssembler::movptr(Address dst, Register src) {
|
|||||||
|
|
||||||
void MacroAssembler::movdqu(Address dst, XMMRegister src) {
|
void MacroAssembler::movdqu(Address dst, XMMRegister src) {
|
||||||
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
|
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
|
||||||
Assembler::vextractf32x4h(dst, src, 0);
|
Assembler::vextractf32x4(dst, src, 0);
|
||||||
} else {
|
} else {
|
||||||
Assembler::movdqu(dst, src);
|
Assembler::movdqu(dst, src);
|
||||||
}
|
}
|
||||||
@ -3453,7 +3453,7 @@ void MacroAssembler::movdqu(Address dst, XMMRegister src) {
|
|||||||
|
|
||||||
void MacroAssembler::movdqu(XMMRegister dst, Address src) {
|
void MacroAssembler::movdqu(XMMRegister dst, Address src) {
|
||||||
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
|
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
|
||||||
Assembler::vinsertf32x4h(dst, src, 0);
|
Assembler::vinsertf32x4(dst, dst, src, 0);
|
||||||
} else {
|
} else {
|
||||||
Assembler::movdqu(dst, src);
|
Assembler::movdqu(dst, src);
|
||||||
}
|
}
|
||||||
@ -3478,7 +3478,7 @@ void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
|
|||||||
|
|
||||||
void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
|
void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
|
||||||
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
|
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
|
||||||
Assembler::vextractf64x4h(dst, src, 0);
|
vextractf64x4_low(dst, src);
|
||||||
} else {
|
} else {
|
||||||
Assembler::vmovdqu(dst, src);
|
Assembler::vmovdqu(dst, src);
|
||||||
}
|
}
|
||||||
@ -3486,7 +3486,7 @@ void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
|
|||||||
|
|
||||||
void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
|
void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
|
||||||
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
|
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
|
||||||
Assembler::vinsertf64x4h(dst, src, 0);
|
vinsertf64x4_low(dst, src);
|
||||||
} else {
|
} else {
|
||||||
Assembler::vmovdqu(dst, src);
|
Assembler::vmovdqu(dst, src);
|
||||||
}
|
}
|
||||||
@ -5649,14 +5649,14 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
|||||||
// Save upper half of ZMM registers
|
// Save upper half of ZMM registers
|
||||||
subptr(rsp, 32*num_xmm_regs);
|
subptr(rsp, 32*num_xmm_regs);
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
|
vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
|
assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
|
||||||
// Save upper half of YMM registers
|
// Save upper half of YMM registers
|
||||||
subptr(rsp, 16*num_xmm_regs);
|
subptr(rsp, 16*num_xmm_regs);
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
|
vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -5665,7 +5665,7 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
|||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
if (VM_Version::supports_evex()) {
|
if (VM_Version::supports_evex()) {
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0);
|
vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
@ -5753,7 +5753,7 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
|||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
if (VM_Version::supports_evex()) {
|
if (VM_Version::supports_evex()) {
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0);
|
vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
@ -5771,12 +5771,12 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
|||||||
if (MaxVectorSize > 16) {
|
if (MaxVectorSize > 16) {
|
||||||
// Restore upper half of YMM registers.
|
// Restore upper half of YMM registers.
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
|
vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
|
||||||
}
|
}
|
||||||
addptr(rsp, 16*num_xmm_regs);
|
addptr(rsp, 16*num_xmm_regs);
|
||||||
if(UseAVX > 2) {
|
if(UseAVX > 2) {
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
|
vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
|
||||||
}
|
}
|
||||||
addptr(rsp, 32*num_xmm_regs);
|
addptr(rsp, 32*num_xmm_regs);
|
||||||
}
|
}
|
||||||
@ -7198,21 +7198,50 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
|
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, bool is_large) {
|
||||||
// cnt - number of qwords (8-byte words).
|
// cnt - number of qwords (8-byte words).
|
||||||
// base - start address, qword aligned.
|
// base - start address, qword aligned.
|
||||||
|
// is_large - if optimizers know cnt is larger than InitArrayShortSize
|
||||||
assert(base==rdi, "base register must be edi for rep stos");
|
assert(base==rdi, "base register must be edi for rep stos");
|
||||||
assert(tmp==rax, "tmp register must be eax for rep stos");
|
assert(tmp==rax, "tmp register must be eax for rep stos");
|
||||||
assert(cnt==rcx, "cnt register must be ecx for rep stos");
|
assert(cnt==rcx, "cnt register must be ecx for rep stos");
|
||||||
|
assert(InitArrayShortSize % BytesPerLong == 0,
|
||||||
|
"InitArrayShortSize should be the multiple of BytesPerLong");
|
||||||
|
|
||||||
|
Label DONE;
|
||||||
|
|
||||||
xorptr(tmp, tmp);
|
xorptr(tmp, tmp);
|
||||||
|
|
||||||
|
if (!is_large) {
|
||||||
|
Label LOOP, LONG;
|
||||||
|
cmpptr(cnt, InitArrayShortSize/BytesPerLong);
|
||||||
|
jccb(Assembler::greater, LONG);
|
||||||
|
|
||||||
|
NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
|
||||||
|
|
||||||
|
decrement(cnt);
|
||||||
|
jccb(Assembler::negative, DONE); // Zero length
|
||||||
|
|
||||||
|
// Use individual pointer-sized stores for small counts:
|
||||||
|
BIND(LOOP);
|
||||||
|
movptr(Address(base, cnt, Address::times_ptr), tmp);
|
||||||
|
decrement(cnt);
|
||||||
|
jccb(Assembler::greaterEqual, LOOP);
|
||||||
|
jmpb(DONE);
|
||||||
|
|
||||||
|
BIND(LONG);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use longer rep-prefixed ops for non-small counts:
|
||||||
if (UseFastStosb) {
|
if (UseFastStosb) {
|
||||||
shlptr(cnt,3); // convert to number of bytes
|
shlptr(cnt, 3); // convert to number of bytes
|
||||||
rep_stosb();
|
rep_stosb();
|
||||||
} else {
|
} else {
|
||||||
NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
|
NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
|
||||||
rep_stos();
|
rep_stos();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BIND(DONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef COMPILER2
|
#ifdef COMPILER2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -48,7 +48,6 @@ class MacroAssembler: public Assembler {
|
|||||||
// This is the base routine called by the different versions of call_VM_leaf. The interpreter
|
// This is the base routine called by the different versions of call_VM_leaf. The interpreter
|
||||||
// may customize this version by overriding it for its purposes (e.g., to save/restore
|
// may customize this version by overriding it for its purposes (e.g., to save/restore
|
||||||
// additional registers when doing a VM call).
|
// additional registers when doing a VM call).
|
||||||
#define COMMA ,
|
|
||||||
|
|
||||||
virtual void call_VM_leaf_base(
|
virtual void call_VM_leaf_base(
|
||||||
address entry_point, // the entry point
|
address entry_point, // the entry point
|
||||||
@ -903,35 +902,66 @@ class MacroAssembler: public Assembler {
|
|||||||
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
|
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
|
||||||
void ldmxcsr(AddressLiteral src);
|
void ldmxcsr(AddressLiteral src);
|
||||||
|
|
||||||
|
void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
|
||||||
|
XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
|
||||||
|
Register buf, Register state, Register ofs, Register limit, Register rsp,
|
||||||
|
bool multi_block);
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
|
void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
|
||||||
|
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
|
||||||
|
Register buf, Register state, Register ofs, Register limit, Register rsp,
|
||||||
|
bool multi_block, XMMRegister shuf_mask);
|
||||||
|
#else
|
||||||
|
void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
|
||||||
|
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
|
||||||
|
Register buf, Register state, Register ofs, Register limit, Register rsp,
|
||||||
|
bool multi_block);
|
||||||
|
#endif
|
||||||
|
|
||||||
void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||||
Register rax, Register rcx, Register rdx, Register tmp);
|
Register rax, Register rcx, Register rdx, Register tmp);
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||||
Register rax, Register rcx, Register rdx, Register tmp1 LP64_ONLY(COMMA Register tmp2));
|
Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
|
||||||
|
|
||||||
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||||
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||||
Register rdx NOT_LP64(COMMA Register tmp) LP64_ONLY(COMMA Register tmp1)
|
Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
|
||||||
LP64_ONLY(COMMA Register tmp2) LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4));
|
|
||||||
|
|
||||||
void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||||
Register rax, Register rbx LP64_ONLY(COMMA Register rcx), Register rdx
|
Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2,
|
||||||
LP64_ONLY(COMMA Register tmp1) LP64_ONLY(COMMA Register tmp2)
|
Register tmp3, Register tmp4);
|
||||||
LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4));
|
|
||||||
|
|
||||||
void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||||
Register rax, Register rcx, Register rdx NOT_LP64(COMMA Register tmp)
|
Register rax, Register rcx, Register rdx, Register tmp1,
|
||||||
LP64_ONLY(COMMA Register r8) LP64_ONLY(COMMA Register r9)
|
Register tmp2, Register tmp3, Register tmp4);
|
||||||
LP64_ONLY(COMMA Register r10) LP64_ONLY(COMMA Register r11));
|
#else
|
||||||
|
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||||
|
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||||
|
Register rax, Register rcx, Register rdx, Register tmp1);
|
||||||
|
|
||||||
|
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||||
|
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||||
|
Register rdx, Register tmp);
|
||||||
|
|
||||||
|
void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||||
|
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||||
|
Register rax, Register rbx, Register rdx);
|
||||||
|
|
||||||
|
void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||||
|
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||||
|
Register rax, Register rcx, Register rdx, Register tmp);
|
||||||
|
|
||||||
#ifndef _LP64
|
|
||||||
void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
|
void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
|
||||||
Register edx, Register ebx, Register esi, Register edi,
|
Register edx, Register ebx, Register esi, Register edi,
|
||||||
Register ebp, Register esp);
|
Register ebp, Register esp);
|
||||||
|
|
||||||
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
|
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
|
||||||
Register esi, Register edi, Register ebp, Register esp);
|
Register esi, Register edi, Register ebp, Register esp);
|
||||||
#endif
|
#endif
|
||||||
@ -1185,14 +1215,131 @@ public:
|
|||||||
void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
|
void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
|
||||||
void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
|
void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
|
||||||
|
|
||||||
// Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector.
|
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||||
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
if (UseAVX > 1) { // vinserti128 is available only in AVX2
|
||||||
if (UseAVX > 1) // vinserti128h is available only in AVX2
|
Assembler::vinserti128(dst, nds, src, imm8);
|
||||||
Assembler::vinserti128h(dst, nds, src);
|
} else {
|
||||||
else
|
Assembler::vinsertf128(dst, nds, src, imm8);
|
||||||
Assembler::vinsertf128h(dst, nds, src);
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 128-bit insert with a memory source operand.
// Emits Assembler::vinserti128 when UseAVX > 1 and otherwise falls back to
// Assembler::vinsertf128; dst, nds, src and imm8 are passed through unchanged
// in both cases.
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||||
|
if (UseAVX > 1) { // vinserti128 is available only in AVX2
|
||||||
|
Assembler::vinserti128(dst, nds, src, imm8);
|
||||||
|
} else {
|
||||||
|
Assembler::vinsertf128(dst, nds, src, imm8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 128-bit extract into a register destination.
// Emits Assembler::vextracti128 when UseAVX > 1 and otherwise falls back to
// Assembler::vextractf128; dst, src and imm8 are passed through unchanged in
// both cases.
void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||||
|
if (UseAVX > 1) { // vextracti128 is available only in AVX2
|
||||||
|
Assembler::vextracti128(dst, src, imm8);
|
||||||
|
} else {
|
||||||
|
Assembler::vextractf128(dst, src, imm8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 128-bit extract into a memory destination.
// Emits Assembler::vextracti128 when UseAVX > 1 and otherwise falls back to
// Assembler::vextractf128; dst, src and imm8 are passed through unchanged in
// both cases.
void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
|
||||||
|
if (UseAVX > 1) { // vextracti128 is available only in AVX2
|
||||||
|
Assembler::vextracti128(dst, src, imm8);
|
||||||
|
} else {
|
||||||
|
Assembler::vextractf128(dst, src, imm8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
|
||||||
|
void vinserti128_high(XMMRegister dst, XMMRegister src) {
|
||||||
|
vinserti128(dst, dst, src, 1);
|
||||||
|
}
|
||||||
|
void vinserti128_high(XMMRegister dst, Address src) {
|
||||||
|
vinserti128(dst, dst, src, 1);
|
||||||
|
}
|
||||||
|
void vextracti128_high(XMMRegister dst, XMMRegister src) {
|
||||||
|
vextracti128(dst, src, 1);
|
||||||
|
}
|
||||||
|
void vextracti128_high(Address dst, XMMRegister src) {
|
||||||
|
vextracti128(dst, src, 1);
|
||||||
|
}
|
||||||
|
void vinsertf128_high(XMMRegister dst, XMMRegister src) {
|
||||||
|
vinsertf128(dst, dst, src, 1);
|
||||||
|
}
|
||||||
|
void vinsertf128_high(XMMRegister dst, Address src) {
|
||||||
|
vinsertf128(dst, dst, src, 1);
|
||||||
|
}
|
||||||
|
void vextractf128_high(XMMRegister dst, XMMRegister src) {
|
||||||
|
vextractf128(dst, src, 1);
|
||||||
|
}
|
||||||
|
void vextractf128_high(Address dst, XMMRegister src) {
|
||||||
|
vextractf128(dst, src, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
|
||||||
|
void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
|
||||||
|
vinserti64x4(dst, dst, src, 1);
|
||||||
|
}
|
||||||
|
void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
|
||||||
|
vinsertf64x4(dst, dst, src, 1);
|
||||||
|
}
|
||||||
|
void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
|
||||||
|
vextracti64x4(dst, src, 1);
|
||||||
|
}
|
||||||
|
void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
|
||||||
|
vextractf64x4(dst, src, 1);
|
||||||
|
}
|
||||||
|
void vextractf64x4_high(Address dst, XMMRegister src) {
|
||||||
|
vextractf64x4(dst, src, 1);
|
||||||
|
}
|
||||||
|
void vinsertf64x4_high(XMMRegister dst, Address src) {
|
||||||
|
vinsertf64x4(dst, dst, src, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
|
||||||
|
void vinserti128_low(XMMRegister dst, XMMRegister src) {
|
||||||
|
vinserti128(dst, dst, src, 0);
|
||||||
|
}
|
||||||
|
void vinserti128_low(XMMRegister dst, Address src) {
|
||||||
|
vinserti128(dst, dst, src, 0);
|
||||||
|
}
|
||||||
|
void vextracti128_low(XMMRegister dst, XMMRegister src) {
|
||||||
|
vextracti128(dst, src, 0);
|
||||||
|
}
|
||||||
|
void vextracti128_low(Address dst, XMMRegister src) {
|
||||||
|
vextracti128(dst, src, 0);
|
||||||
|
}
|
||||||
|
void vinsertf128_low(XMMRegister dst, XMMRegister src) {
|
||||||
|
vinsertf128(dst, dst, src, 0);
|
||||||
|
}
|
||||||
|
void vinsertf128_low(XMMRegister dst, Address src) {
|
||||||
|
vinsertf128(dst, dst, src, 0);
|
||||||
|
}
|
||||||
|
void vextractf128_low(XMMRegister dst, XMMRegister src) {
|
||||||
|
vextractf128(dst, src, 0);
|
||||||
|
}
|
||||||
|
void vextractf128_low(Address dst, XMMRegister src) {
|
||||||
|
vextractf128(dst, src, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
|
||||||
|
void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
|
||||||
|
vinserti64x4(dst, dst, src, 0);
|
||||||
|
}
|
||||||
|
void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
|
||||||
|
vinsertf64x4(dst, dst, src, 0);
|
||||||
|
}
|
||||||
|
void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
|
||||||
|
vextracti64x4(dst, src, 0);
|
||||||
|
}
|
||||||
|
void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
|
||||||
|
vextractf64x4(dst, src, 0);
|
||||||
|
}
|
||||||
|
void vextractf64x4_low(Address dst, XMMRegister src) {
|
||||||
|
vextractf64x4(dst, src, 0);
|
||||||
|
}
|
||||||
|
void vinsertf64x4_low(XMMRegister dst, Address src) {
|
||||||
|
vinsertf64x4(dst, dst, src, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Carry-Less Multiplication Quadword
|
// Carry-Less Multiplication Quadword
|
||||||
void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||||
// 0x00 - multiply lower 64 bits [0:63]
|
// 0x00 - multiply lower 64 bits [0:63]
|
||||||
@ -1284,8 +1431,9 @@ public:
|
|||||||
// C2 compiled method's prolog code.
|
// C2 compiled method's prolog code.
|
||||||
void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b);
|
void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b);
|
||||||
|
|
||||||
// clear memory of size 'cnt' qwords, starting at 'base'.
|
// clear memory of size 'cnt' qwords, starting at 'base';
|
||||||
void clear_mem(Register base, Register cnt, Register rtmp);
|
// if 'is_large' is set, do not try to produce short loop
|
||||||
|
void clear_mem(Register base, Register cnt, Register rtmp, bool is_large);
|
||||||
|
|
||||||
#ifdef COMPILER2
|
#ifdef COMPILER2
|
||||||
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
|
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
|
||||||
|
495
hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp
Normal file
495
hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp
Normal file
@ -0,0 +1,495 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation.
|
||||||
|
*
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "precompiled.hpp"
|
||||||
|
#include "asm/assembler.hpp"
|
||||||
|
#include "asm/assembler.inline.hpp"
|
||||||
|
#include "runtime/stubRoutines.hpp"
|
||||||
|
#include "macroAssembler_x86.hpp"
|
||||||
|
|
||||||
|
// ofs and limit are used for multi-block byte array.
|
||||||
|
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||||
|
// Emits code that runs the 80 SHA-1 rounds over 64-byte blocks read from 'buf'
// and folds the result into the hash held at 'state', using the sha1rnds4,
// sha1nexte, sha1msg1 and sha1msg2 instructions.
//
//   abcd, e0       - working hash. abcd is loaded from state+0; the dword at
//                    state+16 is inserted into dword 3 of e0, which is then
//                    and-ed with upper_word_mask.
//   e1, msg0..msg3 - scratch XMM registers, overwritten for every block.
//   shuf_mask      - first used as scratch for upper_word_mask, afterwards
//                    holds shuffle_byte_flip_mask, which pshufb applies to each
//                    16-byte chunk loaded from buf.
//   buf            - input pointer; advanced by 64 per block when multi_block.
//   state          - 16 bytes at +0 and one dword at +16, read on entry and
//                    written back on exit.
//   ofs, limit     - only used when multi_block: ofs is advanced by 64 per
//                    block and the loop is re-entered while the
//                    cmpptr(ofs, limit) result is Assembler::belowEqual.
//   rsp            - base of a 32-byte save area (offsets 0 and 16) that
//                    receives e0 and abcd at the top of every block.
//                    NOTE(review): presumably reserved by the caller - confirm.
//   multi_block    - code-generation flag; when true the loop-back code is
//                    emitted and the final ofs is copied into rax.
//
// The sha1rnds4 immediate is 0 for rounds 0-19, 1 for 20-39, 2 for 40-59 and
// 3 for 60-79. Labels 'start' and 'done_hash' are bound here, but nothing in
// this function branches to them.
void MacroAssembler::fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
|
||||||
|
XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
|
||||||
|
Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block) {
|
||||||
|
|
||||||
|
Label start, done_hash, loop0;
|
||||||
|
|
||||||
|
address upper_word_mask = StubRoutines::x86::upper_word_mask_addr();
|
||||||
|
address shuffle_byte_flip_mask = StubRoutines::x86::shuffle_byte_flip_mask_addr();
|
||||||
|
|
||||||
|
bind(start);
|
||||||
|
movdqu(abcd, Address(state, 0));
|
||||||
|
pinsrd(e0, Address(state, 16), 3);
|
||||||
|
movdqu(shuf_mask, ExternalAddress(upper_word_mask)); // 0xFFFFFFFF000000000000000000000000
|
||||||
|
pand(e0, shuf_mask);
|
||||||
|
pshufd(abcd, abcd, 0x1B);
|
||||||
|
movdqu(shuf_mask, ExternalAddress(shuffle_byte_flip_mask)); //0x000102030405060708090a0b0c0d0e0f
|
||||||
|
|
||||||
|
bind(loop0);
|
||||||
|
// Save hash values for addition after rounds
|
||||||
|
movdqu(Address(rsp, 0), e0);
|
||||||
|
movdqu(Address(rsp, 16), abcd);
|
||||||
|
|
||||||
|
|
||||||
|
// Rounds 0 - 3
|
||||||
|
movdqu(msg0, Address(buf, 0));
|
||||||
|
pshufb(msg0, shuf_mask);
|
||||||
|
paddd(e0, msg0);
|
||||||
|
movdqa(e1, abcd);
|
||||||
|
sha1rnds4(abcd, e0, 0);
|
||||||
|
|
||||||
|
// Rounds 4 - 7
|
||||||
|
movdqu(msg1, Address(buf, 16));
|
||||||
|
pshufb(msg1, shuf_mask);
|
||||||
|
sha1nexte(e1, msg1);
|
||||||
|
movdqa(e0, abcd);
|
||||||
|
sha1rnds4(abcd, e1, 0);
|
||||||
|
sha1msg1(msg0, msg1);
|
||||||
|
|
||||||
|
// Rounds 8 - 11
|
||||||
|
movdqu(msg2, Address(buf, 32));
|
||||||
|
pshufb(msg2, shuf_mask);
|
||||||
|
sha1nexte(e0, msg2);
|
||||||
|
movdqa(e1, abcd);
|
||||||
|
sha1rnds4(abcd, e0, 0);
|
||||||
|
sha1msg1(msg1, msg2);
|
||||||
|
pxor(msg0, msg2);
|
||||||
|
|
||||||
|
// Rounds 12 - 15
|
||||||
|
movdqu(msg3, Address(buf, 48));
|
||||||
|
pshufb(msg3, shuf_mask);
|
||||||
|
sha1nexte(e1, msg3);
|
||||||
|
movdqa(e0, abcd);
|
||||||
|
sha1msg2(msg0, msg3);
|
||||||
|
sha1rnds4(abcd, e1, 0);
|
||||||
|
sha1msg1(msg2, msg3);
|
||||||
|
pxor(msg1, msg3);
|
||||||
|
|
||||||
|
// Rounds 16 - 19
|
||||||
|
sha1nexte(e0, msg0);
|
||||||
|
movdqa(e1, abcd);
|
||||||
|
sha1msg2(msg1, msg0);
|
||||||
|
sha1rnds4(abcd, e0, 0);
|
||||||
|
sha1msg1(msg3, msg0);
|
||||||
|
pxor(msg2, msg0);
|
||||||
|
|
||||||
|
// Rounds 20 - 23
|
||||||
|
sha1nexte(e1, msg1);
|
||||||
|
movdqa(e0, abcd);
|
||||||
|
sha1msg2(msg2, msg1);
|
||||||
|
sha1rnds4(abcd, e1, 1);
|
||||||
|
sha1msg1(msg0, msg1);
|
||||||
|
pxor(msg3, msg1);
|
||||||
|
|
||||||
|
// Rounds 24 - 27
|
||||||
|
sha1nexte(e0, msg2);
|
||||||
|
movdqa(e1, abcd);
|
||||||
|
sha1msg2(msg3, msg2);
|
||||||
|
sha1rnds4(abcd, e0, 1);
|
||||||
|
sha1msg1(msg1, msg2);
|
||||||
|
pxor(msg0, msg2);
|
||||||
|
|
||||||
|
// Rounds 28 - 31
|
||||||
|
sha1nexte(e1, msg3);
|
||||||
|
movdqa(e0, abcd);
|
||||||
|
sha1msg2(msg0, msg3);
|
||||||
|
sha1rnds4(abcd, e1, 1);
|
||||||
|
sha1msg1(msg2, msg3);
|
||||||
|
pxor(msg1, msg3);
|
||||||
|
|
||||||
|
// Rounds 32 - 35
|
||||||
|
sha1nexte(e0, msg0);
|
||||||
|
movdqa(e1, abcd);
|
||||||
|
sha1msg2(msg1, msg0);
|
||||||
|
sha1rnds4(abcd, e0, 1);
|
||||||
|
sha1msg1(msg3, msg0);
|
||||||
|
pxor(msg2, msg0);
|
||||||
|
|
||||||
|
// Rounds 36 - 39
|
||||||
|
sha1nexte(e1, msg1);
|
||||||
|
movdqa(e0, abcd);
|
||||||
|
sha1msg2(msg2, msg1);
|
||||||
|
sha1rnds4(abcd, e1, 1);
|
||||||
|
sha1msg1(msg0, msg1);
|
||||||
|
pxor(msg3, msg1);
|
||||||
|
|
||||||
|
// Rounds 40 - 43
|
||||||
|
sha1nexte(e0, msg2);
|
||||||
|
movdqa(e1, abcd);
|
||||||
|
sha1msg2(msg3, msg2);
|
||||||
|
sha1rnds4(abcd, e0, 2);
|
||||||
|
sha1msg1(msg1, msg2);
|
||||||
|
pxor(msg0, msg2);
|
||||||
|
|
||||||
|
// Rounds 44 - 47
|
||||||
|
sha1nexte(e1, msg3);
|
||||||
|
movdqa(e0, abcd);
|
||||||
|
sha1msg2(msg0, msg3);
|
||||||
|
sha1rnds4(abcd, e1, 2);
|
||||||
|
sha1msg1(msg2, msg3);
|
||||||
|
pxor(msg1, msg3);
|
||||||
|
|
||||||
|
// Rounds 48 - 51
|
||||||
|
sha1nexte(e0, msg0);
|
||||||
|
movdqa(e1, abcd);
|
||||||
|
sha1msg2(msg1, msg0);
|
||||||
|
sha1rnds4(abcd, e0, 2);
|
||||||
|
sha1msg1(msg3, msg0);
|
||||||
|
pxor(msg2, msg0);
|
||||||
|
|
||||||
|
// Rounds 52 - 55
|
||||||
|
sha1nexte(e1, msg1);
|
||||||
|
movdqa(e0, abcd);
|
||||||
|
sha1msg2(msg2, msg1);
|
||||||
|
sha1rnds4(abcd, e1, 2);
|
||||||
|
sha1msg1(msg0, msg1);
|
||||||
|
pxor(msg3, msg1);
|
||||||
|
|
||||||
|
// Rounds 56 - 59
|
||||||
|
sha1nexte(e0, msg2);
|
||||||
|
movdqa(e1, abcd);
|
||||||
|
sha1msg2(msg3, msg2);
|
||||||
|
sha1rnds4(abcd, e0, 2);
|
||||||
|
sha1msg1(msg1, msg2);
|
||||||
|
pxor(msg0, msg2);
|
||||||
|
|
||||||
|
// Rounds 60 - 63
|
||||||
|
sha1nexte(e1, msg3);
|
||||||
|
movdqa(e0, abcd);
|
||||||
|
sha1msg2(msg0, msg3);
|
||||||
|
sha1rnds4(abcd, e1, 3);
|
||||||
|
sha1msg1(msg2, msg3);
|
||||||
|
pxor(msg1, msg3);
|
||||||
|
|
||||||
|
// Rounds 64 - 67
|
||||||
|
sha1nexte(e0, msg0);
|
||||||
|
movdqa(e1, abcd);
|
||||||
|
sha1msg2(msg1, msg0);
|
||||||
|
sha1rnds4(abcd, e0, 3);
|
||||||
|
sha1msg1(msg3, msg0);
|
||||||
|
pxor(msg2, msg0);
|
||||||
|
|
||||||
|
// Rounds 68 - 71
|
||||||
|
sha1nexte(e1, msg1);
|
||||||
|
movdqa(e0, abcd);
|
||||||
|
sha1msg2(msg2, msg1);
|
||||||
|
sha1rnds4(abcd, e1, 3);
|
||||||
|
pxor(msg3, msg1);
|
||||||
|
|
||||||
|
// Rounds 72 - 75
|
||||||
|
sha1nexte(e0, msg2);
|
||||||
|
movdqa(e1, abcd);
|
||||||
|
sha1msg2(msg3, msg2);
|
||||||
|
sha1rnds4(abcd, e0, 3);
|
||||||
|
|
||||||
|
// Rounds 76 - 79
|
||||||
|
sha1nexte(e1, msg3);
|
||||||
|
movdqa(e0, abcd);
|
||||||
|
sha1rnds4(abcd, e1, 3);
|
||||||
|
|
||||||
|
// add current hash values with previously saved
|
||||||
|
movdqu(msg0, Address(rsp, 0));
|
||||||
|
sha1nexte(e0, msg0);
|
||||||
|
movdqu(msg0, Address(rsp, 16));
|
||||||
|
paddd(abcd, msg0);
|
||||||
|
|
||||||
|
if (multi_block) {
|
||||||
|
// increment data pointer and loop if more to process
|
||||||
|
addptr(buf, 64);
|
||||||
|
addptr(ofs, 64);
|
||||||
|
cmpptr(ofs, limit);
|
||||||
|
jcc(Assembler::belowEqual, loop0);
|
||||||
|
movptr(rax, ofs); //return ofs
|
||||||
|
}
|
||||||
|
// write hash values back in the correct order
|
||||||
|
pshufd(abcd, abcd, 0x1b);
|
||||||
|
movdqu(Address(state, 0), abcd);
|
||||||
|
pextrd(Address(state, 16), e0, 3);
|
||||||
|
|
||||||
|
bind(done_hash);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// xmm0 (msg) is used as an implicit argument to sha256rnds2
|
||||||
|
// and state0 and state1 can never use xmm0 register.
|
||||||
|
// ofs and limit are used for multi-block byte array.
|
||||||
|
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||||
|
#ifdef _LP64
|
||||||
|
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
|
||||||
|
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
|
||||||
|
Register buf, Register state, Register ofs, Register limit, Register rsp,
|
||||||
|
bool multi_block, XMMRegister shuf_mask) {
|
||||||
|
#else
|
||||||
|
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
|
||||||
|
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
|
||||||
|
Register buf, Register state, Register ofs, Register limit, Register rsp,
|
||||||
|
bool multi_block) {
|
||||||
|
#endif
|
||||||
|
Label start, done_hash, loop0;
|
||||||
|
|
||||||
|
address K256 = StubRoutines::x86::k256_addr();
|
||||||
|
address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr();
|
||||||
|
|
||||||
|
bind(start);
|
||||||
|
movdqu(state0, Address(state, 0));
|
||||||
|
movdqu(state1, Address(state, 16));
|
||||||
|
|
||||||
|
pshufd(state0, state0, 0xB1);
|
||||||
|
pshufd(state1, state1, 0x1B);
|
||||||
|
movdqa(msgtmp4, state0);
|
||||||
|
palignr(state0, state1, 8);
|
||||||
|
pblendw(state1, msgtmp4, 0xF0);
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
|
movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask));
|
||||||
|
#endif
|
||||||
|
lea(rax, ExternalAddress(K256));
|
||||||
|
|
||||||
|
bind(loop0);
|
||||||
|
movdqu(Address(rsp, 0), state0);
|
||||||
|
movdqu(Address(rsp, 16), state1);
|
||||||
|
|
||||||
|
// Rounds 0-3
|
||||||
|
movdqu(msg, Address(buf, 0));
|
||||||
|
#ifdef _LP64
|
||||||
|
pshufb(msg, shuf_mask);
|
||||||
|
#else
|
||||||
|
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
|
||||||
|
#endif
|
||||||
|
movdqa(msgtmp0, msg);
|
||||||
|
paddd(msg, Address(rax, 0));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
|
||||||
|
// Rounds 4-7
|
||||||
|
movdqu(msg, Address(buf, 16));
|
||||||
|
#ifdef _LP64
|
||||||
|
pshufb(msg, shuf_mask);
|
||||||
|
#else
|
||||||
|
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
|
||||||
|
#endif
|
||||||
|
movdqa(msgtmp1, msg);
|
||||||
|
paddd(msg, Address(rax, 16));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp0, msgtmp1);
|
||||||
|
|
||||||
|
// Rounds 8-11
|
||||||
|
movdqu(msg, Address(buf, 32));
|
||||||
|
#ifdef _LP64
|
||||||
|
pshufb(msg, shuf_mask);
|
||||||
|
#else
|
||||||
|
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
|
||||||
|
#endif
|
||||||
|
movdqa(msgtmp2, msg);
|
||||||
|
paddd(msg, Address(rax, 32));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp1, msgtmp2);
|
||||||
|
|
||||||
|
// Rounds 12-15
|
||||||
|
movdqu(msg, Address(buf, 48));
|
||||||
|
#ifdef _LP64
|
||||||
|
pshufb(msg, shuf_mask);
|
||||||
|
#else
|
||||||
|
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
|
||||||
|
#endif
|
||||||
|
movdqa(msgtmp3, msg);
|
||||||
|
paddd(msg, Address(rax, 48));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp3);
|
||||||
|
palignr(msgtmp4, msgtmp2, 4);
|
||||||
|
paddd(msgtmp0, msgtmp4);
|
||||||
|
sha256msg2(msgtmp0, msgtmp3);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp2, msgtmp3);
|
||||||
|
|
||||||
|
// Rounds 16-19
|
||||||
|
movdqa(msg, msgtmp0);
|
||||||
|
paddd(msg, Address(rax, 64));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp0);
|
||||||
|
palignr(msgtmp4, msgtmp3, 4);
|
||||||
|
paddd(msgtmp1, msgtmp4);
|
||||||
|
sha256msg2(msgtmp1, msgtmp0);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp3, msgtmp0);
|
||||||
|
|
||||||
|
// Rounds 20-23
|
||||||
|
movdqa(msg, msgtmp1);
|
||||||
|
paddd(msg, Address(rax, 80));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp1);
|
||||||
|
palignr(msgtmp4, msgtmp0, 4);
|
||||||
|
paddd(msgtmp2, msgtmp4);
|
||||||
|
sha256msg2(msgtmp2, msgtmp1);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp0, msgtmp1);
|
||||||
|
|
||||||
|
// Rounds 24-27
|
||||||
|
movdqa(msg, msgtmp2);
|
||||||
|
paddd(msg, Address(rax, 96));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp2);
|
||||||
|
palignr(msgtmp4, msgtmp1, 4);
|
||||||
|
paddd(msgtmp3, msgtmp4);
|
||||||
|
sha256msg2(msgtmp3, msgtmp2);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp1, msgtmp2);
|
||||||
|
|
||||||
|
// Rounds 28-31
|
||||||
|
movdqa(msg, msgtmp3);
|
||||||
|
paddd(msg, Address(rax, 112));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp3);
|
||||||
|
palignr(msgtmp4, msgtmp2, 4);
|
||||||
|
paddd(msgtmp0, msgtmp4);
|
||||||
|
sha256msg2(msgtmp0, msgtmp3);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp2, msgtmp3);
|
||||||
|
|
||||||
|
// Rounds 32-35
|
||||||
|
movdqa(msg, msgtmp0);
|
||||||
|
paddd(msg, Address(rax, 128));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp0);
|
||||||
|
palignr(msgtmp4, msgtmp3, 4);
|
||||||
|
paddd(msgtmp1, msgtmp4);
|
||||||
|
sha256msg2(msgtmp1, msgtmp0);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp3, msgtmp0);
|
||||||
|
|
||||||
|
// Rounds 36-39
|
||||||
|
movdqa(msg, msgtmp1);
|
||||||
|
paddd(msg, Address(rax, 144));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp1);
|
||||||
|
palignr(msgtmp4, msgtmp0, 4);
|
||||||
|
paddd(msgtmp2, msgtmp4);
|
||||||
|
sha256msg2(msgtmp2, msgtmp1);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp0, msgtmp1);
|
||||||
|
|
||||||
|
// Rounds 40-43
|
||||||
|
movdqa(msg, msgtmp2);
|
||||||
|
paddd(msg, Address(rax, 160));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp2);
|
||||||
|
palignr(msgtmp4, msgtmp1, 4);
|
||||||
|
paddd(msgtmp3, msgtmp4);
|
||||||
|
sha256msg2(msgtmp3, msgtmp2);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp1, msgtmp2);
|
||||||
|
|
||||||
|
// Rounds 44-47
|
||||||
|
movdqa(msg, msgtmp3);
|
||||||
|
paddd(msg, Address(rax, 176));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp3);
|
||||||
|
palignr(msgtmp4, msgtmp2, 4);
|
||||||
|
paddd(msgtmp0, msgtmp4);
|
||||||
|
sha256msg2(msgtmp0, msgtmp3);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp2, msgtmp3);
|
||||||
|
|
||||||
|
// Rounds 48-51
|
||||||
|
movdqa(msg, msgtmp0);
|
||||||
|
paddd(msg, Address(rax, 192));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp0);
|
||||||
|
palignr(msgtmp4, msgtmp3, 4);
|
||||||
|
paddd(msgtmp1, msgtmp4);
|
||||||
|
sha256msg2(msgtmp1, msgtmp0);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
sha256msg1(msgtmp3, msgtmp0);
|
||||||
|
|
||||||
|
// Rounds 52-55
|
||||||
|
movdqa(msg, msgtmp1);
|
||||||
|
paddd(msg, Address(rax, 208));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp1);
|
||||||
|
palignr(msgtmp4, msgtmp0, 4);
|
||||||
|
paddd(msgtmp2, msgtmp4);
|
||||||
|
sha256msg2(msgtmp2, msgtmp1);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
|
||||||
|
// Rounds 56-59
|
||||||
|
movdqa(msg, msgtmp2);
|
||||||
|
paddd(msg, Address(rax, 224));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
movdqa(msgtmp4, msgtmp2);
|
||||||
|
palignr(msgtmp4, msgtmp1, 4);
|
||||||
|
paddd(msgtmp3, msgtmp4);
|
||||||
|
sha256msg2(msgtmp3, msgtmp2);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
|
||||||
|
// Rounds 60-63
|
||||||
|
movdqa(msg, msgtmp3);
|
||||||
|
paddd(msg, Address(rax, 240));
|
||||||
|
sha256rnds2(state1, state0);
|
||||||
|
pshufd(msg, msg, 0x0E);
|
||||||
|
sha256rnds2(state0, state1);
|
||||||
|
movdqu(msg, Address(rsp, 0));
|
||||||
|
paddd(state0, msg);
|
||||||
|
movdqu(msg, Address(rsp, 16));
|
||||||
|
paddd(state1, msg);
|
||||||
|
|
||||||
|
if (multi_block) {
|
||||||
|
// increment data pointer and loop if more to process
|
||||||
|
addptr(buf, 64);
|
||||||
|
addptr(ofs, 64);
|
||||||
|
cmpptr(ofs, limit);
|
||||||
|
jcc(Assembler::belowEqual, loop0);
|
||||||
|
movptr(rax, ofs); //return ofs
|
||||||
|
}
|
||||||
|
|
||||||
|
pshufd(state0, state0, 0x1B);
|
||||||
|
pshufd(state1, state1, 0xB1);
|
||||||
|
movdqa(msgtmp4, state0);
|
||||||
|
pblendw(state0, state1, 0xF0);
|
||||||
|
palignr(state1, msgtmp4, 8);
|
||||||
|
|
||||||
|
movdqu(Address(state, 0), state0);
|
||||||
|
movdqu(Address(state, 16), state1);
|
||||||
|
|
||||||
|
bind(done_hash);
|
||||||
|
|
||||||
|
}
|
@ -208,13 +208,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
|||||||
__ subptr(rsp, ymm_bytes);
|
__ subptr(rsp, ymm_bytes);
|
||||||
// Save upper half of YMM registers
|
// Save upper half of YMM registers
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
__ vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
|
__ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
|
||||||
}
|
}
|
||||||
if (UseAVX > 2) {
|
if (UseAVX > 2) {
|
||||||
__ subptr(rsp, zmm_bytes);
|
__ subptr(rsp, zmm_bytes);
|
||||||
// Save upper half of ZMM registers
|
// Save upper half of ZMM registers
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
__ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
|
__ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -304,13 +304,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
|
|||||||
if (UseAVX > 2) {
|
if (UseAVX > 2) {
|
||||||
// Restore upper half of ZMM registers.
|
// Restore upper half of ZMM registers.
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
|
__ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
|
||||||
}
|
}
|
||||||
__ addptr(rsp, zmm_bytes);
|
__ addptr(rsp, zmm_bytes);
|
||||||
}
|
}
|
||||||
// Restore upper half of YMM registers.
|
// Restore upper half of YMM registers.
|
||||||
for (int n = 0; n < num_xmm_regs; n++) {
|
for (int n = 0; n < num_xmm_regs; n++) {
|
||||||
__ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
|
__ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
|
||||||
}
|
}
|
||||||
__ addptr(rsp, ymm_bytes);
|
__ addptr(rsp, ymm_bytes);
|
||||||
}
|
}
|
||||||
|
@ -179,13 +179,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
|||||||
// Save upper half of YMM registers(0..15)
|
// Save upper half of YMM registers(0..15)
|
||||||
int base_addr = XSAVE_AREA_YMM_BEGIN;
|
int base_addr = XSAVE_AREA_YMM_BEGIN;
|
||||||
for (int n = 0; n < 16; n++) {
|
for (int n = 0; n < 16; n++) {
|
||||||
__ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
|
__ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n));
|
||||||
}
|
}
|
||||||
if (VM_Version::supports_evex()) {
|
if (VM_Version::supports_evex()) {
|
||||||
// Save upper half of ZMM registers(0..15)
|
// Save upper half of ZMM registers(0..15)
|
||||||
base_addr = XSAVE_AREA_ZMM_BEGIN;
|
base_addr = XSAVE_AREA_ZMM_BEGIN;
|
||||||
for (int n = 0; n < 16; n++) {
|
for (int n = 0; n < 16; n++) {
|
||||||
__ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
|
__ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
|
||||||
}
|
}
|
||||||
// Save full ZMM registers(16..num_xmm_regs)
|
// Save full ZMM registers(16..num_xmm_regs)
|
||||||
base_addr = XSAVE_AREA_UPPERBANK;
|
base_addr = XSAVE_AREA_UPPERBANK;
|
||||||
@ -333,13 +333,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
|
|||||||
// Restore upper half of YMM registers (0..15)
|
// Restore upper half of YMM registers (0..15)
|
||||||
int base_addr = XSAVE_AREA_YMM_BEGIN;
|
int base_addr = XSAVE_AREA_YMM_BEGIN;
|
||||||
for (int n = 0; n < 16; n++) {
|
for (int n = 0; n < 16; n++) {
|
||||||
__ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16));
|
__ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
|
||||||
}
|
}
|
||||||
if (VM_Version::supports_evex()) {
|
if (VM_Version::supports_evex()) {
|
||||||
// Restore upper half of ZMM registers (0..15)
|
// Restore upper half of ZMM registers (0..15)
|
||||||
base_addr = XSAVE_AREA_ZMM_BEGIN;
|
base_addr = XSAVE_AREA_ZMM_BEGIN;
|
||||||
for (int n = 0; n < 16; n++) {
|
for (int n = 0; n < 16; n++) {
|
||||||
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
|
__ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
|
||||||
}
|
}
|
||||||
// Restore full ZMM registers(16..num_xmm_regs)
|
// Restore full ZMM registers(16..num_xmm_regs)
|
||||||
base_addr = XSAVE_AREA_UPPERBANK;
|
base_addr = XSAVE_AREA_UPPERBANK;
|
||||||
|
@ -3068,6 +3068,136 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the "upper word mask" constant used by the SHA-1 stubs: three zero
// data words followed by one 0xFFFFFFFF word, starting at a 64-aligned pc.
// Returns the address of the first emitted word.
address generate_upper_word_mask() {
  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
  address mask_base = __ pc();
  // Lower three words are cleared...
  for (int zero_word = 0; zero_word < 3; zero_word++) {
    __ emit_data(0x00000000, relocInfo::none, 0);
  }
  // ...and only the last (upper) word is all ones.
  __ emit_data(0xFFFFFFFF, relocInfo::none, 0);
  return mask_base;
}
|
||||||
|
|
||||||
|
// Emits the byte flip mask used by the SHA-1 stubs as four data words,
// starting at a 64-aligned pc. Returns the address of the first word.
address generate_shuffle_byte_flip_mask() {
  // Words are emitted in exactly this order.
  const int flip_words[4] = { 0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203 };

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
  address mask_base = __ pc();
  for (int i = 0; i < 4; i++) {
    __ emit_data(flip_words[i], relocInfo::none, 0);
  }
  return mask_base;
}
|
||||||
|
|
||||||
|
// ofs and limit are use for multi-block byte array.
|
||||||
|
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||||
|
address generate_sha1_implCompress(bool multi_block, const char *name) {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", name);
|
||||||
|
address start = __ pc();
|
||||||
|
|
||||||
|
Register buf = rax;
|
||||||
|
Register state = rdx;
|
||||||
|
Register ofs = rcx;
|
||||||
|
Register limit = rdi;
|
||||||
|
|
||||||
|
const Address buf_param(rbp, 8 + 0);
|
||||||
|
const Address state_param(rbp, 8 + 4);
|
||||||
|
const Address ofs_param(rbp, 8 + 8);
|
||||||
|
const Address limit_param(rbp, 8 + 12);
|
||||||
|
|
||||||
|
const XMMRegister abcd = xmm0;
|
||||||
|
const XMMRegister e0 = xmm1;
|
||||||
|
const XMMRegister e1 = xmm2;
|
||||||
|
const XMMRegister msg0 = xmm3;
|
||||||
|
|
||||||
|
const XMMRegister msg1 = xmm4;
|
||||||
|
const XMMRegister msg2 = xmm5;
|
||||||
|
const XMMRegister msg3 = xmm6;
|
||||||
|
const XMMRegister shuf_mask = xmm7;
|
||||||
|
|
||||||
|
__ enter();
|
||||||
|
__ subptr(rsp, 8 * wordSize);
|
||||||
|
if (multi_block) {
|
||||||
|
__ push(limit);
|
||||||
|
}
|
||||||
|
__ movptr(buf, buf_param);
|
||||||
|
__ movptr(state, state_param);
|
||||||
|
if (multi_block) {
|
||||||
|
__ movptr(ofs, ofs_param);
|
||||||
|
__ movptr(limit, limit_param);
|
||||||
|
}
|
||||||
|
|
||||||
|
__ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
|
||||||
|
buf, state, ofs, limit, rsp, multi_block);
|
||||||
|
|
||||||
|
if (multi_block) {
|
||||||
|
__ pop(limit);
|
||||||
|
}
|
||||||
|
__ addptr(rsp, 8 * wordSize);
|
||||||
|
__ leave();
|
||||||
|
__ ret(0);
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits the byte flip mask used by the SHA-256 stubs as four data words,
// starting at a 64-aligned pc. Returns the address of the first word.
address generate_pshuffle_byte_flip_mask() {
  // Words are emitted in exactly this order.
  const int flip_words[4] = { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
  address mask_base = __ pc();
  for (int i = 0; i < 4; i++) {
    __ emit_data(flip_words[i], relocInfo::none, 0);
  }
  return mask_base;
}
|
||||||
|
|
||||||
|
// ofs and limit are use for multi-block byte array.
|
||||||
|
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||||
|
address generate_sha256_implCompress(bool multi_block, const char *name) {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", name);
|
||||||
|
address start = __ pc();
|
||||||
|
|
||||||
|
Register buf = rbx;
|
||||||
|
Register state = rsi;
|
||||||
|
Register ofs = rdx;
|
||||||
|
Register limit = rcx;
|
||||||
|
|
||||||
|
const Address buf_param(rbp, 8 + 0);
|
||||||
|
const Address state_param(rbp, 8 + 4);
|
||||||
|
const Address ofs_param(rbp, 8 + 8);
|
||||||
|
const Address limit_param(rbp, 8 + 12);
|
||||||
|
|
||||||
|
const XMMRegister msg = xmm0;
|
||||||
|
const XMMRegister state0 = xmm1;
|
||||||
|
const XMMRegister state1 = xmm2;
|
||||||
|
const XMMRegister msgtmp0 = xmm3;
|
||||||
|
|
||||||
|
const XMMRegister msgtmp1 = xmm4;
|
||||||
|
const XMMRegister msgtmp2 = xmm5;
|
||||||
|
const XMMRegister msgtmp3 = xmm6;
|
||||||
|
const XMMRegister msgtmp4 = xmm7;
|
||||||
|
|
||||||
|
__ enter();
|
||||||
|
__ subptr(rsp, 8 * wordSize);
|
||||||
|
handleSOERegisters(true /*saving*/);
|
||||||
|
__ movptr(buf, buf_param);
|
||||||
|
__ movptr(state, state_param);
|
||||||
|
if (multi_block) {
|
||||||
|
__ movptr(ofs, ofs_param);
|
||||||
|
__ movptr(limit, limit_param);
|
||||||
|
}
|
||||||
|
|
||||||
|
__ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
|
||||||
|
buf, state, ofs, limit, rsp, multi_block);
|
||||||
|
|
||||||
|
handleSOERegisters(false);
|
||||||
|
__ addptr(rsp, 8 * wordSize);
|
||||||
|
__ leave();
|
||||||
|
__ ret(0);
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
// byte swap x86 long
|
// byte swap x86 long
|
||||||
address generate_ghash_long_swap_mask() {
|
address generate_ghash_long_swap_mask() {
|
||||||
@ -3772,6 +3902,19 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
|
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (UseSHA1Intrinsics) {
|
||||||
|
StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
|
||||||
|
StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
|
||||||
|
StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
|
||||||
|
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
|
||||||
|
}
|
||||||
|
if (UseSHA256Intrinsics) {
|
||||||
|
StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
|
||||||
|
StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
|
||||||
|
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
|
||||||
|
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
|
||||||
|
}
|
||||||
|
|
||||||
// Generate GHASH intrinsics code
|
// Generate GHASH intrinsics code
|
||||||
if (UseGHASHIntrinsics) {
|
if (UseGHASHIntrinsics) {
|
||||||
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||||
|
@ -275,7 +275,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
}
|
}
|
||||||
if (VM_Version::supports_evex()) {
|
if (VM_Version::supports_evex()) {
|
||||||
for (int i = xmm_save_first; i <= last_reg; i++) {
|
for (int i = xmm_save_first; i <= last_reg; i++) {
|
||||||
__ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0);
|
__ vextractf32x4(xmm_save(i), as_XMMRegister(i), 0);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int i = xmm_save_first; i <= last_reg; i++) {
|
for (int i = xmm_save_first; i <= last_reg; i++) {
|
||||||
@ -393,7 +393,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
// emit the restores for xmm regs
|
// emit the restores for xmm regs
|
||||||
if (VM_Version::supports_evex()) {
|
if (VM_Version::supports_evex()) {
|
||||||
for (int i = xmm_save_first; i <= last_reg; i++) {
|
for (int i = xmm_save_first; i <= last_reg; i++) {
|
||||||
__ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0);
|
__ vinsertf32x4(as_XMMRegister(i), as_XMMRegister(i), xmm_save(i), 0);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int i = xmm_save_first; i <= last_reg; i++) {
|
for (int i = xmm_save_first; i <= last_reg; i++) {
|
||||||
@ -3695,6 +3695,133 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the "upper word mask" constant used by the SHA-1 stubs as two 64-bit
// data words, starting at a 64-aligned pc. Only the upper 32 bits of the
// second word are set. Returns the address of the first emitted word.
address generate_upper_word_mask() {
  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
  address const mask_base = __ pc();

  // first word: all zero
  __ emit_data64(0x0000000000000000, relocInfo::none);
  // second word: upper half all ones, lower half zero
  __ emit_data64(0xFFFFFFFF00000000, relocInfo::none);

  return mask_base;
}
|
||||||
|
|
||||||
|
// Emits the byte flip mask used by the SHA-1 stubs as two 64-bit data words,
// starting at a 64-aligned pc. Returns the address of the first emitted word.
address generate_shuffle_byte_flip_mask() {
  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
  address const mask_base = __ pc();

  // first emitted word
  __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
  // second emitted word
  __ emit_data64(0x0001020304050607, relocInfo::none);

  return mask_base;
}
|
||||||
|
|
||||||
|
// ofs and limit are use for multi-block byte array.
|
||||||
|
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||||
|
address generate_sha1_implCompress(bool multi_block, const char *name) {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", name);
|
||||||
|
address start = __ pc();
|
||||||
|
|
||||||
|
Register buf = c_rarg0;
|
||||||
|
Register state = c_rarg1;
|
||||||
|
Register ofs = c_rarg2;
|
||||||
|
Register limit = c_rarg3;
|
||||||
|
|
||||||
|
const XMMRegister abcd = xmm0;
|
||||||
|
const XMMRegister e0 = xmm1;
|
||||||
|
const XMMRegister e1 = xmm2;
|
||||||
|
const XMMRegister msg0 = xmm3;
|
||||||
|
|
||||||
|
const XMMRegister msg1 = xmm4;
|
||||||
|
const XMMRegister msg2 = xmm5;
|
||||||
|
const XMMRegister msg3 = xmm6;
|
||||||
|
const XMMRegister shuf_mask = xmm7;
|
||||||
|
|
||||||
|
__ enter();
|
||||||
|
|
||||||
|
#ifdef _WIN64
|
||||||
|
// save the xmm registers which must be preserved 6-7
|
||||||
|
__ subptr(rsp, 4 * wordSize);
|
||||||
|
__ movdqu(Address(rsp, 0), xmm6);
|
||||||
|
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__ subptr(rsp, 4 * wordSize);
|
||||||
|
|
||||||
|
__ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
|
||||||
|
buf, state, ofs, limit, rsp, multi_block);
|
||||||
|
|
||||||
|
__ addptr(rsp, 4 * wordSize);
|
||||||
|
#ifdef _WIN64
|
||||||
|
// restore xmm regs belonging to calling function
|
||||||
|
__ movdqu(xmm6, Address(rsp, 0));
|
||||||
|
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||||
|
__ addptr(rsp, 4 * wordSize);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__ leave();
|
||||||
|
__ ret(0);
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits the byte flip mask used by the SHA-256 stubs as two 64-bit data
// words, starting at a 64-aligned pc. Returns the address of the first word.
address generate_pshuffle_byte_flip_mask() {
  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
  address const mask_base = __ pc();

  // first emitted word
  __ emit_data64(0x0405060700010203, relocInfo::none);
  // second emitted word
  __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);

  return mask_base;
}
|
||||||
|
|
||||||
|
// ofs and limit are use for multi-block byte array.
|
||||||
|
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||||
|
address generate_sha256_implCompress(bool multi_block, const char *name) {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", name);
|
||||||
|
address start = __ pc();
|
||||||
|
|
||||||
|
Register buf = c_rarg0;
|
||||||
|
Register state = c_rarg1;
|
||||||
|
Register ofs = c_rarg2;
|
||||||
|
Register limit = c_rarg3;
|
||||||
|
|
||||||
|
const XMMRegister msg = xmm0;
|
||||||
|
const XMMRegister state0 = xmm1;
|
||||||
|
const XMMRegister state1 = xmm2;
|
||||||
|
const XMMRegister msgtmp0 = xmm3;
|
||||||
|
|
||||||
|
const XMMRegister msgtmp1 = xmm4;
|
||||||
|
const XMMRegister msgtmp2 = xmm5;
|
||||||
|
const XMMRegister msgtmp3 = xmm6;
|
||||||
|
const XMMRegister msgtmp4 = xmm7;
|
||||||
|
|
||||||
|
const XMMRegister shuf_mask = xmm8;
|
||||||
|
|
||||||
|
__ enter();
|
||||||
|
#ifdef _WIN64
|
||||||
|
// save the xmm registers which must be preserved 6-7
|
||||||
|
__ subptr(rsp, 6 * wordSize);
|
||||||
|
__ movdqu(Address(rsp, 0), xmm6);
|
||||||
|
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
|
||||||
|
__ movdqu(Address(rsp, 4 * wordSize), xmm8);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__ subptr(rsp, 4 * wordSize);
|
||||||
|
|
||||||
|
__ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
|
||||||
|
buf, state, ofs, limit, rsp, multi_block, shuf_mask);
|
||||||
|
|
||||||
|
__ addptr(rsp, 4 * wordSize);
|
||||||
|
#ifdef _WIN64
|
||||||
|
// restore xmm regs belonging to calling function
|
||||||
|
__ movdqu(xmm6, Address(rsp, 0));
|
||||||
|
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||||
|
__ movdqu(xmm8, Address(rsp, 4 * wordSize));
|
||||||
|
__ addptr(rsp, 6 * wordSize);
|
||||||
|
#endif
|
||||||
|
__ leave();
|
||||||
|
__ ret(0);
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
// This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
|
// This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
|
||||||
// to hide instruction latency
|
// to hide instruction latency
|
||||||
//
|
//
|
||||||
@ -4974,6 +5101,19 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
|
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (UseSHA1Intrinsics) {
|
||||||
|
StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
|
||||||
|
StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
|
||||||
|
StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
|
||||||
|
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
|
||||||
|
}
|
||||||
|
if (UseSHA256Intrinsics) {
|
||||||
|
StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
|
||||||
|
StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
|
||||||
|
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
|
||||||
|
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
|
||||||
|
}
|
||||||
|
|
||||||
// Generate GHASH intrinsics code
|
// Generate GHASH intrinsics code
|
||||||
if (UseGHASHIntrinsics) {
|
if (UseGHASHIntrinsics) {
|
||||||
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||||
|
@ -29,6 +29,12 @@
|
|||||||
#include "runtime/thread.inline.hpp"
|
#include "runtime/thread.inline.hpp"
|
||||||
#include "crc32c.h"
|
#include "crc32c.h"
|
||||||
|
|
||||||
|
// ALIGNED_(x): compiler-specific spelling of "align this object to x bytes".
// MSVC gets __declspec(align(x)); every other compiler gets the GCC-style
// aligned attribute.
#if !defined(_MSC_VER)
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#else
#define ALIGNED_(x) __declspec(align(x))
#endif
|
||||||
|
|
||||||
// Implementation of the platform-specific part of StubRoutines - for
|
// Implementation of the platform-specific part of StubRoutines - for
|
||||||
// a description of how to extend it, see the stubRoutines.hpp file.
|
// a description of how to extend it, see the stubRoutines.hpp file.
|
||||||
|
|
||||||
@ -37,6 +43,10 @@ address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
|
|||||||
address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
|
address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
|
||||||
address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
|
address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
|
||||||
address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
|
address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
|
||||||
|
// SHA-1: address of the upper word mask constant (NULL until assigned)
address StubRoutines::x86::_upper_word_mask_addr = NULL;
|
||||||
|
// SHA-1: address of the byte flip mask constant (NULL until assigned)
address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
|
||||||
|
// SHA-256: address of the k256 table (NULL until assigned)
address StubRoutines::x86::_k256_adr = NULL;
|
||||||
|
// SHA-256: address of the byte flip mask constant (NULL until assigned)
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
|
||||||
|
|
||||||
uint64_t StubRoutines::x86::_crc_by128_masks[] =
|
uint64_t StubRoutines::x86::_crc_by128_masks[] =
|
||||||
{
|
{
|
||||||
@ -236,3 +246,23 @@ void StubRoutines::x86::generate_CRC32C_table(bool is_pclmulqdq_table_supported)
|
|||||||
_crc32c_table = (juint*)pclmulqdq_table;
|
_crc32c_table = (juint*)pclmulqdq_table;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// k256 table for SHA-256: 64 32-bit constants, declared with 64-byte
// alignment via ALIGNED_. The values match the K constants listed in
// FIPS 180-4 for SHA-256.
ALIGNED_(64) juint StubRoutines::x86::_k256[] =
|
||||||
|
{
|
||||||
|
0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
|
||||||
|
0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
|
||||||
|
0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
|
||||||
|
0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
|
||||||
|
0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
|
||||||
|
0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
|
||||||
|
0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
|
||||||
|
0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
|
||||||
|
0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
|
||||||
|
0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
|
||||||
|
0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
|
||||||
|
0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
|
||||||
|
0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
|
||||||
|
0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
|
||||||
|
0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
|
||||||
|
0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
|
||||||
|
};
|
||||||
|
@ -46,6 +46,17 @@
|
|||||||
static address _ghash_long_swap_mask_addr;
|
static address _ghash_long_swap_mask_addr;
|
||||||
static address _ghash_byte_swap_mask_addr;
|
static address _ghash_byte_swap_mask_addr;
|
||||||
|
|
||||||
|
// upper word mask for sha1
|
||||||
|
static address _upper_word_mask_addr;
|
||||||
|
// byte flip mask for sha1
|
||||||
|
static address _shuffle_byte_flip_mask_addr;
|
||||||
|
|
||||||
|
//k256 table for sha256
|
||||||
|
static juint _k256[];
|
||||||
|
static address _k256_adr;
|
||||||
|
// byte flip mask for sha256
|
||||||
|
static address _pshuffle_byte_flip_mask_addr;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
|
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
|
||||||
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
|
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
|
||||||
@ -53,5 +64,9 @@
|
|||||||
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
|
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
|
||||||
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
|
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
|
||||||
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
|
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
|
||||||
|
// Returns the stored address of the SHA-1 upper word mask.
static address upper_word_mask_addr() {
  return _upper_word_mask_addr;
}
|
||||||
|
// Returns the stored address of the SHA-1 byte flip mask.
static address shuffle_byte_flip_mask_addr() {
  return _shuffle_byte_flip_mask_addr;
}
|
||||||
|
// Returns the stored address of the SHA-256 k256 table (field _k256_adr).
static address k256_addr() {
  return _k256_adr;
}
|
||||||
|
// Returns the stored address of the SHA-256 byte flip mask.
static address pshuffle_byte_flip_mask_addr() {
  return _pshuffle_byte_flip_mask_addr;
}
|
||||||
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
|
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
|
||||||
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
|
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
|
||||||
|
@ -68,10 +68,11 @@
|
|||||||
declare_constant(VM_Version::CPU_AVX512DQ) \
|
declare_constant(VM_Version::CPU_AVX512DQ) \
|
||||||
declare_constant(VM_Version::CPU_AVX512PF) \
|
declare_constant(VM_Version::CPU_AVX512PF) \
|
||||||
declare_constant(VM_Version::CPU_AVX512ER) \
|
declare_constant(VM_Version::CPU_AVX512ER) \
|
||||||
declare_constant(VM_Version::CPU_AVX512CD) \
|
declare_constant(VM_Version::CPU_AVX512CD)
|
||||||
declare_constant(VM_Version::CPU_AVX512BW)
|
|
||||||
|
|
||||||
#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
|
#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
|
||||||
declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL)
|
declare_preprocessor_constant("VM_Version::CPU_AVX512BW", CPU_AVX512BW) \
|
||||||
|
declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) \
|
||||||
|
declare_preprocessor_constant("VM_Version::CPU_SHA", CPU_SHA)
|
||||||
|
|
||||||
#endif // CPU_X86_VM_VMSTRUCTS_X86_HPP
|
#endif // CPU_X86_VM_VMSTRUCTS_X86_HPP
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -385,7 +385,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
|||||||
|
|
||||||
__ movdl(xmm0, rcx);
|
__ movdl(xmm0, rcx);
|
||||||
__ pshufd(xmm0, xmm0, 0x00);
|
__ pshufd(xmm0, xmm0, 0x00);
|
||||||
__ vinsertf128h(xmm0, xmm0, xmm0);
|
__ vinsertf128_high(xmm0, xmm0);
|
||||||
__ vmovdqu(xmm7, xmm0);
|
__ vmovdqu(xmm7, xmm0);
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
__ vmovdqu(xmm8, xmm0);
|
__ vmovdqu(xmm8, xmm0);
|
||||||
@ -577,7 +577,7 @@ void VM_Version::get_processor_features() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char buf[256];
|
char buf[256];
|
||||||
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||||
cores_per_cpu(), threads_per_core(),
|
cores_per_cpu(), threads_per_core(),
|
||||||
cpu_family(), _model, _stepping,
|
cpu_family(), _model, _stepping,
|
||||||
(supports_cmov() ? ", cmov" : ""),
|
(supports_cmov() ? ", cmov" : ""),
|
||||||
@ -608,7 +608,8 @@ void VM_Version::get_processor_features() {
|
|||||||
(supports_bmi1() ? ", bmi1" : ""),
|
(supports_bmi1() ? ", bmi1" : ""),
|
||||||
(supports_bmi2() ? ", bmi2" : ""),
|
(supports_bmi2() ? ", bmi2" : ""),
|
||||||
(supports_adx() ? ", adx" : ""),
|
(supports_adx() ? ", adx" : ""),
|
||||||
(supports_evex() ? ", evex" : ""));
|
(supports_evex() ? ", evex" : ""),
|
||||||
|
(supports_sha() ? ", sha" : ""));
|
||||||
_features_string = os::strdup(buf);
|
_features_string = os::strdup(buf);
|
||||||
|
|
||||||
// UseSSE is set to the smaller of what hardware supports and what
|
// UseSSE is set to the smaller of what hardware supports and what
|
||||||
@ -730,17 +731,29 @@ void VM_Version::get_processor_features() {
|
|||||||
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
|
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (UseSHA) {
|
if (supports_sha()) {
|
||||||
|
if (FLAG_IS_DEFAULT(UseSHA)) {
|
||||||
|
UseSHA = true;
|
||||||
|
}
|
||||||
|
} else if (UseSHA) {
|
||||||
warning("SHA instructions are not available on this CPU");
|
warning("SHA instructions are not available on this CPU");
|
||||||
FLAG_SET_DEFAULT(UseSHA, false);
|
FLAG_SET_DEFAULT(UseSHA, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (UseSHA1Intrinsics) {
|
if (UseSHA) {
|
||||||
|
if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
|
||||||
|
FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
|
||||||
|
}
|
||||||
|
} else if (UseSHA1Intrinsics) {
|
||||||
warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
|
warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
|
||||||
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
|
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (UseSHA256Intrinsics) {
|
if (UseSHA) {
|
||||||
|
if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
|
||||||
|
FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
|
||||||
|
}
|
||||||
|
} else if (UseSHA256Intrinsics) {
|
||||||
warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
|
warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
|
||||||
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
|
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
|
||||||
}
|
}
|
||||||
@ -750,6 +763,10 @@ void VM_Version::get_processor_features() {
|
|||||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
|
||||||
|
FLAG_SET_DEFAULT(UseSHA, false);
|
||||||
|
}
|
||||||
|
|
||||||
if (UseAdler32Intrinsics) {
|
if (UseAdler32Intrinsics) {
|
||||||
warning("Adler32Intrinsics not available on this CPU.");
|
warning("Adler32Intrinsics not available on this CPU.");
|
||||||
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
||||||
|
@ -221,7 +221,7 @@ class VM_Version : public Abstract_VM_Version {
|
|||||||
avx512pf : 1,
|
avx512pf : 1,
|
||||||
avx512er : 1,
|
avx512er : 1,
|
||||||
avx512cd : 1,
|
avx512cd : 1,
|
||||||
: 1,
|
sha : 1,
|
||||||
avx512bw : 1,
|
avx512bw : 1,
|
||||||
avx512vl : 1;
|
avx512vl : 1;
|
||||||
} bits;
|
} bits;
|
||||||
@ -282,11 +282,13 @@ protected:
|
|||||||
CPU_AVX512DQ = (1 << 27),
|
CPU_AVX512DQ = (1 << 27),
|
||||||
CPU_AVX512PF = (1 << 28),
|
CPU_AVX512PF = (1 << 28),
|
||||||
CPU_AVX512ER = (1 << 29),
|
CPU_AVX512ER = (1 << 29),
|
||||||
CPU_AVX512CD = (1 << 30),
|
CPU_AVX512CD = (1 << 30)
|
||||||
CPU_AVX512BW = (1 << 31)
|
// Keeping sign bit 31 unassigned.
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CPU_AVX512VL UCONST64(0x100000000) // EVEX instructions with smaller vector length : enums are limited to 32bit
|
#define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // enums are limited to 31 bit
|
||||||
|
#define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // EVEX instructions with smaller vector length
|
||||||
|
#define CPU_SHA ((uint64_t)UCONST64(0x400000000)) // SHA instructions
|
||||||
|
|
||||||
enum Extended_Family {
|
enum Extended_Family {
|
||||||
// AMD
|
// AMD
|
||||||
@ -516,6 +518,8 @@ protected:
|
|||||||
result |= CPU_ADX;
|
result |= CPU_ADX;
|
||||||
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
|
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
|
||||||
result |= CPU_BMI2;
|
result |= CPU_BMI2;
|
||||||
|
if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
|
||||||
|
result |= CPU_SHA;
|
||||||
if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
|
if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
|
||||||
result |= CPU_LZCNT;
|
result |= CPU_LZCNT;
|
||||||
// for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
|
// for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
|
||||||
@ -721,6 +725,7 @@ public:
|
|||||||
static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
|
static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
|
||||||
static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
|
static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
|
||||||
static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
|
static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
|
||||||
|
static bool supports_sha() { return (_features & CPU_SHA) != 0; }
|
||||||
// Intel features
|
// Intel features
|
||||||
static bool is_intel_family_core() { return is_intel() &&
|
static bool is_intel_family_core() { return is_intel() &&
|
||||||
extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
|
extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
|
||||||
|
@ -3179,13 +3179,13 @@ instruct Repl32B(vecY dst, rRegI src) %{
|
|||||||
"punpcklbw $dst,$dst\n\t"
|
"punpcklbw $dst,$dst\n\t"
|
||||||
"pshuflw $dst,$dst,0x00\n\t"
|
"pshuflw $dst,$dst,0x00\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate32B" %}
|
"vinserti128_high $dst,$dst\t! replicate32B" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ movdl($dst$$XMMRegister, $src$$Register);
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
||||||
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3196,12 +3196,12 @@ instruct Repl32B_mem(vecY dst, memory mem) %{
|
|||||||
format %{ "punpcklbw $dst,$mem\n\t"
|
format %{ "punpcklbw $dst,$mem\n\t"
|
||||||
"pshuflw $dst,$dst,0x00\n\t"
|
"pshuflw $dst,$dst,0x00\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate32B" %}
|
"vinserti128_high $dst,$dst\t! replicate32B" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ punpcklbw($dst$$XMMRegister, $mem$$Address);
|
__ punpcklbw($dst$$XMMRegister, $mem$$Address);
|
||||||
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3223,11 +3223,11 @@ instruct Repl32B_imm(vecY dst, immI con) %{
|
|||||||
match(Set dst (ReplicateB con));
|
match(Set dst (ReplicateB con));
|
||||||
format %{ "movq $dst,[$constantaddress]\n\t"
|
format %{ "movq $dst,[$constantaddress]\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
|
"vinserti128_high $dst,$dst\t! lreplicate32B($con)" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
|
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3298,12 +3298,12 @@ instruct Repl16S(vecY dst, rRegI src) %{
|
|||||||
format %{ "movd $dst,$src\n\t"
|
format %{ "movd $dst,$src\n\t"
|
||||||
"pshuflw $dst,$dst,0x00\n\t"
|
"pshuflw $dst,$dst,0x00\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate16S" %}
|
"vinserti128_high $dst,$dst\t! replicate16S" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ movdl($dst$$XMMRegister, $src$$Register);
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
||||||
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3313,11 +3313,11 @@ instruct Repl16S_mem(vecY dst, memory mem) %{
|
|||||||
match(Set dst (ReplicateS (LoadS mem)));
|
match(Set dst (ReplicateS (LoadS mem)));
|
||||||
format %{ "pshuflw $dst,$mem,0x00\n\t"
|
format %{ "pshuflw $dst,$mem,0x00\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate16S" %}
|
"vinserti128_high $dst,$dst\t! replicate16S" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
|
__ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3327,11 +3327,11 @@ instruct Repl16S_imm(vecY dst, immI con) %{
|
|||||||
match(Set dst (ReplicateS con));
|
match(Set dst (ReplicateS con));
|
||||||
format %{ "movq $dst,[$constantaddress]\n\t"
|
format %{ "movq $dst,[$constantaddress]\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
|
"vinserti128_high $dst,$dst\t! replicate16S($con)" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
|
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3363,11 +3363,11 @@ instruct Repl8I(vecY dst, rRegI src) %{
|
|||||||
match(Set dst (ReplicateI src));
|
match(Set dst (ReplicateI src));
|
||||||
format %{ "movd $dst,$src\n\t"
|
format %{ "movd $dst,$src\n\t"
|
||||||
"pshufd $dst,$dst,0x00\n\t"
|
"pshufd $dst,$dst,0x00\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate8I" %}
|
"vinserti128_high $dst,$dst\t! replicate8I" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ movdl($dst$$XMMRegister, $src$$Register);
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
||||||
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3376,10 +3376,10 @@ instruct Repl8I_mem(vecY dst, memory mem) %{
|
|||||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||||
match(Set dst (ReplicateI (LoadI mem)));
|
match(Set dst (ReplicateI (LoadI mem)));
|
||||||
format %{ "pshufd $dst,$mem,0x00\n\t"
|
format %{ "pshufd $dst,$mem,0x00\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate8I" %}
|
"vinserti128_high $dst,$dst\t! replicate8I" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3401,11 +3401,11 @@ instruct Repl8I_imm(vecY dst, immI con) %{
|
|||||||
match(Set dst (ReplicateI con));
|
match(Set dst (ReplicateI con));
|
||||||
format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
|
format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst" %}
|
"vinserti128_high $dst,$dst" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
|
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3430,11 +3430,11 @@ instruct Repl4L(vecY dst, rRegL src) %{
|
|||||||
match(Set dst (ReplicateL src));
|
match(Set dst (ReplicateL src));
|
||||||
format %{ "movdq $dst,$src\n\t"
|
format %{ "movdq $dst,$src\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate4L" %}
|
"vinserti128_high $dst,$dst\t! replicate4L" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ movdq($dst$$XMMRegister, $src$$Register);
|
__ movdq($dst$$XMMRegister, $src$$Register);
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3447,13 +3447,13 @@ instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
|
|||||||
"movdl $tmp,$src.hi\n\t"
|
"movdl $tmp,$src.hi\n\t"
|
||||||
"punpckldq $dst,$tmp\n\t"
|
"punpckldq $dst,$tmp\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate4L" %}
|
"vinserti128_high $dst,$dst\t! replicate4L" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ movdl($dst$$XMMRegister, $src$$Register);
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
||||||
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
|
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
|
||||||
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
|
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3464,11 +3464,11 @@ instruct Repl4L_imm(vecY dst, immL con) %{
|
|||||||
match(Set dst (ReplicateL con));
|
match(Set dst (ReplicateL con));
|
||||||
format %{ "movq $dst,[$constantaddress]\n\t"
|
format %{ "movq $dst,[$constantaddress]\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
|
"vinserti128_high $dst,$dst\t! replicate4L($con)" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ movq($dst$$XMMRegister, $constantaddress($con));
|
__ movq($dst$$XMMRegister, $constantaddress($con));
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3478,11 +3478,11 @@ instruct Repl4L_mem(vecY dst, memory mem) %{
|
|||||||
match(Set dst (ReplicateL (LoadL mem)));
|
match(Set dst (ReplicateL (LoadL mem)));
|
||||||
format %{ "movq $dst,$mem\n\t"
|
format %{ "movq $dst,$mem\n\t"
|
||||||
"punpcklqdq $dst,$dst\n\t"
|
"punpcklqdq $dst,$dst\n\t"
|
||||||
"vinserti128h $dst,$dst,$dst\t! replicate4L" %}
|
"vinserti128_high $dst,$dst\t! replicate4L" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ movq($dst$$XMMRegister, $mem$$Address);
|
__ movq($dst$$XMMRegister, $mem$$Address);
|
||||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3511,10 +3511,10 @@ instruct Repl8F(vecY dst, regF src) %{
|
|||||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||||
match(Set dst (ReplicateF src));
|
match(Set dst (ReplicateF src));
|
||||||
format %{ "pshufd $dst,$src,0x00\n\t"
|
format %{ "pshufd $dst,$src,0x00\n\t"
|
||||||
"vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
|
"vinsertf128_high $dst,$dst\t! replicate8F" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
|
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
|
||||||
__ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3523,10 +3523,10 @@ instruct Repl8F_mem(vecY dst, memory mem) %{
|
|||||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||||
match(Set dst (ReplicateF (LoadF mem)));
|
match(Set dst (ReplicateF (LoadF mem)));
|
||||||
format %{ "pshufd $dst,$mem,0x00\n\t"
|
format %{ "pshufd $dst,$mem,0x00\n\t"
|
||||||
"vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
|
"vinsertf128_high $dst,$dst\t! replicate8F" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||||
__ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3576,10 +3576,10 @@ instruct Repl4D(vecY dst, regD src) %{
|
|||||||
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
|
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
|
||||||
match(Set dst (ReplicateD src));
|
match(Set dst (ReplicateD src));
|
||||||
format %{ "pshufd $dst,$src,0x44\n\t"
|
format %{ "pshufd $dst,$src,0x44\n\t"
|
||||||
"vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
|
"vinsertf128_high $dst,$dst\t! replicate4D" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
|
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
|
||||||
__ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -3588,10 +3588,10 @@ instruct Repl4D_mem(vecY dst, memory mem) %{
|
|||||||
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
|
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
|
||||||
match(Set dst (ReplicateD (LoadD mem)));
|
match(Set dst (ReplicateD (LoadD mem)));
|
||||||
format %{ "pshufd $dst,$mem,0x44\n\t"
|
format %{ "pshufd $dst,$mem,0x44\n\t"
|
||||||
"vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
|
"vinsertf128_high $dst,$dst\t! replicate4D" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
|
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
|
||||||
__ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -4791,7 +4791,7 @@ instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
|
|||||||
effect(TEMP tmp, TEMP tmp2);
|
effect(TEMP tmp, TEMP tmp2);
|
||||||
format %{ "vphaddd $tmp,$src2,$src2\n\t"
|
format %{ "vphaddd $tmp,$src2,$src2\n\t"
|
||||||
"vphaddd $tmp,$tmp,$tmp2\n\t"
|
"vphaddd $tmp,$tmp,$tmp2\n\t"
|
||||||
"vextracti128 $tmp2,$tmp\n\t"
|
"vextracti128_high $tmp2,$tmp\n\t"
|
||||||
"vpaddd $tmp,$tmp,$tmp2\n\t"
|
"vpaddd $tmp,$tmp,$tmp2\n\t"
|
||||||
"movd $tmp2,$src1\n\t"
|
"movd $tmp2,$src1\n\t"
|
||||||
"vpaddd $tmp2,$tmp2,$tmp\n\t"
|
"vpaddd $tmp2,$tmp2,$tmp\n\t"
|
||||||
@ -4800,7 +4800,7 @@ instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
|
|||||||
int vector_len = 1;
|
int vector_len = 1;
|
||||||
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
|
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
|
||||||
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
|
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
|
||||||
__ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
|
__ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
|
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
|
||||||
__ movdl($tmp2$$XMMRegister, $src1$$Register);
|
__ movdl($tmp2$$XMMRegister, $src1$$Register);
|
||||||
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||||
@ -4813,7 +4813,7 @@ instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp,
|
|||||||
predicate(UseAVX > 2);
|
predicate(UseAVX > 2);
|
||||||
match(Set dst (AddReductionVI src1 src2));
|
match(Set dst (AddReductionVI src1 src2));
|
||||||
effect(TEMP tmp, TEMP tmp2);
|
effect(TEMP tmp, TEMP tmp2);
|
||||||
format %{ "vextracti128 $tmp,$src2\n\t"
|
format %{ "vextracti128_high $tmp,$src2\n\t"
|
||||||
"vpaddd $tmp,$tmp,$src2\n\t"
|
"vpaddd $tmp,$tmp,$src2\n\t"
|
||||||
"pshufd $tmp2,$tmp,0xE\n\t"
|
"pshufd $tmp2,$tmp,0xE\n\t"
|
||||||
"vpaddd $tmp,$tmp,$tmp2\n\t"
|
"vpaddd $tmp,$tmp,$tmp2\n\t"
|
||||||
@ -4824,7 +4824,7 @@ instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp,
|
|||||||
"movd $dst,$tmp2\t! add reduction8I" %}
|
"movd $dst,$tmp2\t! add reduction8I" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vector_len = 0;
|
int vector_len = 0;
|
||||||
__ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
|
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
|
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
|
||||||
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
||||||
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
|
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
|
||||||
@ -4841,9 +4841,9 @@ instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp,
|
|||||||
predicate(UseAVX > 2);
|
predicate(UseAVX > 2);
|
||||||
match(Set dst (AddReductionVI src1 src2));
|
match(Set dst (AddReductionVI src1 src2));
|
||||||
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
|
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
|
||||||
format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
|
format %{ "vextracti64x4_high $tmp3,$src2\n\t"
|
||||||
"vpaddd $tmp3,$tmp3,$src2\n\t"
|
"vpaddd $tmp3,$tmp3,$src2\n\t"
|
||||||
"vextracti128 $tmp,$tmp3\n\t"
|
"vextracti128_high $tmp,$tmp3\n\t"
|
||||||
"vpaddd $tmp,$tmp,$tmp3\n\t"
|
"vpaddd $tmp,$tmp,$tmp3\n\t"
|
||||||
"pshufd $tmp2,$tmp,0xE\n\t"
|
"pshufd $tmp2,$tmp,0xE\n\t"
|
||||||
"vpaddd $tmp,$tmp,$tmp2\n\t"
|
"vpaddd $tmp,$tmp,$tmp2\n\t"
|
||||||
@ -4853,9 +4853,9 @@ instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp,
|
|||||||
"vpaddd $tmp2,$tmp,$tmp2\n\t"
|
"vpaddd $tmp2,$tmp,$tmp2\n\t"
|
||||||
"movd $dst,$tmp2\t! mul reduction16I" %}
|
"movd $dst,$tmp2\t! mul reduction16I" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
|
__ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
|
__ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
|
||||||
__ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
|
__ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
|
||||||
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
|
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
|
||||||
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
||||||
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
|
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
|
||||||
@ -4892,7 +4892,7 @@ instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
|
|||||||
predicate(UseAVX > 2);
|
predicate(UseAVX > 2);
|
||||||
match(Set dst (AddReductionVL src1 src2));
|
match(Set dst (AddReductionVL src1 src2));
|
||||||
effect(TEMP tmp, TEMP tmp2);
|
effect(TEMP tmp, TEMP tmp2);
|
||||||
format %{ "vextracti128 $tmp,$src2\n\t"
|
format %{ "vextracti128_high $tmp,$src2\n\t"
|
||||||
"vpaddq $tmp2,$tmp,$src2\n\t"
|
"vpaddq $tmp2,$tmp,$src2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
||||||
@ -4900,7 +4900,7 @@ instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
|
|||||||
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
||||||
"movdq $dst,$tmp2\t! add reduction4L" %}
|
"movdq $dst,$tmp2\t! add reduction4L" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
|
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
|
__ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||||
@ -4915,9 +4915,9 @@ instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
|
|||||||
predicate(UseAVX > 2);
|
predicate(UseAVX > 2);
|
||||||
match(Set dst (AddReductionVL src1 src2));
|
match(Set dst (AddReductionVL src1 src2));
|
||||||
effect(TEMP tmp, TEMP tmp2);
|
effect(TEMP tmp, TEMP tmp2);
|
||||||
format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
|
format %{ "vextracti64x4_high $tmp2,$src2\n\t"
|
||||||
"vpaddq $tmp2,$tmp2,$src2\n\t"
|
"vpaddq $tmp2,$tmp2,$src2\n\t"
|
||||||
"vextracti128 $tmp,$tmp2\n\t"
|
"vextracti128_high $tmp,$tmp2\n\t"
|
||||||
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
||||||
@ -4925,9 +4925,9 @@ instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
|
|||||||
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
||||||
"movdq $dst,$tmp2\t! add reduction8L" %}
|
"movdq $dst,$tmp2\t! add reduction8L" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
|
__ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
|
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
|
||||||
__ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
|
__ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||||
@ -5026,7 +5026,7 @@ instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
|
|||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
"pshufd $tmp,$src2,0x03\n\t"
|
"pshufd $tmp,$src2,0x03\n\t"
|
||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
"vextractf128 $tmp2,$src2\n\t"
|
"vextractf128_high $tmp2,$src2\n\t"
|
||||||
"vaddss $dst,$dst,$tmp2\n\t"
|
"vaddss $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
@ -5042,7 +5042,7 @@ instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
|
|||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
|
__ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5065,7 +5065,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
"pshufd $tmp,$src2,0x03\n\t"
|
"pshufd $tmp,$src2,0x03\n\t"
|
||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x1\n\t"
|
"vextractf32x4 $tmp2,$src2,0x1\n\t"
|
||||||
"vaddss $dst,$dst,$tmp2\n\t"
|
"vaddss $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
@ -5073,7 +5073,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x03\n\t"
|
"pshufd $tmp,$tmp2,0x03\n\t"
|
||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x2\n\t"
|
"vextractf32x4 $tmp2,$src2,0x2\n\t"
|
||||||
"vaddss $dst,$dst,$tmp2\n\t"
|
"vaddss $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
@ -5081,7 +5081,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x03\n\t"
|
"pshufd $tmp,$tmp2,0x03\n\t"
|
||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x3\n\t"
|
"vextractf32x4 $tmp2,$src2,0x3\n\t"
|
||||||
"vaddss $dst,$dst,$tmp2\n\t"
|
"vaddss $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||||
"vaddss $dst,$dst,$tmp\n\t"
|
"vaddss $dst,$dst,$tmp\n\t"
|
||||||
@ -5097,7 +5097,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5105,7 +5105,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5113,7 +5113,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5162,7 +5162,7 @@ instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
|
|||||||
format %{ "vaddsd $dst,$dst,$src2\n\t"
|
format %{ "vaddsd $dst,$dst,$src2\n\t"
|
||||||
"pshufd $tmp,$src2,0xE\n\t"
|
"pshufd $tmp,$src2,0xE\n\t"
|
||||||
"vaddsd $dst,$dst,$tmp\n\t"
|
"vaddsd $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4h $tmp2,$src2, 0x1\n\t"
|
"vextractf32x4 $tmp2,$src2,0x1\n\t"
|
||||||
"vaddsd $dst,$dst,$tmp2\n\t"
|
"vaddsd $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
|
"vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
|
||||||
@ -5170,7 +5170,7 @@ instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
|
|||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5185,15 +5185,15 @@ instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
|
|||||||
format %{ "vaddsd $dst,$dst,$src2\n\t"
|
format %{ "vaddsd $dst,$dst,$src2\n\t"
|
||||||
"pshufd $tmp,$src2,0xE\n\t"
|
"pshufd $tmp,$src2,0xE\n\t"
|
||||||
"vaddsd $dst,$dst,$tmp\n\t"
|
"vaddsd $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x1\n\t"
|
"vextractf32x4 $tmp2,$src2,0x1\n\t"
|
||||||
"vaddsd $dst,$dst,$tmp2\n\t"
|
"vaddsd $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vaddsd $dst,$dst,$tmp\n\t"
|
"vaddsd $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x2\n\t"
|
"vextractf32x4 $tmp2,$src2,0x2\n\t"
|
||||||
"vaddsd $dst,$dst,$tmp2\n\t"
|
"vaddsd $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vaddsd $dst,$dst,$tmp\n\t"
|
"vaddsd $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x3\n\t"
|
"vextractf32x4 $tmp2,$src2,0x3\n\t"
|
||||||
"vaddsd $dst,$dst,$tmp2\n\t"
|
"vaddsd $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
|
"vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
|
||||||
@ -5201,15 +5201,15 @@ instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
|
|||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5307,7 +5307,7 @@ instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
|
|||||||
predicate(UseAVX > 0);
|
predicate(UseAVX > 0);
|
||||||
match(Set dst (MulReductionVI src1 src2));
|
match(Set dst (MulReductionVI src1 src2));
|
||||||
effect(TEMP tmp, TEMP tmp2);
|
effect(TEMP tmp, TEMP tmp2);
|
||||||
format %{ "vextracti128 $tmp,$src2\n\t"
|
format %{ "vextracti128_high $tmp,$src2\n\t"
|
||||||
"vpmulld $tmp,$tmp,$src2\n\t"
|
"vpmulld $tmp,$tmp,$src2\n\t"
|
||||||
"pshufd $tmp2,$tmp,0xE\n\t"
|
"pshufd $tmp2,$tmp,0xE\n\t"
|
||||||
"vpmulld $tmp,$tmp,$tmp2\n\t"
|
"vpmulld $tmp,$tmp,$tmp2\n\t"
|
||||||
@ -5318,7 +5318,7 @@ instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
|
|||||||
"movd $dst,$tmp2\t! mul reduction8I" %}
|
"movd $dst,$tmp2\t! mul reduction8I" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vector_len = 0;
|
int vector_len = 0;
|
||||||
__ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
|
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
|
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
|
||||||
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
||||||
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
|
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
|
||||||
@ -5335,9 +5335,9 @@ instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF
|
|||||||
predicate(UseAVX > 2);
|
predicate(UseAVX > 2);
|
||||||
match(Set dst (MulReductionVI src1 src2));
|
match(Set dst (MulReductionVI src1 src2));
|
||||||
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
|
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
|
||||||
format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
|
format %{ "vextracti64x4_high $tmp3,$src2\n\t"
|
||||||
"vpmulld $tmp3,$tmp3,$src2\n\t"
|
"vpmulld $tmp3,$tmp3,$src2\n\t"
|
||||||
"vextracti128 $tmp,$tmp3\n\t"
|
"vextracti128_high $tmp,$tmp3\n\t"
|
||||||
"vpmulld $tmp,$tmp,$src2\n\t"
|
"vpmulld $tmp,$tmp,$src2\n\t"
|
||||||
"pshufd $tmp2,$tmp,0xE\n\t"
|
"pshufd $tmp2,$tmp,0xE\n\t"
|
||||||
"vpmulld $tmp,$tmp,$tmp2\n\t"
|
"vpmulld $tmp,$tmp,$tmp2\n\t"
|
||||||
@ -5347,9 +5347,9 @@ instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF
|
|||||||
"vpmulld $tmp2,$tmp,$tmp2\n\t"
|
"vpmulld $tmp2,$tmp,$tmp2\n\t"
|
||||||
"movd $dst,$tmp2\t! mul reduction16I" %}
|
"movd $dst,$tmp2\t! mul reduction16I" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
|
__ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
|
__ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
|
||||||
__ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
|
__ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
|
||||||
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
|
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
|
||||||
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
||||||
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
|
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
|
||||||
@ -5386,7 +5386,7 @@ instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
|
|||||||
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
|
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
|
||||||
match(Set dst (MulReductionVL src1 src2));
|
match(Set dst (MulReductionVL src1 src2));
|
||||||
effect(TEMP tmp, TEMP tmp2);
|
effect(TEMP tmp, TEMP tmp2);
|
||||||
format %{ "vextracti128 $tmp,$src2\n\t"
|
format %{ "vextracti128_high $tmp,$src2\n\t"
|
||||||
"vpmullq $tmp2,$tmp,$src2\n\t"
|
"vpmullq $tmp2,$tmp,$src2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
||||||
@ -5394,7 +5394,7 @@ instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
|
|||||||
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
||||||
"movdq $dst,$tmp2\t! mul reduction4L" %}
|
"movdq $dst,$tmp2\t! mul reduction4L" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
|
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
|
__ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||||
@ -5409,9 +5409,9 @@ instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
|
|||||||
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
|
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
|
||||||
match(Set dst (MulReductionVL src1 src2));
|
match(Set dst (MulReductionVL src1 src2));
|
||||||
effect(TEMP tmp, TEMP tmp2);
|
effect(TEMP tmp, TEMP tmp2);
|
||||||
format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
|
format %{ "vextracti64x4_high $tmp2,$src2\n\t"
|
||||||
"vpmullq $tmp2,$tmp2,$src2\n\t"
|
"vpmullq $tmp2,$tmp2,$src2\n\t"
|
||||||
"vextracti128 $tmp,$tmp2\n\t"
|
"vextracti128_high $tmp,$tmp2\n\t"
|
||||||
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
||||||
@ -5419,9 +5419,9 @@ instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
|
|||||||
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
||||||
"movdq $dst,$tmp2\t! mul reduction8L" %}
|
"movdq $dst,$tmp2\t! mul reduction8L" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
|
__ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
|
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
|
||||||
__ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
|
__ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||||
@ -5520,7 +5520,7 @@ instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
|
|||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
"pshufd $tmp,$src2,0x03\n\t"
|
"pshufd $tmp,$src2,0x03\n\t"
|
||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
"vextractf128 $tmp2,$src2\n\t"
|
"vextractf128_high $tmp2,$src2\n\t"
|
||||||
"vmulss $dst,$dst,$tmp2\n\t"
|
"vmulss $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
@ -5536,7 +5536,7 @@ instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
|
|||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
|
__ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5559,7 +5559,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
"pshufd $tmp,$src2,0x03\n\t"
|
"pshufd $tmp,$src2,0x03\n\t"
|
||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x1\n\t"
|
"vextractf32x4 $tmp2,$src2,0x1\n\t"
|
||||||
"vmulss $dst,$dst,$tmp2\n\t"
|
"vmulss $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
@ -5567,7 +5567,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x03\n\t"
|
"pshufd $tmp,$tmp2,0x03\n\t"
|
||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x2\n\t"
|
"vextractf32x4 $tmp2,$src2,0x2\n\t"
|
||||||
"vmulss $dst,$dst,$tmp2\n\t"
|
"vmulss $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
@ -5575,7 +5575,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x03\n\t"
|
"pshufd $tmp,$tmp2,0x03\n\t"
|
||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x3\n\t"
|
"vextractf32x4 $tmp2,$src2,0x3\n\t"
|
||||||
"vmulss $dst,$dst,$tmp2\n\t"
|
"vmulss $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||||
"vmulss $dst,$dst,$tmp\n\t"
|
"vmulss $dst,$dst,$tmp\n\t"
|
||||||
@ -5591,7 +5591,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5599,7 +5599,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5607,7 +5607,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
|||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5656,7 +5656,7 @@ instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
|
|||||||
format %{ "vmulsd $dst,$dst,$src2\n\t"
|
format %{ "vmulsd $dst,$dst,$src2\n\t"
|
||||||
"pshufd $tmp,$src2,0xE\n\t"
|
"pshufd $tmp,$src2,0xE\n\t"
|
||||||
"vmulsd $dst,$dst,$tmp\n\t"
|
"vmulsd $dst,$dst,$tmp\n\t"
|
||||||
"vextractf128 $tmp2,$src2\n\t"
|
"vextractf128_high $tmp2,$src2\n\t"
|
||||||
"vmulsd $dst,$dst,$tmp2\n\t"
|
"vmulsd $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
|
"vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
|
||||||
@ -5664,7 +5664,7 @@ instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
|
|||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
|
__ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
@ -5679,15 +5679,15 @@ instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
|
|||||||
format %{ "vmulsd $dst,$dst,$src2\n\t"
|
format %{ "vmulsd $dst,$dst,$src2\n\t"
|
||||||
"pshufd $tmp,$src2,0xE\n\t"
|
"pshufd $tmp,$src2,0xE\n\t"
|
||||||
"vmulsd $dst,$dst,$tmp\n\t"
|
"vmulsd $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x1\n\t"
|
"vextractf32x4 $tmp2,$src2,0x1\n\t"
|
||||||
"vmulsd $dst,$dst,$tmp2\n\t"
|
"vmulsd $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$src2,0xE\n\t"
|
"pshufd $tmp,$src2,0xE\n\t"
|
||||||
"vmulsd $dst,$dst,$tmp\n\t"
|
"vmulsd $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x2\n\t"
|
"vextractf32x4 $tmp2,$src2,0x2\n\t"
|
||||||
"vmulsd $dst,$dst,$tmp2\n\t"
|
"vmulsd $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vmulsd $dst,$dst,$tmp\n\t"
|
"vmulsd $dst,$dst,$tmp\n\t"
|
||||||
"vextractf32x4 $tmp2,$src2, 0x3\n\t"
|
"vextractf32x4 $tmp2,$src2,0x3\n\t"
|
||||||
"vmulsd $dst,$dst,$tmp2\n\t"
|
"vmulsd $dst,$dst,$tmp2\n\t"
|
||||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||||
"vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
|
"vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
|
||||||
@ -5695,15 +5695,15 @@ instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
|
|||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||||
|
@ -1420,9 +1420,6 @@ const bool Matcher::isSimpleConstant64(jlong value) {
|
|||||||
// The ecx parameter to rep stos for the ClearArray node is in dwords.
|
// The ecx parameter to rep stos for the ClearArray node is in dwords.
|
||||||
const bool Matcher::init_array_count_is_in_bytes = false;
|
const bool Matcher::init_array_count_is_in_bytes = false;
|
||||||
|
|
||||||
// Threshold size for cleararray.
|
|
||||||
const int Matcher::init_array_short_size = 8 * BytesPerLong;
|
|
||||||
|
|
||||||
// Needs 2 CMOV's for longs.
|
// Needs 2 CMOV's for longs.
|
||||||
const int Matcher::long_cmove_cost() { return 1; }
|
const int Matcher::long_cmove_cost() { return 1; }
|
||||||
|
|
||||||
@ -11369,27 +11366,54 @@ instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
|
|||||||
// =======================================================================
|
// =======================================================================
|
||||||
// fast clearing of an array
|
// fast clearing of an array
|
||||||
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
||||||
predicate(!UseFastStosb);
|
predicate(!((ClearArrayNode*)n)->is_large());
|
||||||
match(Set dummy (ClearArray cnt base));
|
match(Set dummy (ClearArray cnt base));
|
||||||
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
||||||
format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
|
|
||||||
"SHL ECX,1\t# Convert doublewords to words\n\t"
|
format %{ $$template
|
||||||
"REP STOS\t# store EAX into [EDI++] while ECX--" %}
|
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
|
||||||
|
$$emit$$"CMP InitArrayShortSize,rcx\n\t"
|
||||||
|
$$emit$$"JG LARGE\n\t"
|
||||||
|
$$emit$$"SHL ECX, 1\n\t"
|
||||||
|
$$emit$$"DEC ECX\n\t"
|
||||||
|
$$emit$$"JS DONE\t# Zero length\n\t"
|
||||||
|
$$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
|
||||||
|
$$emit$$"DEC ECX\n\t"
|
||||||
|
$$emit$$"JGE LOOP\n\t"
|
||||||
|
$$emit$$"JMP DONE\n\t"
|
||||||
|
$$emit$$"# LARGE:\n\t"
|
||||||
|
if (UseFastStosb) {
|
||||||
|
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
|
||||||
|
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
|
||||||
|
} else {
|
||||||
|
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
|
||||||
|
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
|
||||||
|
}
|
||||||
|
$$emit$$"# DONE"
|
||||||
|
%}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
|
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
||||||
predicate(UseFastStosb);
|
predicate(((ClearArrayNode*)n)->is_large());
|
||||||
match(Set dummy (ClearArray cnt base));
|
match(Set dummy (ClearArray cnt base));
|
||||||
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
||||||
format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
|
format %{ $$template
|
||||||
"SHL ECX,3\t# Convert doublewords to bytes\n\t"
|
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
|
||||||
"REP STOSB\t# store EAX into [EDI++] while ECX--" %}
|
if (UseFastStosb) {
|
||||||
|
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
|
||||||
|
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
|
||||||
|
} else {
|
||||||
|
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
|
||||||
|
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
|
||||||
|
}
|
||||||
|
$$emit$$"# DONE"
|
||||||
|
%}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
|
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
@ -1637,9 +1637,6 @@ const bool Matcher::isSimpleConstant64(jlong value) {
|
|||||||
// The ecx parameter to rep stosq for the ClearArray node is in words.
|
// The ecx parameter to rep stosq for the ClearArray node is in words.
|
||||||
const bool Matcher::init_array_count_is_in_bytes = false;
|
const bool Matcher::init_array_count_is_in_bytes = false;
|
||||||
|
|
||||||
// Threshold size for cleararray.
|
|
||||||
const int Matcher::init_array_short_size = 8 * BytesPerLong;
|
|
||||||
|
|
||||||
// No additional cost for CMOVL.
|
// No additional cost for CMOVL.
|
||||||
const int Matcher::long_cmove_cost() { return 0; }
|
const int Matcher::long_cmove_cost() { return 0; }
|
||||||
|
|
||||||
@ -10460,31 +10457,55 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
|
|||||||
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
|
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
|
||||||
rFlagsReg cr)
|
rFlagsReg cr)
|
||||||
%{
|
%{
|
||||||
predicate(!UseFastStosb);
|
predicate(!((ClearArrayNode*)n)->is_large());
|
||||||
match(Set dummy (ClearArray cnt base));
|
match(Set dummy (ClearArray cnt base));
|
||||||
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
||||||
|
|
||||||
format %{ "xorq rax, rax\t# ClearArray:\n\t"
|
format %{ $$template
|
||||||
"rep stosq\t# Store rax to *rdi++ while rcx--" %}
|
$$emit$$"xorq rax, rax\t# ClearArray:\n\t"
|
||||||
|
$$emit$$"cmp InitArrayShortSize,rcx\n\t"
|
||||||
|
$$emit$$"jg LARGE\n\t"
|
||||||
|
$$emit$$"dec rcx\n\t"
|
||||||
|
$$emit$$"js DONE\t# Zero length\n\t"
|
||||||
|
$$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
|
||||||
|
$$emit$$"dec rcx\n\t"
|
||||||
|
$$emit$$"jge LOOP\n\t"
|
||||||
|
$$emit$$"jmp DONE\n\t"
|
||||||
|
$$emit$$"# LARGE:\n\t"
|
||||||
|
if (UseFastStosb) {
|
||||||
|
$$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
|
||||||
|
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
|
||||||
|
} else {
|
||||||
|
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
|
||||||
|
}
|
||||||
|
$$emit$$"# DONE"
|
||||||
|
%}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
|
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
|
||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
|
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
|
||||||
rFlagsReg cr)
|
rFlagsReg cr)
|
||||||
%{
|
%{
|
||||||
predicate(UseFastStosb);
|
predicate(((ClearArrayNode*)n)->is_large());
|
||||||
match(Set dummy (ClearArray cnt base));
|
match(Set dummy (ClearArray cnt base));
|
||||||
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
||||||
format %{ "xorq rax, rax\t# ClearArray:\n\t"
|
|
||||||
"shlq rcx,3\t# Convert doublewords to bytes\n\t"
|
format %{ $$template
|
||||||
"rep stosb\t# Store rax to *rdi++ while rcx--" %}
|
$$emit$$"xorq rax, rax\t# ClearArray:\n\t"
|
||||||
ins_encode %{
|
if (UseFastStosb) {
|
||||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
|
$$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
|
||||||
|
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
|
||||||
|
} else {
|
||||||
|
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
|
||||||
|
}
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_encode %{
|
||||||
|
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
|
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
|
||||||
|
@ -203,7 +203,8 @@ public class AMD64 extends Architecture {
|
|||||||
AVX512ER,
|
AVX512ER,
|
||||||
AVX512CD,
|
AVX512CD,
|
||||||
AVX512BW,
|
AVX512BW,
|
||||||
AVX512VL
|
AVX512VL,
|
||||||
|
SHA
|
||||||
}
|
}
|
||||||
|
|
||||||
private final EnumSet<CPUFeature> features;
|
private final EnumSet<CPUFeature> features;
|
||||||
|
@ -122,6 +122,9 @@ public class AMD64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFacto
|
|||||||
if ((config.vmVersionFeatures & config.amd64AVX512VL) != 0) {
|
if ((config.vmVersionFeatures & config.amd64AVX512VL) != 0) {
|
||||||
features.add(AMD64.CPUFeature.AVX512VL);
|
features.add(AMD64.CPUFeature.AVX512VL);
|
||||||
}
|
}
|
||||||
|
if ((config.vmVersionFeatures & config.amd64SHA) != 0) {
|
||||||
|
features.add(AMD64.CPUFeature.SHA);
|
||||||
|
}
|
||||||
return features;
|
return features;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,7 +41,6 @@ import jdk.vm.ci.meta.DeoptimizationAction;
|
|||||||
import jdk.vm.ci.meta.DeoptimizationReason;
|
import jdk.vm.ci.meta.DeoptimizationReason;
|
||||||
import jdk.vm.ci.meta.JavaConstant;
|
import jdk.vm.ci.meta.JavaConstant;
|
||||||
import jdk.vm.ci.meta.JavaKind;
|
import jdk.vm.ci.meta.JavaKind;
|
||||||
import jdk.vm.ci.meta.JavaType;
|
|
||||||
import jdk.vm.ci.meta.MetaAccessProvider;
|
import jdk.vm.ci.meta.MetaAccessProvider;
|
||||||
import jdk.vm.ci.meta.ResolvedJavaField;
|
import jdk.vm.ci.meta.ResolvedJavaField;
|
||||||
import jdk.vm.ci.meta.ResolvedJavaMethod;
|
import jdk.vm.ci.meta.ResolvedJavaMethod;
|
||||||
@ -111,23 +110,26 @@ public class HotSpotMetaAccessProvider implements MetaAccessProvider, HotSpotPro
|
|||||||
}
|
}
|
||||||
|
|
||||||
public ResolvedJavaField lookupJavaField(Field reflectionField) {
|
public ResolvedJavaField lookupJavaField(Field reflectionField) {
|
||||||
String name = reflectionField.getName();
|
|
||||||
Class<?> fieldHolder = reflectionField.getDeclaringClass();
|
Class<?> fieldHolder = reflectionField.getDeclaringClass();
|
||||||
Class<?> fieldType = reflectionField.getType();
|
|
||||||
// java.lang.reflect.Field's modifiers should be enough here since VM internal modifier bits
|
|
||||||
// are not used (yet).
|
|
||||||
final int modifiers = reflectionField.getModifiers();
|
|
||||||
final long offset = Modifier.isStatic(modifiers) ? UNSAFE.staticFieldOffset(reflectionField) : UNSAFE.objectFieldOffset(reflectionField);
|
|
||||||
|
|
||||||
HotSpotResolvedObjectType holder = fromObjectClass(fieldHolder);
|
HotSpotResolvedObjectType holder = fromObjectClass(fieldHolder);
|
||||||
JavaType type = runtime.fromClass(fieldType);
|
if (Modifier.isStatic(reflectionField.getModifiers())) {
|
||||||
|
final long offset = UNSAFE.staticFieldOffset(reflectionField);
|
||||||
if (offset != -1) {
|
for (ResolvedJavaField field : holder.getStaticFields()) {
|
||||||
HotSpotResolvedObjectType resolved = holder;
|
if (offset == ((HotSpotResolvedJavaField) field).offset()) {
|
||||||
return resolved.createField(name, type, offset, modifiers);
|
return field;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
throw new JVMCIError("unresolved field %s", reflectionField);
|
final long offset = UNSAFE.objectFieldOffset(reflectionField);
|
||||||
|
for (ResolvedJavaField field : holder.getInstanceFields(false)) {
|
||||||
|
if (offset == ((HotSpotResolvedJavaField) field).offset()) {
|
||||||
|
return field;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
throw new JVMCIError("unresolved field %s", reflectionField);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int intMaskRight(int n) {
|
private static int intMaskRight(int n) {
|
||||||
|
@ -945,6 +945,7 @@ public class HotSpotVMConfig {
|
|||||||
@HotSpotVMConstant(name = "VM_Version::CPU_AVX512CD", archs = {"amd64"}) @Stable public long amd64AVX512CD;
|
@HotSpotVMConstant(name = "VM_Version::CPU_AVX512CD", archs = {"amd64"}) @Stable public long amd64AVX512CD;
|
||||||
@HotSpotVMConstant(name = "VM_Version::CPU_AVX512BW", archs = {"amd64"}) @Stable public long amd64AVX512BW;
|
@HotSpotVMConstant(name = "VM_Version::CPU_AVX512BW", archs = {"amd64"}) @Stable public long amd64AVX512BW;
|
||||||
@HotSpotVMConstant(name = "VM_Version::CPU_AVX512VL", archs = {"amd64"}) @Stable public long amd64AVX512VL;
|
@HotSpotVMConstant(name = "VM_Version::CPU_AVX512VL", archs = {"amd64"}) @Stable public long amd64AVX512VL;
|
||||||
|
@HotSpotVMConstant(name = "VM_Version::CPU_SHA", archs = {"amd64"}) @Stable public long amd64SHA;
|
||||||
|
|
||||||
// SPARC specific values
|
// SPARC specific values
|
||||||
@HotSpotVMConstant(name = "VM_Version::vis3_instructions_m", archs = {"sparc"}) @Stable public int sparcVis3Instructions;
|
@HotSpotVMConstant(name = "VM_Version::vis3_instructions_m", archs = {"sparc"}) @Stable public int sparcVis3Instructions;
|
||||||
|
@ -144,6 +144,7 @@ pthread_t os::Linux::_main_thread;
|
|||||||
int os::Linux::_page_size = -1;
|
int os::Linux::_page_size = -1;
|
||||||
const int os::Linux::_vm_default_page_size = (8 * K);
|
const int os::Linux::_vm_default_page_size = (8 * K);
|
||||||
bool os::Linux::_supports_fast_thread_cpu_time = false;
|
bool os::Linux::_supports_fast_thread_cpu_time = false;
|
||||||
|
uint32_t os::Linux::_os_version = 0;
|
||||||
const char * os::Linux::_glibc_version = NULL;
|
const char * os::Linux::_glibc_version = NULL;
|
||||||
const char * os::Linux::_libpthread_version = NULL;
|
const char * os::Linux::_libpthread_version = NULL;
|
||||||
pthread_condattr_t os::Linux::_condattr[1];
|
pthread_condattr_t os::Linux::_condattr[1];
|
||||||
@ -4356,6 +4357,48 @@ jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {
|
|||||||
return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
|
return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void os::Linux::initialize_os_info() {
|
||||||
|
assert(_os_version == 0, "OS info already initialized");
|
||||||
|
|
||||||
|
struct utsname _uname;
|
||||||
|
|
||||||
|
uint32_t major;
|
||||||
|
uint32_t minor;
|
||||||
|
uint32_t fix;
|
||||||
|
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
// Kernel version is unknown if
|
||||||
|
// verification below fails.
|
||||||
|
_os_version = 0x01000000;
|
||||||
|
|
||||||
|
rc = uname(&_uname);
|
||||||
|
if (rc != -1) {
|
||||||
|
|
||||||
|
rc = sscanf(_uname.release,"%d.%d.%d", &major, &minor, &fix);
|
||||||
|
if (rc == 3) {
|
||||||
|
|
||||||
|
if (major < 256 && minor < 256 && fix < 256) {
|
||||||
|
// Kernel version format is as expected,
|
||||||
|
// set it overriding unknown state.
|
||||||
|
_os_version = (major << 16) |
|
||||||
|
(minor << 8 ) |
|
||||||
|
(fix << 0 ) ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t os::Linux::os_version() {
|
||||||
|
assert(_os_version != 0, "not initialized");
|
||||||
|
return _os_version & 0x00FFFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool os::Linux::os_version_is_known() {
|
||||||
|
assert(_os_version != 0, "not initialized");
|
||||||
|
return _os_version & 0x01000000 ? false : true;
|
||||||
|
}
|
||||||
|
|
||||||
/////
|
/////
|
||||||
// glibc on Linux platform uses non-documented flag
|
// glibc on Linux platform uses non-documented flag
|
||||||
// to indicate, that some special sort of signal
|
// to indicate, that some special sort of signal
|
||||||
@ -4578,6 +4621,8 @@ void os::init(void) {
|
|||||||
|
|
||||||
Linux::initialize_system_info();
|
Linux::initialize_system_info();
|
||||||
|
|
||||||
|
Linux::initialize_os_info();
|
||||||
|
|
||||||
// main_thread points to the aboriginal thread
|
// main_thread points to the aboriginal thread
|
||||||
Linux::_main_thread = pthread_self();
|
Linux::_main_thread = pthread_self();
|
||||||
|
|
||||||
|
@ -56,6 +56,15 @@ class Linux {
|
|||||||
|
|
||||||
static GrowableArray<int>* _cpu_to_node;
|
static GrowableArray<int>* _cpu_to_node;
|
||||||
|
|
||||||
|
// 0x00000000 = uninitialized,
|
||||||
|
// 0x01000000 = kernel version unknown,
|
||||||
|
// otherwise a 32-bit number:
|
||||||
|
// 0x00AABBCC
|
||||||
|
// AA, Major Version
|
||||||
|
// BB, Minor Version
|
||||||
|
// CC, Fix Version
|
||||||
|
static uint32_t _os_version;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
static julong _physical_memory;
|
static julong _physical_memory;
|
||||||
@ -198,6 +207,10 @@ class Linux {
|
|||||||
|
|
||||||
static jlong fast_thread_cpu_time(clockid_t clockid);
|
static jlong fast_thread_cpu_time(clockid_t clockid);
|
||||||
|
|
||||||
|
static void initialize_os_info();
|
||||||
|
static bool os_version_is_known();
|
||||||
|
static uint32_t os_version();
|
||||||
|
|
||||||
// pthread_cond clock support
|
// pthread_cond clock support
|
||||||
private:
|
private:
|
||||||
static pthread_condattr_t _condattr[1];
|
static pthread_condattr_t _condattr[1];
|
||||||
|
@ -471,7 +471,7 @@ void Canonicalizer::do_Intrinsic (Intrinsic* x) {
|
|||||||
InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant();
|
InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant();
|
||||||
if (c != NULL && !c->value()->is_null_object()) {
|
if (c != NULL && !c->value()->is_null_object()) {
|
||||||
// ciInstance::java_mirror_type() returns non-NULL only for Java mirrors
|
// ciInstance::java_mirror_type() returns non-NULL only for Java mirrors
|
||||||
ciType* t = c->value()->as_instance()->java_mirror_type();
|
ciType* t = c->value()->java_mirror_type();
|
||||||
if (t->is_klass()) {
|
if (t->is_klass()) {
|
||||||
// substitute cls.isInstance(obj) of a constant Class into
|
// substitute cls.isInstance(obj) of a constant Class into
|
||||||
// an InstanceOf instruction
|
// an InstanceOf instruction
|
||||||
@ -487,6 +487,17 @@ void Canonicalizer::do_Intrinsic (Intrinsic* x) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case vmIntrinsics::_isPrimitive : {
|
||||||
|
assert(x->number_of_arguments() == 1, "wrong type");
|
||||||
|
|
||||||
|
// Class.isPrimitive is known on constant classes:
|
||||||
|
InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant();
|
||||||
|
if (c != NULL && !c->value()->is_null_object()) {
|
||||||
|
ciType* t = c->value()->java_mirror_type();
|
||||||
|
set_constant(t->is_primitive_type());
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,6 +148,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
|
|||||||
case vmIntrinsics::_longBitsToDouble:
|
case vmIntrinsics::_longBitsToDouble:
|
||||||
case vmIntrinsics::_getClass:
|
case vmIntrinsics::_getClass:
|
||||||
case vmIntrinsics::_isInstance:
|
case vmIntrinsics::_isInstance:
|
||||||
|
case vmIntrinsics::_isPrimitive:
|
||||||
case vmIntrinsics::_currentThread:
|
case vmIntrinsics::_currentThread:
|
||||||
case vmIntrinsics::_dabs:
|
case vmIntrinsics::_dabs:
|
||||||
case vmIntrinsics::_dsqrt:
|
case vmIntrinsics::_dsqrt:
|
||||||
|
@ -1296,6 +1296,25 @@ void LIRGenerator::do_getClass(Intrinsic* x) {
|
|||||||
__ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
|
__ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// java.lang.Class::isPrimitive()
|
||||||
|
void LIRGenerator::do_isPrimitive(Intrinsic* x) {
|
||||||
|
assert(x->number_of_arguments() == 1, "wrong type");
|
||||||
|
|
||||||
|
LIRItem rcvr(x->argument_at(0), this);
|
||||||
|
rcvr.load_item();
|
||||||
|
LIR_Opr temp = new_register(T_METADATA);
|
||||||
|
LIR_Opr result = rlock_result(x);
|
||||||
|
|
||||||
|
CodeEmitInfo* info = NULL;
|
||||||
|
if (x->needs_null_check()) {
|
||||||
|
info = state_for(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
__ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info);
|
||||||
|
__ cmp(lir_cond_notEqual, temp, LIR_OprFact::intConst(0));
|
||||||
|
__ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Example: Thread.currentThread()
|
// Example: Thread.currentThread()
|
||||||
void LIRGenerator::do_currentThread(Intrinsic* x) {
|
void LIRGenerator::do_currentThread(Intrinsic* x) {
|
||||||
@ -3098,6 +3117,7 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
|
|||||||
|
|
||||||
case vmIntrinsics::_Object_init: do_RegisterFinalizer(x); break;
|
case vmIntrinsics::_Object_init: do_RegisterFinalizer(x); break;
|
||||||
case vmIntrinsics::_isInstance: do_isInstance(x); break;
|
case vmIntrinsics::_isInstance: do_isInstance(x); break;
|
||||||
|
case vmIntrinsics::_isPrimitive: do_isPrimitive(x); break;
|
||||||
case vmIntrinsics::_getClass: do_getClass(x); break;
|
case vmIntrinsics::_getClass: do_getClass(x); break;
|
||||||
case vmIntrinsics::_currentThread: do_currentThread(x); break;
|
case vmIntrinsics::_currentThread: do_currentThread(x); break;
|
||||||
|
|
||||||
|
@ -246,6 +246,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
|
|||||||
|
|
||||||
void do_RegisterFinalizer(Intrinsic* x);
|
void do_RegisterFinalizer(Intrinsic* x);
|
||||||
void do_isInstance(Intrinsic* x);
|
void do_isInstance(Intrinsic* x);
|
||||||
|
void do_isPrimitive(Intrinsic* x);
|
||||||
void do_getClass(Intrinsic* x);
|
void do_getClass(Intrinsic* x);
|
||||||
void do_currentThread(Intrinsic* x);
|
void do_currentThread(Intrinsic* x);
|
||||||
void do_MathIntrinsic(Intrinsic* x);
|
void do_MathIntrinsic(Intrinsic* x);
|
||||||
|
@ -1035,14 +1035,15 @@
|
|||||||
do_name( updateByteBuffer_A_name, "updateByteBuffer") \
|
do_name( updateByteBuffer_A_name, "updateByteBuffer") \
|
||||||
\
|
\
|
||||||
/* support for Unsafe */ \
|
/* support for Unsafe */ \
|
||||||
do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \
|
|
||||||
do_class(jdk_internal_misc_Unsafe, "jdk/internal/misc/Unsafe") \
|
do_class(jdk_internal_misc_Unsafe, "jdk/internal/misc/Unsafe") \
|
||||||
\
|
\
|
||||||
do_intrinsic(_allocateInstance, jdk_internal_misc_Unsafe, allocateInstance_name, allocateInstance_signature, F_RN) \
|
do_intrinsic(_allocateInstance, jdk_internal_misc_Unsafe, allocateInstance_name, allocateInstance_signature, F_RN) \
|
||||||
do_name( allocateInstance_name, "allocateInstance") \
|
do_name( allocateInstance_name, "allocateInstance") \
|
||||||
do_signature(allocateInstance_signature, "(Ljava/lang/Class;)Ljava/lang/Object;") \
|
do_signature(allocateInstance_signature, "(Ljava/lang/Class;)Ljava/lang/Object;") \
|
||||||
|
do_intrinsic(_allocateUninitializedArray, jdk_internal_misc_Unsafe, allocateUninitializedArray_name, newArray_signature, F_R) \
|
||||||
|
do_name( allocateUninitializedArray_name, "allocateUninitializedArray0") \
|
||||||
do_intrinsic(_copyMemory, jdk_internal_misc_Unsafe, copyMemory_name, copyMemory_signature, F_RN) \
|
do_intrinsic(_copyMemory, jdk_internal_misc_Unsafe, copyMemory_name, copyMemory_signature, F_RN) \
|
||||||
do_name( copyMemory_name, "copyMemory") \
|
do_name( copyMemory_name, "copyMemory0") \
|
||||||
do_signature(copyMemory_signature, "(Ljava/lang/Object;JLjava/lang/Object;JJ)V") \
|
do_signature(copyMemory_signature, "(Ljava/lang/Object;JLjava/lang/Object;JJ)V") \
|
||||||
do_intrinsic(_loadFence, jdk_internal_misc_Unsafe, loadFence_name, loadFence_signature, F_RN) \
|
do_intrinsic(_loadFence, jdk_internal_misc_Unsafe, loadFence_name, loadFence_signature, F_RN) \
|
||||||
do_name( loadFence_name, "loadFence") \
|
do_name( loadFence_name, "loadFence") \
|
||||||
|
@ -639,11 +639,12 @@
|
|||||||
declare_constant(VM_Version::CPU_AVX512DQ) \
|
declare_constant(VM_Version::CPU_AVX512DQ) \
|
||||||
declare_constant(VM_Version::CPU_AVX512PF) \
|
declare_constant(VM_Version::CPU_AVX512PF) \
|
||||||
declare_constant(VM_Version::CPU_AVX512ER) \
|
declare_constant(VM_Version::CPU_AVX512ER) \
|
||||||
declare_constant(VM_Version::CPU_AVX512CD) \
|
declare_constant(VM_Version::CPU_AVX512CD)
|
||||||
declare_constant(VM_Version::CPU_AVX512BW)
|
|
||||||
|
|
||||||
#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
|
#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
|
||||||
declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL)
|
declare_preprocessor_constant("VM_Version::CPU_AVX512BW", CPU_AVX512BW) \
|
||||||
|
declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) \
|
||||||
|
declare_preprocessor_constant("VM_Version::CPU_SHA", CPU_SHA)
|
||||||
|
|
||||||
#endif // TARGET_ARCH_x86
|
#endif // TARGET_ARCH_x86
|
||||||
|
|
||||||
|
@ -1338,73 +1338,6 @@ vmSymbols::SID Method::klass_id_for_intrinsics(const Klass* holder) {
|
|||||||
return vmSymbols::find_sid(klass_name);
|
return vmSymbols::find_sid(klass_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_unsafe_alias(vmSymbols::SID name_id) {
|
|
||||||
// All 70 intrinsic candidate methods from sun.misc.Unsafe in 1.8.
|
|
||||||
// Some have the same method name but different signature, e.g.
|
|
||||||
// getByte(long), getByte(Object,long)
|
|
||||||
switch (name_id) {
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(allocateInstance_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(copyMemory_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(loadFence_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(storeFence_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(fullFence_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getObject_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getBoolean_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getByte_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getShort_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getChar_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getInt_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getLong_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getFloat_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getDouble_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putObject_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putBoolean_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putByte_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putShort_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putChar_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putInt_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putLong_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putFloat_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putDouble_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getObjectVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getBooleanVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getByteVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getShortVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getCharVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getIntVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getLongVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getFloatVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getDoubleVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putObjectVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putBooleanVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putByteVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putShortVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putCharVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putIntVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putLongVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putFloatVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putDoubleVolatile_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAddress_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putAddress_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(compareAndSwapObject_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(compareAndSwapLong_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(compareAndSwapInt_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putOrderedObject_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putOrderedLong_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(putOrderedInt_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndAddInt_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndAddLong_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndSetInt_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndSetLong_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndSetObject_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(park_name):
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(unpark_name):
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Method::init_intrinsic_id() {
|
void Method::init_intrinsic_id() {
|
||||||
assert(_intrinsic_id == vmIntrinsics::_none, "do this just once");
|
assert(_intrinsic_id == vmIntrinsics::_none, "do this just once");
|
||||||
const uintptr_t max_id_uint = right_n_bits((int)(sizeof(_intrinsic_id) * BitsPerByte));
|
const uintptr_t max_id_uint = right_n_bits((int)(sizeof(_intrinsic_id) * BitsPerByte));
|
||||||
@ -1457,14 +1390,6 @@ void Method::init_intrinsic_id() {
|
|||||||
if (is_static() != MethodHandles::is_signature_polymorphic_static(id))
|
if (is_static() != MethodHandles::is_signature_polymorphic_static(id))
|
||||||
id = vmIntrinsics::_none;
|
id = vmIntrinsics::_none;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case vmSymbols::VM_SYMBOL_ENUM_NAME(sun_misc_Unsafe):
|
|
||||||
// Map sun.misc.Unsafe to jdk.internal.misc.Unsafe
|
|
||||||
if (!is_unsafe_alias(name_id)) break;
|
|
||||||
// pretend it is the corresponding method in the internal Unsafe class:
|
|
||||||
klass_id = vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_misc_Unsafe);
|
|
||||||
id = vmIntrinsics::find_id(klass_id, name_id, sig_id, flags);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (id != vmIntrinsics::_none) {
|
if (id != vmIntrinsics::_none) {
|
||||||
|
@ -498,6 +498,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
|||||||
case vmIntrinsics::_currentTimeMillis:
|
case vmIntrinsics::_currentTimeMillis:
|
||||||
case vmIntrinsics::_nanoTime:
|
case vmIntrinsics::_nanoTime:
|
||||||
case vmIntrinsics::_allocateInstance:
|
case vmIntrinsics::_allocateInstance:
|
||||||
|
case vmIntrinsics::_allocateUninitializedArray:
|
||||||
case vmIntrinsics::_newArray:
|
case vmIntrinsics::_newArray:
|
||||||
case vmIntrinsics::_getLength:
|
case vmIntrinsics::_getLength:
|
||||||
case vmIntrinsics::_copyOf:
|
case vmIntrinsics::_copyOf:
|
||||||
|
@ -1118,7 +1118,11 @@ class Compile : public Phase {
|
|||||||
bool in_scratch_emit_size() const { return _in_scratch_emit_size; }
|
bool in_scratch_emit_size() const { return _in_scratch_emit_size; }
|
||||||
|
|
||||||
enum ScratchBufferBlob {
|
enum ScratchBufferBlob {
|
||||||
|
#if defined(PPC64)
|
||||||
|
MAX_inst_size = 2048,
|
||||||
|
#else
|
||||||
MAX_inst_size = 1024,
|
MAX_inst_size = 1024,
|
||||||
|
#endif
|
||||||
MAX_locs_size = 128, // number of relocInfo elements
|
MAX_locs_size = 128, // number of relocInfo elements
|
||||||
MAX_const_size = 128,
|
MAX_const_size = 128,
|
||||||
MAX_stubs_size = 128
|
MAX_stubs_size = 128
|
||||||
|
@ -48,6 +48,7 @@
|
|||||||
#include "opto/runtime.hpp"
|
#include "opto/runtime.hpp"
|
||||||
#include "opto/subnode.hpp"
|
#include "opto/subnode.hpp"
|
||||||
#include "prims/nativeLookup.hpp"
|
#include "prims/nativeLookup.hpp"
|
||||||
|
#include "prims/unsafe.hpp"
|
||||||
#include "runtime/sharedRuntime.hpp"
|
#include "runtime/sharedRuntime.hpp"
|
||||||
#ifdef TRACE_HAVE_INTRINSICS
|
#ifdef TRACE_HAVE_INTRINSICS
|
||||||
#include "trace/traceMacros.hpp"
|
#include "trace/traceMacros.hpp"
|
||||||
@ -248,6 +249,7 @@ class LibraryCallKit : public GraphKit {
|
|||||||
bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
|
bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
|
||||||
static bool klass_needs_init_guard(Node* kls);
|
static bool klass_needs_init_guard(Node* kls);
|
||||||
bool inline_unsafe_allocate();
|
bool inline_unsafe_allocate();
|
||||||
|
bool inline_unsafe_newArray(bool uninitialized);
|
||||||
bool inline_unsafe_copyMemory();
|
bool inline_unsafe_copyMemory();
|
||||||
bool inline_native_currentThread();
|
bool inline_native_currentThread();
|
||||||
|
|
||||||
@ -255,8 +257,6 @@ class LibraryCallKit : public GraphKit {
|
|||||||
bool inline_native_isInterrupted();
|
bool inline_native_isInterrupted();
|
||||||
bool inline_native_Class_query(vmIntrinsics::ID id);
|
bool inline_native_Class_query(vmIntrinsics::ID id);
|
||||||
bool inline_native_subtype_check();
|
bool inline_native_subtype_check();
|
||||||
|
|
||||||
bool inline_native_newArray();
|
|
||||||
bool inline_native_getLength();
|
bool inline_native_getLength();
|
||||||
bool inline_array_copyOf(bool is_copyOfRange);
|
bool inline_array_copyOf(bool is_copyOfRange);
|
||||||
bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
|
bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
|
||||||
@ -711,7 +711,6 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
|||||||
case vmIntrinsics::_nanoTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime");
|
case vmIntrinsics::_nanoTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime");
|
||||||
case vmIntrinsics::_allocateInstance: return inline_unsafe_allocate();
|
case vmIntrinsics::_allocateInstance: return inline_unsafe_allocate();
|
||||||
case vmIntrinsics::_copyMemory: return inline_unsafe_copyMemory();
|
case vmIntrinsics::_copyMemory: return inline_unsafe_copyMemory();
|
||||||
case vmIntrinsics::_newArray: return inline_native_newArray();
|
|
||||||
case vmIntrinsics::_getLength: return inline_native_getLength();
|
case vmIntrinsics::_getLength: return inline_native_getLength();
|
||||||
case vmIntrinsics::_copyOf: return inline_array_copyOf(false);
|
case vmIntrinsics::_copyOf: return inline_array_copyOf(false);
|
||||||
case vmIntrinsics::_copyOfRange: return inline_array_copyOf(true);
|
case vmIntrinsics::_copyOfRange: return inline_array_copyOf(true);
|
||||||
@ -720,6 +719,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
|||||||
case vmIntrinsics::_Objects_checkIndex: return inline_objects_checkIndex();
|
case vmIntrinsics::_Objects_checkIndex: return inline_objects_checkIndex();
|
||||||
case vmIntrinsics::_clone: return inline_native_clone(intrinsic()->is_virtual());
|
case vmIntrinsics::_clone: return inline_native_clone(intrinsic()->is_virtual());
|
||||||
|
|
||||||
|
case vmIntrinsics::_allocateUninitializedArray: return inline_unsafe_newArray(true);
|
||||||
|
case vmIntrinsics::_newArray: return inline_unsafe_newArray(false);
|
||||||
|
|
||||||
case vmIntrinsics::_isAssignableFrom: return inline_native_subtype_check();
|
case vmIntrinsics::_isAssignableFrom: return inline_native_subtype_check();
|
||||||
|
|
||||||
case vmIntrinsics::_isInstance:
|
case vmIntrinsics::_isInstance:
|
||||||
@ -2303,9 +2305,6 @@ void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Interpret Unsafe.fieldOffset cookies correctly:
|
|
||||||
extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
|
|
||||||
|
|
||||||
const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr) {
|
const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr) {
|
||||||
// Attempt to infer a sharper value type from the offset and base type.
|
// Attempt to infer a sharper value type from the offset and base type.
|
||||||
ciKlass* sharpened_klass = NULL;
|
ciKlass* sharpened_klass = NULL;
|
||||||
@ -3782,9 +3781,17 @@ Node* LibraryCallKit::generate_array_guard_common(Node* kls, RegionNode* region,
|
|||||||
|
|
||||||
//-----------------------inline_native_newArray--------------------------
|
//-----------------------inline_unsafe_newArray--------------------------
|
||||||
// private static native Object java.lang.reflect.Array.newArray(Class<?> componentType, int length);
|
// private static native Object java.lang.reflect.Array.newArray(Class<?> componentType, int length);
|
||||||
bool LibraryCallKit::inline_native_newArray() {
|
// private native Object Unsafe.allocateUninitializedArray0(Class<?> cls, int size);
|
||||||
Node* mirror = argument(0);
|
bool LibraryCallKit::inline_unsafe_newArray(bool uninitialized) {
|
||||||
Node* count_val = argument(1);
|
Node* mirror;
|
||||||
|
Node* count_val;
|
||||||
|
if (uninitialized) {
|
||||||
|
mirror = argument(1);
|
||||||
|
count_val = argument(2);
|
||||||
|
} else {
|
||||||
|
mirror = argument(0);
|
||||||
|
count_val = argument(1);
|
||||||
|
}
|
||||||
|
|
||||||
mirror = null_check(mirror);
|
mirror = null_check(mirror);
|
||||||
// If mirror or obj is dead, only null-path is taken.
|
// If mirror or obj is dead, only null-path is taken.
|
||||||
@ -3829,6 +3836,12 @@ bool LibraryCallKit::inline_native_newArray() {
|
|||||||
result_val->init_req(_normal_path, obj);
|
result_val->init_req(_normal_path, obj);
|
||||||
result_io ->init_req(_normal_path, i_o());
|
result_io ->init_req(_normal_path, i_o());
|
||||||
result_mem->init_req(_normal_path, reset_memory());
|
result_mem->init_req(_normal_path, reset_memory());
|
||||||
|
|
||||||
|
if (uninitialized) {
|
||||||
|
// Mark the allocation so that zeroing is skipped
|
||||||
|
AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(obj, &_gvn);
|
||||||
|
alloc->maybe_set_complete(&_gvn);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the combined state.
|
// Return the combined state.
|
||||||
@ -4417,7 +4430,7 @@ bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//----------------------inline_unsafe_copyMemory-------------------------
|
//----------------------inline_unsafe_copyMemory-------------------------
|
||||||
// public native void Unsafe.copyMemory(Object srcBase, long srcOffset, Object destBase, long destOffset, long bytes);
|
// public native void Unsafe.copyMemory0(Object srcBase, long srcOffset, Object destBase, long destOffset, long bytes);
|
||||||
bool LibraryCallKit::inline_unsafe_copyMemory() {
|
bool LibraryCallKit::inline_unsafe_copyMemory() {
|
||||||
if (callee()->is_static()) return false; // caller must have the capability!
|
if (callee()->is_static()) return false; // caller must have the capability!
|
||||||
null_check_receiver(); // null-check receiver
|
null_check_receiver(); // null-check receiver
|
||||||
|
@ -399,10 +399,6 @@ public:
|
|||||||
// Optional scaling for the parameter to the ClearArray/CopyArray node.
|
// Optional scaling for the parameter to the ClearArray/CopyArray node.
|
||||||
static const bool init_array_count_is_in_bytes;
|
static const bool init_array_count_is_in_bytes;
|
||||||
|
|
||||||
// Threshold small size (in bytes) for a ClearArray/CopyArray node.
|
|
||||||
// Anything this size or smaller may get converted to discrete scalar stores.
|
|
||||||
static const int init_array_short_size;
|
|
||||||
|
|
||||||
// Some hardware needs 2 CMOV's for longs.
|
// Some hardware needs 2 CMOV's for longs.
|
||||||
static const int long_cmove_cost();
|
static const int long_cmove_cost();
|
||||||
|
|
||||||
|
@ -2741,6 +2741,9 @@ Node* ClearArrayNode::Identity(PhaseGVN* phase) {
|
|||||||
//------------------------------Idealize---------------------------------------
|
//------------------------------Idealize---------------------------------------
|
||||||
// Clearing a short array is faster with stores
|
// Clearing a short array is faster with stores
|
||||||
Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
|
Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
|
||||||
|
// Already know this is a large node, do not try to ideal it
|
||||||
|
if (_is_large) return NULL;
|
||||||
|
|
||||||
const int unit = BytesPerLong;
|
const int unit = BytesPerLong;
|
||||||
const TypeX* t = phase->type(in(2))->isa_intptr_t();
|
const TypeX* t = phase->type(in(2))->isa_intptr_t();
|
||||||
if (!t) return NULL;
|
if (!t) return NULL;
|
||||||
@ -2753,8 +2756,11 @@ Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
|
|||||||
// (see jck test stmt114.stmt11402.val).
|
// (see jck test stmt114.stmt11402.val).
|
||||||
if (size <= 0 || size % unit != 0) return NULL;
|
if (size <= 0 || size % unit != 0) return NULL;
|
||||||
intptr_t count = size / unit;
|
intptr_t count = size / unit;
|
||||||
// Length too long; use fast hardware clear
|
// Length too long; communicate this to matchers and assemblers.
|
||||||
if (size > Matcher::init_array_short_size) return NULL;
|
// Assemblers are responsible for producing fast hardware clears for it.
|
||||||
|
if (size > InitArrayShortSize) {
|
||||||
|
return new ClearArrayNode(in(0), in(1), in(2), in(3), true);
|
||||||
|
}
|
||||||
Node *mem = in(1);
|
Node *mem = in(1);
|
||||||
if( phase->type(mem)==Type::TOP ) return NULL;
|
if( phase->type(mem)==Type::TOP ) return NULL;
|
||||||
Node *adr = in(3);
|
Node *adr = in(3);
|
||||||
@ -2852,7 +2858,7 @@ Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
|
|||||||
// Bulk clear double-words
|
// Bulk clear double-words
|
||||||
Node* zsize = phase->transform(new SubXNode(zend, zbase) );
|
Node* zsize = phase->transform(new SubXNode(zend, zbase) );
|
||||||
Node* adr = phase->transform(new AddPNode(dest, dest, start_offset) );
|
Node* adr = phase->transform(new AddPNode(dest, dest, start_offset) );
|
||||||
mem = new ClearArrayNode(ctl, mem, zsize, adr);
|
mem = new ClearArrayNode(ctl, mem, zsize, adr, false);
|
||||||
return phase->transform(mem);
|
return phase->transform(mem);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3901,7 +3907,7 @@ Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
|
|||||||
zeroes_done, zeroes_needed,
|
zeroes_done, zeroes_needed,
|
||||||
phase);
|
phase);
|
||||||
zeroes_done = zeroes_needed;
|
zeroes_done = zeroes_needed;
|
||||||
if (zsize > Matcher::init_array_short_size && ++big_init_gaps > 2)
|
if (zsize > InitArrayShortSize && ++big_init_gaps > 2)
|
||||||
do_zeroing = false; // leave the hole, next time
|
do_zeroing = false; // leave the hole, next time
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1013,9 +1013,11 @@ public:
|
|||||||
|
|
||||||
//------------------------------ClearArray-------------------------------------
|
//------------------------------ClearArray-------------------------------------
|
||||||
class ClearArrayNode: public Node {
|
class ClearArrayNode: public Node {
|
||||||
|
private:
|
||||||
|
bool _is_large;
|
||||||
public:
|
public:
|
||||||
ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base )
|
ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base, bool is_large)
|
||||||
: Node(ctrl,arymem,word_cnt,base) {
|
: Node(ctrl,arymem,word_cnt,base), _is_large(is_large) {
|
||||||
init_class_id(Class_ClearArray);
|
init_class_id(Class_ClearArray);
|
||||||
}
|
}
|
||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
@ -1026,6 +1028,7 @@ public:
|
|||||||
virtual Node* Identity(PhaseGVN* phase);
|
virtual Node* Identity(PhaseGVN* phase);
|
||||||
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
|
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
|
||||||
virtual uint match_edge(uint idx) const;
|
virtual uint match_edge(uint idx) const;
|
||||||
|
bool is_large() const { return _is_large; }
|
||||||
|
|
||||||
// Clear the given area of an object or array.
|
// Clear the given area of an object or array.
|
||||||
// The start offset must always be aligned mod BytesPerInt.
|
// The start offset must always be aligned mod BytesPerInt.
|
||||||
|
@ -35,6 +35,7 @@
|
|||||||
#include "oops/symbol.hpp"
|
#include "oops/symbol.hpp"
|
||||||
#include "prims/jvm_misc.hpp"
|
#include "prims/jvm_misc.hpp"
|
||||||
#include "prims/nativeLookup.hpp"
|
#include "prims/nativeLookup.hpp"
|
||||||
|
#include "prims/unsafe.hpp"
|
||||||
#include "runtime/arguments.hpp"
|
#include "runtime/arguments.hpp"
|
||||||
#include "runtime/handles.inline.hpp"
|
#include "runtime/handles.inline.hpp"
|
||||||
#include "runtime/javaCalls.hpp"
|
#include "runtime/javaCalls.hpp"
|
||||||
@ -107,8 +108,6 @@ char* NativeLookup::long_jni_name(const methodHandle& method) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void JNICALL JVM_RegisterJDKInternalMiscUnsafeMethods(JNIEnv *env, jclass unsafecls);
|
|
||||||
void JNICALL JVM_RegisterSunMiscUnsafeMethods(JNIEnv *env, jclass unsafecls);
|
|
||||||
void JNICALL JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass unsafecls);
|
void JNICALL JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass unsafecls);
|
||||||
void JNICALL JVM_RegisterPerfMethods(JNIEnv *env, jclass perfclass);
|
void JNICALL JVM_RegisterPerfMethods(JNIEnv *env, jclass perfclass);
|
||||||
void JNICALL JVM_RegisterWhiteBoxMethods(JNIEnv *env, jclass wbclass);
|
void JNICALL JVM_RegisterWhiteBoxMethods(JNIEnv *env, jclass wbclass);
|
||||||
@ -123,7 +122,6 @@ extern "C" {
|
|||||||
|
|
||||||
static JNINativeMethod lookup_special_native_methods[] = {
|
static JNINativeMethod lookup_special_native_methods[] = {
|
||||||
{ CC"Java_jdk_internal_misc_Unsafe_registerNatives", NULL, FN_PTR(JVM_RegisterJDKInternalMiscUnsafeMethods) },
|
{ CC"Java_jdk_internal_misc_Unsafe_registerNatives", NULL, FN_PTR(JVM_RegisterJDKInternalMiscUnsafeMethods) },
|
||||||
{ CC"Java_sun_misc_Unsafe_registerNatives", NULL, FN_PTR(JVM_RegisterSunMiscUnsafeMethods) },
|
|
||||||
{ CC"Java_java_lang_invoke_MethodHandleNatives_registerNatives", NULL, FN_PTR(JVM_RegisterMethodHandleMethods) },
|
{ CC"Java_java_lang_invoke_MethodHandleNatives_registerNatives", NULL, FN_PTR(JVM_RegisterMethodHandleMethods) },
|
||||||
{ CC"Java_jdk_internal_perf_Perf_registerNatives", NULL, FN_PTR(JVM_RegisterPerfMethods) },
|
{ CC"Java_jdk_internal_perf_Perf_registerNatives", NULL, FN_PTR(JVM_RegisterPerfMethods) },
|
||||||
{ CC"Java_sun_hotspot_WhiteBox_registerNatives", NULL, FN_PTR(JVM_RegisterWhiteBoxMethods) },
|
{ CC"Java_sun_hotspot_WhiteBox_registerNatives", NULL, FN_PTR(JVM_RegisterWhiteBoxMethods) },
|
||||||
|
File diff suppressed because it is too large
Load Diff
39
hotspot/src/share/vm/prims/unsafe.hpp
Normal file
39
hotspot/src/share/vm/prims/unsafe.hpp
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef SHARE_VM_PRIMS_UNSAFE_HPP
|
||||||
|
#define SHARE_VM_PRIMS_UNSAFE_HPP
|
||||||
|
|
||||||
|
#include "jni.h"
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
void JNICALL JVM_RegisterJDKInternalMiscUnsafeMethods(JNIEnv *env, jclass unsafecls);
|
||||||
|
}
|
||||||
|
|
||||||
|
jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
|
||||||
|
|
||||||
|
jlong Unsafe_field_offset_from_byte_offset(jlong byte_offset);
|
||||||
|
|
||||||
|
#endif // SHARE_VM_PRIMS_UNSAFE_HPP
|
@ -42,17 +42,30 @@ void AdvancedThresholdPolicy::print_specific(EventType type, methodHandle mh, me
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AdvancedThresholdPolicy::initialize() {
|
void AdvancedThresholdPolicy::initialize() {
|
||||||
|
int count = CICompilerCount;
|
||||||
|
#ifdef _LP64
|
||||||
// Turn on ergonomic compiler count selection
|
// Turn on ergonomic compiler count selection
|
||||||
if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) {
|
if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) {
|
||||||
FLAG_SET_DEFAULT(CICompilerCountPerCPU, true);
|
FLAG_SET_DEFAULT(CICompilerCountPerCPU, true);
|
||||||
}
|
}
|
||||||
int count = CICompilerCount;
|
|
||||||
if (CICompilerCountPerCPU) {
|
if (CICompilerCountPerCPU) {
|
||||||
// Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n
|
// Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n
|
||||||
int log_cpu = log2_intptr(os::active_processor_count());
|
int log_cpu = log2_intptr(os::active_processor_count());
|
||||||
int loglog_cpu = log2_intptr(MAX2(log_cpu, 1));
|
int loglog_cpu = log2_intptr(MAX2(log_cpu, 1));
|
||||||
count = MAX2(log_cpu * loglog_cpu, 1) * 3 / 2;
|
count = MAX2(log_cpu * loglog_cpu, 1) * 3 / 2;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
// On 32-bit systems, the number of compiler threads is limited to 3.
|
||||||
|
// On these systems, the virtual address space available to the JVM
|
||||||
|
// is usually limited to 2-4 GB (the exact value depends on the platform).
|
||||||
|
// As the compilers (especially C2) can consume a large amount of
|
||||||
|
// memory, scaling the number of compiler threads with the number of
|
||||||
|
// available cores can result in the exhaustion of the address space
|
||||||
|
// available to the VM and thus cause the VM to crash.
|
||||||
|
if (FLAG_IS_DEFAULT(CICompilerCount)) {
|
||||||
|
count = 3;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
set_c1_count(MAX2(count / 3, 1));
|
set_c1_count(MAX2(count / 3, 1));
|
||||||
set_c2_count(MAX2(count - c1_count(), 1));
|
set_c2_count(MAX2(count - c1_count(), 1));
|
||||||
|
@ -2474,9 +2474,11 @@ bool Arguments::check_vm_args_consistency() {
|
|||||||
status = false;
|
status = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
if (!FLAG_IS_DEFAULT(CICompilerCount) && !FLAG_IS_DEFAULT(CICompilerCountPerCPU) && CICompilerCountPerCPU) {
|
if (!FLAG_IS_DEFAULT(CICompilerCount) && !FLAG_IS_DEFAULT(CICompilerCountPerCPU) && CICompilerCountPerCPU) {
|
||||||
warning("The VM option CICompilerCountPerCPU overrides CICompilerCount.");
|
warning("The VM option CICompilerCountPerCPU overrides CICompilerCount.");
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef SUPPORT_RESERVED_STACK_AREA
|
#ifndef SUPPORT_RESERVED_STACK_AREA
|
||||||
if (StackReservedPages != 0) {
|
if (StackReservedPages != 0) {
|
||||||
|
@ -354,6 +354,14 @@ Flag::Error TypeProfileLevelConstraintFunc(uintx value, bool verbose) {
|
|||||||
return Flag::SUCCESS;
|
return Flag::SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Flag::Error InitArrayShortSizeConstraintFunc(intx value, bool verbose) {
|
||||||
|
if (value % BytesPerLong != 0) {
|
||||||
|
return Flag::VIOLATES_CONSTRAINT;
|
||||||
|
} else {
|
||||||
|
return Flag::SUCCESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef COMPILER2
|
#ifdef COMPILER2
|
||||||
Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) {
|
Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) {
|
||||||
if (InteriorEntryAlignment > CodeEntryAlignment) {
|
if (InteriorEntryAlignment > CodeEntryAlignment) {
|
||||||
|
@ -62,6 +62,8 @@ Flag::Error ArraycopySrcPrefetchDistanceConstraintFunc(uintx value, bool verbose
|
|||||||
|
|
||||||
Flag::Error TypeProfileLevelConstraintFunc(uintx value, bool verbose);
|
Flag::Error TypeProfileLevelConstraintFunc(uintx value, bool verbose);
|
||||||
|
|
||||||
|
Flag::Error InitArrayShortSizeConstraintFunc(intx value, bool verbose);
|
||||||
|
|
||||||
#ifdef COMPILER2
|
#ifdef COMPILER2
|
||||||
Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose);
|
Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose);
|
||||||
|
|
||||||
|
@ -725,7 +725,7 @@ public:
|
|||||||
\
|
\
|
||||||
product(bool, UseSHA, false, \
|
product(bool, UseSHA, false, \
|
||||||
"Control whether SHA instructions can be used " \
|
"Control whether SHA instructions can be used " \
|
||||||
"on SPARC and on ARM") \
|
"on SPARC, on ARM and on x86") \
|
||||||
\
|
\
|
||||||
product(bool, UseGHASHIntrinsics, false, \
|
product(bool, UseGHASHIntrinsics, false, \
|
||||||
"Use intrinsics for GHASH versions of crypto") \
|
"Use intrinsics for GHASH versions of crypto") \
|
||||||
@ -3079,16 +3079,16 @@ public:
|
|||||||
develop(intx, MethodHistogramCutoff, 100, \
|
develop(intx, MethodHistogramCutoff, 100, \
|
||||||
"The cutoff value for method invocation histogram (+CountCalls)") \
|
"The cutoff value for method invocation histogram (+CountCalls)") \
|
||||||
\
|
\
|
||||||
develop(intx, ProfilerNumberOfInterpretedMethods, 25, \
|
diagnostic(intx, ProfilerNumberOfInterpretedMethods, 25, \
|
||||||
"Number of interpreted methods to show in profile") \
|
"Number of interpreted methods to show in profile") \
|
||||||
\
|
\
|
||||||
develop(intx, ProfilerNumberOfCompiledMethods, 25, \
|
diagnostic(intx, ProfilerNumberOfCompiledMethods, 25, \
|
||||||
"Number of compiled methods to show in profile") \
|
"Number of compiled methods to show in profile") \
|
||||||
\
|
\
|
||||||
develop(intx, ProfilerNumberOfStubMethods, 25, \
|
diagnostic(intx, ProfilerNumberOfStubMethods, 25, \
|
||||||
"Number of stub methods to show in profile") \
|
"Number of stub methods to show in profile") \
|
||||||
\
|
\
|
||||||
develop(intx, ProfilerNumberOfRuntimeStubNodes, 25, \
|
diagnostic(intx, ProfilerNumberOfRuntimeStubNodes, 25, \
|
||||||
"Number of runtime stub nodes to show in profile") \
|
"Number of runtime stub nodes to show in profile") \
|
||||||
\
|
\
|
||||||
product(intx, ProfileIntervalsTicks, 100, \
|
product(intx, ProfileIntervalsTicks, 100, \
|
||||||
@ -4149,6 +4149,13 @@ public:
|
|||||||
"in the loaded class C. " \
|
"in the loaded class C. " \
|
||||||
"Check (3) is available only in debug builds.") \
|
"Check (3) is available only in debug builds.") \
|
||||||
\
|
\
|
||||||
|
develop_pd(intx, InitArrayShortSize, \
|
||||||
|
"Threshold small size (in bytes) for clearing arrays. " \
|
||||||
|
"Anything this size or smaller may get converted to discrete " \
|
||||||
|
"scalar stores.") \
|
||||||
|
range(0, max_intx) \
|
||||||
|
constraint(InitArrayShortSizeConstraintFunc, AfterErgo) \
|
||||||
|
\
|
||||||
diagnostic(bool, CompilerDirectivesIgnoreCompileCommands, false, \
|
diagnostic(bool, CompilerDirectivesIgnoreCompileCommands, false, \
|
||||||
"Disable backwards compatibility for compile commands.") \
|
"Disable backwards compatibility for compile commands.") \
|
||||||
\
|
\
|
||||||
|
@ -521,9 +521,9 @@ class RuntimeHistogramElement : public HistogramElement {
|
|||||||
JNI_ENTRY_NO_PRESERVE(result_type, header) \
|
JNI_ENTRY_NO_PRESERVE(result_type, header) \
|
||||||
WeakPreserveExceptionMark __wem(thread);
|
WeakPreserveExceptionMark __wem(thread);
|
||||||
|
|
||||||
#define JNI_ENTRY_NO_PRESERVE(result_type, header) \
|
#define JNI_ENTRY_NO_PRESERVE(result_type, header) \
|
||||||
extern "C" { \
|
extern "C" { \
|
||||||
result_type JNICALL header { \
|
result_type JNICALL header { \
|
||||||
JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
|
JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
|
||||||
assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
|
assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
|
||||||
ThreadInVMfromNative __tiv(thread); \
|
ThreadInVMfromNative __tiv(thread); \
|
||||||
@ -535,7 +535,7 @@ extern "C" { \
|
|||||||
// a GC, is called outside the NoHandleMark (set via VM_QUICK_ENTRY_BASE).
|
// a GC, is called outside the NoHandleMark (set via VM_QUICK_ENTRY_BASE).
|
||||||
#define JNI_QUICK_ENTRY(result_type, header) \
|
#define JNI_QUICK_ENTRY(result_type, header) \
|
||||||
extern "C" { \
|
extern "C" { \
|
||||||
result_type JNICALL header { \
|
result_type JNICALL header { \
|
||||||
JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
|
JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
|
||||||
assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
|
assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
|
||||||
ThreadInVMfromNative __tiv(thread); \
|
ThreadInVMfromNative __tiv(thread); \
|
||||||
@ -545,7 +545,7 @@ extern "C" { \
|
|||||||
|
|
||||||
#define JNI_LEAF(result_type, header) \
|
#define JNI_LEAF(result_type, header) \
|
||||||
extern "C" { \
|
extern "C" { \
|
||||||
result_type JNICALL header { \
|
result_type JNICALL header { \
|
||||||
JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
|
JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
|
||||||
assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
|
assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
|
||||||
VM_LEAF_BASE(result_type, header)
|
VM_LEAF_BASE(result_type, header)
|
||||||
|
@ -138,9 +138,15 @@ void SimpleThresholdPolicy::initialize() {
|
|||||||
FLAG_SET_DEFAULT(CICompilerCount, 3);
|
FLAG_SET_DEFAULT(CICompilerCount, 3);
|
||||||
}
|
}
|
||||||
int count = CICompilerCount;
|
int count = CICompilerCount;
|
||||||
|
#ifdef _LP64
|
||||||
|
// On 64-bit systems, scale the number of compiler threads with
|
||||||
|
// the number of cores available on the system. Scaling is not
|
||||||
|
// performed on 32-bit systems because it can lead to exhaustion
|
||||||
|
// of the virtual memory address space available to the JVM.
|
||||||
if (CICompilerCountPerCPU) {
|
if (CICompilerCountPerCPU) {
|
||||||
count = MAX2(log2_intptr(os::active_processor_count()), 1) * 3 / 2;
|
count = MAX2(log2_intptr(os::active_processor_count()), 1) * 3 / 2;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
set_c1_count(MAX2(count / 3, 1));
|
set_c1_count(MAX2(count / 3, 1));
|
||||||
set_c2_count(MAX2(count - c1_count(), 1));
|
set_c2_count(MAX2(count - c1_count(), 1));
|
||||||
FLAG_SET_ERGO(intx, CICompilerCount, c1_count() + c2_count());
|
FLAG_SET_ERGO(intx, CICompilerCount, c1_count() + c2_count());
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#include "gc/shared/cardTableModRefBS.hpp"
|
#include "gc/shared/cardTableModRefBS.hpp"
|
||||||
#include "memory/resourceArea.hpp"
|
#include "memory/resourceArea.hpp"
|
||||||
#include "oops/method.hpp"
|
#include "oops/method.hpp"
|
||||||
|
#include "prims/unsafe.hpp"
|
||||||
#include "runtime/os.hpp"
|
#include "runtime/os.hpp"
|
||||||
#include "runtime/synchronizer.hpp"
|
#include "runtime/synchronizer.hpp"
|
||||||
#include "runtime/thread.hpp"
|
#include "runtime/thread.hpp"
|
||||||
@ -326,7 +327,6 @@ Value* SharkBuilder::fabs() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Value* SharkBuilder::unsafe_field_offset_to_byte_offset() {
|
Value* SharkBuilder::unsafe_field_offset_to_byte_offset() {
|
||||||
extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
|
|
||||||
return make_function((address) Unsafe_field_offset_to_byte_offset, "l", "l");
|
return make_function((address) Unsafe_field_offset_to_byte_offset, "l", "l");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,74 +32,99 @@ import jdk.test.lib.TimeLimitedRunner;
|
|||||||
import jdk.test.lib.Utils;
|
import jdk.test.lib.Utils;
|
||||||
import pool.PoolHelper;
|
import pool.PoolHelper;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
public abstract class StressAddJcmdBase {
|
public abstract class StressAddJcmdBase {
|
||||||
private static final int DIRECTIVES_AMOUNT = Integer.getInteger(
|
private static final int DIRECTIVES_AMOUNT = Integer.getInteger(
|
||||||
"compiler.compilercontrol.jcmd.StressAddJcmdBase.directivesAmount",
|
"compiler.compilercontrol.jcmd.StressAddJcmdBase.directivesAmount",
|
||||||
1000);
|
200);
|
||||||
private static final int DIRECTIVE_FILES = Integer.getInteger(
|
private static final int TIMEOUT = Integer.getInteger(
|
||||||
"compiler.compilercontrol.jcmd.StressAddJcmdBase.directiveFiles",
|
"compiler.compilercontrol.jcmd.StressAddJcmdBase.timeout",
|
||||||
5);
|
30);
|
||||||
private static final List<MethodDescriptor> DESCRIPTORS = new PoolHelper()
|
private static final List<MethodDescriptor> DESCRIPTORS = new PoolHelper()
|
||||||
.getAllMethods().stream()
|
.getAllMethods().stream()
|
||||||
.map(pair -> AbstractTestBase
|
.map(pair -> AbstractTestBase
|
||||||
.getValidMethodDescriptor(pair.first))
|
.getValidMethodDescriptor(pair.first))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
private static final String DIRECTIVE_FILE = "directives.json";
|
||||||
|
private static final List<String> VM_OPTIONS = new ArrayList<>();
|
||||||
|
private static final Random RANDOM = Utils.getRandomInstance();
|
||||||
|
|
||||||
|
static {
|
||||||
|
VM_OPTIONS.add("-Xmixed");
|
||||||
|
VM_OPTIONS.add("-XX:+UnlockDiagnosticVMOptions");
|
||||||
|
VM_OPTIONS.add("-XX:+LogCompilation");
|
||||||
|
VM_OPTIONS.add("-XX:CompilerDirectivesLimit=1001");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs test
|
* Performs test
|
||||||
*/
|
*/
|
||||||
public void test() {
|
public void test() {
|
||||||
List<String> commands = prepareCommands();
|
HugeDirectiveUtil.createHugeFile(DESCRIPTORS, DIRECTIVE_FILE,
|
||||||
Executor executor = new TimeLimitedExecutor(commands);
|
DIRECTIVES_AMOUNT);
|
||||||
|
Executor executor = new TimeLimitedExecutor();
|
||||||
List<OutputAnalyzer> outputAnalyzers = executor.execute();
|
List<OutputAnalyzer> outputAnalyzers = executor.execute();
|
||||||
outputAnalyzers.get(0).shouldHaveExitValue(0);
|
outputAnalyzers.get(0).shouldHaveExitValue(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Makes connection to the test VM
|
* Makes connection to the test VM and performs a diagnostic command
|
||||||
*
|
*
|
||||||
* @param pid a pid of the VM under test
|
* @param pid a pid of the VM under test
|
||||||
* @param commands a list of jcmd commands to be executed
|
|
||||||
* @return true if the test should continue invocation of this method
|
* @return true if the test should continue invocation of this method
|
||||||
*/
|
*/
|
||||||
protected abstract boolean makeConnection(int pid, List<String> commands);
|
protected abstract boolean makeConnection(int pid);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finish test executions
|
* Finish test executions
|
||||||
*/
|
*/
|
||||||
protected void finish() { }
|
protected void finish() { }
|
||||||
|
|
||||||
private List<String> prepareCommands() {
|
protected String nextCommand() {
|
||||||
String[] files = new String[DIRECTIVE_FILES];
|
int i = RANDOM.nextInt(JcmdCommand.values().length);
|
||||||
for (int i = 0; i < DIRECTIVE_FILES; i++) {
|
JcmdCommand jcmdCommand = JcmdCommand.values()[i];
|
||||||
files[i] = "directives" + i + ".json";
|
switch (jcmdCommand) {
|
||||||
HugeDirectiveUtil.createHugeFile(DESCRIPTORS, files[i],
|
case ADD:
|
||||||
DIRECTIVES_AMOUNT);
|
return jcmdCommand.command + " " + DIRECTIVE_FILE;
|
||||||
|
case PRINT:
|
||||||
|
case CLEAR:
|
||||||
|
case REMOVE:
|
||||||
|
return jcmdCommand.command;
|
||||||
|
default:
|
||||||
|
throw new Error("TESTBUG: incorrect command: " + jcmdCommand);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private enum JcmdCommand {
|
||||||
|
ADD("Compiler.directives_add"),
|
||||||
|
PRINT("Compiler.directives_print"),
|
||||||
|
CLEAR("Compiler.directives_clear"),
|
||||||
|
REMOVE("Compiler.directives_remove");
|
||||||
|
|
||||||
|
public final String command;
|
||||||
|
|
||||||
|
JcmdCommand(String command) {
|
||||||
|
this.command = command;
|
||||||
}
|
}
|
||||||
return Stream.of(files)
|
|
||||||
.map(file -> "Compiler.directives_add " + file)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private class TimeLimitedExecutor extends Executor {
|
private class TimeLimitedExecutor extends Executor {
|
||||||
private final List<String> jcmdCommands;
|
public TimeLimitedExecutor() {
|
||||||
|
|
||||||
public TimeLimitedExecutor(List<String> jcmdCommands) {
|
|
||||||
/* There is no need to check the state */
|
/* There is no need to check the state */
|
||||||
super(true, null, null, jcmdCommands);
|
super(true, VM_OPTIONS, null, null);
|
||||||
this.jcmdCommands = jcmdCommands;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected OutputAnalyzer[] executeJCMD(int pid) {
|
protected OutputAnalyzer[] executeJCMD(int pid) {
|
||||||
TimeLimitedRunner runner = new TimeLimitedRunner(
|
TimeLimitedRunner runner = new TimeLimitedRunner(
|
||||||
Utils.DEFAULT_TEST_TIMEOUT,
|
TimeUnit.SECONDS.toMillis(TIMEOUT),
|
||||||
Utils.TIMEOUT_FACTOR,
|
Utils.TIMEOUT_FACTOR,
|
||||||
() -> makeConnection(pid, jcmdCommands));
|
() -> makeConnection(pid));
|
||||||
try {
|
try {
|
||||||
runner.call();
|
runner.call();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -27,21 +27,19 @@
|
|||||||
* @summary Tests jcmd to be able to add a lot of huge directive files with
|
* @summary Tests jcmd to be able to add a lot of huge directive files with
|
||||||
* parallel executed jcmds until timeout has reached
|
* parallel executed jcmds until timeout has reached
|
||||||
* @library /testlibrary /test/lib /compiler/testlibrary ../share /
|
* @library /testlibrary /test/lib /compiler/testlibrary ../share /
|
||||||
* @ignore 8148563
|
|
||||||
* @build compiler.compilercontrol.jcmd.StressAddMultiThreadedTest
|
* @build compiler.compilercontrol.jcmd.StressAddMultiThreadedTest
|
||||||
* pool.sub.* pool.subpack.* sun.hotspot.WhiteBox
|
* pool.sub.* pool.subpack.* sun.hotspot.WhiteBox
|
||||||
* compiler.testlibrary.CompilerUtils
|
* compiler.testlibrary.CompilerUtils
|
||||||
* compiler.compilercontrol.share.actions.*
|
* compiler.compilercontrol.share.actions.*
|
||||||
* @run main ClassFileInstaller sun.hotspot.WhiteBox
|
* @run main ClassFileInstaller sun.hotspot.WhiteBox
|
||||||
* sun.hotspot.WhiteBox$WhiteBoxPermission
|
* sun.hotspot.WhiteBox$WhiteBoxPermission
|
||||||
* @run main/othervm/timeout=360 compiler.compilercontrol.jcmd.StressAddMultiThreadedTest
|
* @run driver compiler.compilercontrol.jcmd.StressAddMultiThreadedTest
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package compiler.compilercontrol.jcmd;
|
package compiler.compilercontrol.jcmd;
|
||||||
|
|
||||||
import jdk.test.lib.dcmd.PidJcmdExecutor;
|
import jdk.test.lib.dcmd.PidJcmdExecutor;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.concurrent.ArrayBlockingQueue;
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
import java.util.concurrent.BlockingQueue;
|
import java.util.concurrent.BlockingQueue;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
@ -49,16 +47,15 @@ import java.util.concurrent.ThreadPoolExecutor;
|
|||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
public class StressAddMultiThreadedTest extends StressAddJcmdBase {
|
public class StressAddMultiThreadedTest extends StressAddJcmdBase {
|
||||||
private static final int THREADS;
|
private static final int THREADS = Integer.getInteger(
|
||||||
|
"compiler.compilercontrol.jcmd.StressAddMultiThreadedTest.threads",
|
||||||
|
5);
|
||||||
|
private volatile int commands = Integer.getInteger(
|
||||||
|
"compiler.compilercontrol.jcmd.StressAddMultiThreadedTest.commands",
|
||||||
|
20);
|
||||||
private final BlockingQueue<Runnable> queue;
|
private final BlockingQueue<Runnable> queue;
|
||||||
private final ExecutorService executor;
|
private final ExecutorService executor;
|
||||||
|
|
||||||
static {
|
|
||||||
THREADS = Runtime.getRuntime().availableProcessors()
|
|
||||||
* Integer.getInteger("compiler.compilercontrol.jcmd" +
|
|
||||||
".StressAddMultiThreadedTest.threadFactor", 10);
|
|
||||||
}
|
|
||||||
|
|
||||||
public StressAddMultiThreadedTest() {
|
public StressAddMultiThreadedTest() {
|
||||||
queue = new ArrayBlockingQueue<>(THREADS);
|
queue = new ArrayBlockingQueue<>(THREADS);
|
||||||
executor = new ThreadPoolExecutor(THREADS, THREADS, 100,
|
executor = new ThreadPoolExecutor(THREADS, THREADS, 100,
|
||||||
@ -71,14 +68,10 @@ public class StressAddMultiThreadedTest extends StressAddJcmdBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean makeConnection(int pid, List<String> commands) {
|
protected boolean makeConnection(int pid) {
|
||||||
commands.forEach(command -> {
|
executor.submit(() -> new PidJcmdExecutor(String.valueOf(pid))
|
||||||
if (!executor.isShutdown()) {
|
.execute(nextCommand()));
|
||||||
executor.submit(() -> new PidJcmdExecutor(String.valueOf(pid))
|
return (--commands != 0);
|
||||||
.execute(command));
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return !executor.isShutdown();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -1,55 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
||||||
*
|
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
|
||||||
* under the terms of the GNU General Public License version 2 only, as
|
|
||||||
* published by the Free Software Foundation.
|
|
||||||
*
|
|
||||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
||||||
* version 2 for more details (a copy is included in the LICENSE file that
|
|
||||||
* accompanied this code).
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License version
|
|
||||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
||||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
*
|
|
||||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
||||||
* or visit www.oracle.com if you need additional information or have any
|
|
||||||
* questions.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* @test
|
|
||||||
* @bug 8137167
|
|
||||||
* @summary Tests jcmd to be able to add a lot of huge directives
|
|
||||||
* @library /testlibrary /test/lib /compiler/testlibrary ../share /
|
|
||||||
* @build compiler.compilercontrol.jcmd.StressAddSequentiallyTest
|
|
||||||
* pool.sub.* pool.subpack.* sun.hotspot.WhiteBox
|
|
||||||
* compiler.testlibrary.CompilerUtils
|
|
||||||
* compiler.compilercontrol.share.actions.*
|
|
||||||
* @run main ClassFileInstaller sun.hotspot.WhiteBox
|
|
||||||
* sun.hotspot.WhiteBox$WhiteBoxPermission
|
|
||||||
* @run main/othervm/timeout=300 compiler.compilercontrol.jcmd.StressAddSequentiallyTest
|
|
||||||
*/
|
|
||||||
|
|
||||||
package compiler.compilercontrol.jcmd;
|
|
||||||
|
|
||||||
import jdk.test.lib.dcmd.PidJcmdExecutor;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class StressAddSequentiallyTest extends StressAddJcmdBase {
|
|
||||||
public static void main(String[] args) {
|
|
||||||
new StressAddSequentiallyTest().test();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected boolean makeConnection(int pid, List<String> commands) {
|
|
||||||
commands.forEach(command -> new PidJcmdExecutor(String.valueOf(pid))
|
|
||||||
.execute(command));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
@ -33,8 +33,8 @@
|
|||||||
* -XX:+WhiteBoxAPI
|
* -XX:+WhiteBoxAPI
|
||||||
* -XX:DisableIntrinsic=_putCharVolatile,_putInt
|
* -XX:DisableIntrinsic=_putCharVolatile,_putInt
|
||||||
* -XX:DisableIntrinsic=_putIntVolatile
|
* -XX:DisableIntrinsic=_putIntVolatile
|
||||||
* -XX:CompileCommand=option,sun.misc.Unsafe::putChar,ccstrlist,DisableIntrinsic,_getCharVolatile,_getInt
|
* -XX:CompileCommand=option,jdk.internal.misc.Unsafe::putChar,ccstrlist,DisableIntrinsic,_getCharVolatile,_getInt
|
||||||
* -XX:CompileCommand=option,sun.misc.Unsafe::putCharVolatile,ccstrlist,DisableIntrinsic,_getIntVolatile
|
* -XX:CompileCommand=option,jdk.internal.misc.Unsafe::putCharVolatile,ccstrlist,DisableIntrinsic,_getIntVolatile
|
||||||
* IntrinsicDisabledTest
|
* IntrinsicDisabledTest
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -60,7 +60,7 @@ public class IntrinsicDisabledTest {
|
|||||||
return Boolean.valueOf(Objects.toString(wb.getVMFlag("TieredCompilation")));
|
return Boolean.valueOf(Objects.toString(wb.getVMFlag("TieredCompilation")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This test uses several methods from sun.misc.Unsafe. The method
|
/* This test uses several methods from jdk.internal.misc.Unsafe. The method
|
||||||
* getMethod() returns a different Executable for each different
|
* getMethod() returns a different Executable for each different
|
||||||
* combination of its input parameters. There are eight possible
|
* combination of its input parameters. There are eight possible
|
||||||
* combinations, getMethod can return an Executable representing
|
* combinations, getMethod can return an Executable representing
|
||||||
@ -74,7 +74,7 @@ public class IntrinsicDisabledTest {
|
|||||||
String methodTypeName = isChar ? "Char" : "Int";
|
String methodTypeName = isChar ? "Char" : "Int";
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Class aClass = Class.forName("sun.misc.Unsafe");
|
Class aClass = Class.forName("jdk.internal.misc.Unsafe");
|
||||||
if (isPut) {
|
if (isPut) {
|
||||||
aMethod = aClass.getDeclaredMethod("put" + methodTypeName + (isVolatile ? "Volatile" : ""),
|
aMethod = aClass.getDeclaredMethod("put" + methodTypeName + (isVolatile ? "Volatile" : ""),
|
||||||
Object.class,
|
Object.class,
|
||||||
|
166
hotspot/test/compiler/intrinsics/class/TestClassIsPrimitive.java
Normal file
166
hotspot/test/compiler/intrinsics/class/TestClassIsPrimitive.java
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8150669
|
||||||
|
* @summary C1 intrinsic for Class.isPrimitive
|
||||||
|
* @modules java.base/jdk.internal.misc
|
||||||
|
* @run main/othervm -ea -Diters=200 -Xint TestClassIsPrimitive
|
||||||
|
* @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=1 TestClassIsPrimitive
|
||||||
|
* @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=4 TestClassIsPrimitive
|
||||||
|
*/
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.lang.reflect.Array;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
|
||||||
|
/**
 * Exercises Class.isPrimitive() with the receiver class obtained in three
 * different ways: a class literal at the call site, a static final field and
 * a volatile static field. Each scenario is repeated ITERS times.
 */
public class TestClassIsPrimitive {
    /** Repetitions per scenario; taken from the "iters" system property, 1 if unset. */
    static final int ITERS = Integer.getInteger("iters", 1);

    /**
     * Runs every scenario: the eight primitive types must report true,
     * Object and Object[] must report false, and a null receiver must
     * raise NullPointerException.
     */
    public static void main(String... args) throws Exception {
        // Receiver is a class literal.
        testOK(true, InlineConstants::testBoolean);
        testOK(true, InlineConstants::testByte);
        testOK(true, InlineConstants::testShort);
        testOK(true, InlineConstants::testChar);
        testOK(true, InlineConstants::testInt);
        testOK(true, InlineConstants::testFloat);
        testOK(true, InlineConstants::testLong);
        testOK(true, InlineConstants::testDouble);
        testOK(false, InlineConstants::testObject);
        testOK(false, InlineConstants::testArray);

        // Receiver comes from a static final field.
        testOK(true, StaticConstants::testBoolean);
        testOK(true, StaticConstants::testByte);
        testOK(true, StaticConstants::testShort);
        testOK(true, StaticConstants::testChar);
        testOK(true, StaticConstants::testInt);
        testOK(true, StaticConstants::testFloat);
        testOK(true, StaticConstants::testLong);
        testOK(true, StaticConstants::testDouble);
        testOK(false, StaticConstants::testObject);
        testOK(false, StaticConstants::testArray);
        testNPE(StaticConstants::testNull);

        // Receiver comes from a volatile static field.
        testOK(true, NoConstants::testBoolean);
        testOK(true, NoConstants::testByte);
        testOK(true, NoConstants::testShort);
        testOK(true, NoConstants::testChar);
        testOK(true, NoConstants::testInt);
        testOK(true, NoConstants::testFloat);
        testOK(true, NoConstants::testLong);
        testOK(true, NoConstants::testDouble);
        testOK(false, NoConstants::testObject);
        testOK(false, NoConstants::testArray);
        testNPE(NoConstants::testNull);
    }

    /**
     * Invokes the scenario ITERS times and checks each result.
     *
     * @param expected the value every invocation has to produce
     * @param test     the scenario to invoke
     * @throws IllegalStateException if an invocation yields something not equal to expected
     * @throws Exception whatever the scenario itself throws
     */
    public static void testOK(boolean expected, Callable<Object> test) throws Exception {
        for (int iter = 0; iter < ITERS; iter++) {
            Object actual = test.call();
            if (actual.equals(expected)) {
                continue;
            }
            throw new IllegalStateException("Wrong result: expected = " + expected + ", but got " + actual);
        }
    }

    /** Receives the result of scenarios that are expected to throw, should they return instead. */
    static volatile Object sink;

    /**
     * Invokes the scenario ITERS times; every invocation has to end with a
     * NullPointerException.
     *
     * @param test the scenario to invoke
     * @throws IllegalStateException if an invocation returns normally
     * @throws Exception any other exception raised by the scenario
     */
    public static void testNPE(Callable<Object> test) throws Exception {
        for (int iter = 0; iter < ITERS; iter++) {
            boolean npeSeen = false;
            try {
                sink = test.call();
            } catch (NullPointerException npe) {
                // expected
                npeSeen = true;
            }
            if (!npeSeen) {
                throw new IllegalStateException("Expected NPE");
            }
        }
    }

    // Receivers for NoConstants: volatile, hence loaded from the field on each call.
    static volatile Class<?> classBoolean = boolean.class;
    static volatile Class<?> classByte = byte.class;
    static volatile Class<?> classShort = short.class;
    static volatile Class<?> classChar = char.class;
    static volatile Class<?> classInt = int.class;
    static volatile Class<?> classFloat = float.class;
    static volatile Class<?> classLong = long.class;
    static volatile Class<?> classDouble = double.class;
    static volatile Class<?> classObject = Object.class;
    static volatile Class<?> classArray = Object[].class;
    static volatile Class<?> classNull = null;

    // Receivers for StaticConstants: static final fields.
    static final Class<?> staticClassBoolean = boolean.class;
    static final Class<?> staticClassByte = byte.class;
    static final Class<?> staticClassShort = short.class;
    static final Class<?> staticClassChar = char.class;
    static final Class<?> staticClassInt = int.class;
    static final Class<?> staticClassFloat = float.class;
    static final Class<?> staticClassLong = long.class;
    static final Class<?> staticClassDouble = double.class;
    static final Class<?> staticClassObject = Object.class;
    static final Class<?> staticClassArray = Object[].class;
    static final Class<?> staticClassNull = null;

    /** Scenarios whose receiver is a class literal written at the call site. */
    static class InlineConstants {
        static boolean testBoolean() { return boolean.class.isPrimitive(); }
        static boolean testByte() { return byte.class.isPrimitive(); }
        static boolean testShort() { return short.class.isPrimitive(); }
        static boolean testChar() { return char.class.isPrimitive(); }
        static boolean testInt() { return int.class.isPrimitive(); }
        static boolean testFloat() { return float.class.isPrimitive(); }
        static boolean testLong() { return long.class.isPrimitive(); }
        static boolean testDouble() { return double.class.isPrimitive(); }
        static boolean testObject() { return Object.class.isPrimitive(); }
        static boolean testArray() { return Object[].class.isPrimitive(); }
    }

    /** Scenarios whose receiver is read from one of the static final fields. */
    static class StaticConstants {
        static boolean testBoolean() { return staticClassBoolean.isPrimitive(); }
        static boolean testByte() { return staticClassByte.isPrimitive(); }
        static boolean testShort() { return staticClassShort.isPrimitive(); }
        static boolean testChar() { return staticClassChar.isPrimitive(); }
        static boolean testInt() { return staticClassInt.isPrimitive(); }
        static boolean testFloat() { return staticClassFloat.isPrimitive(); }
        static boolean testLong() { return staticClassLong.isPrimitive(); }
        static boolean testDouble() { return staticClassDouble.isPrimitive(); }
        static boolean testObject() { return staticClassObject.isPrimitive(); }
        static boolean testArray() { return staticClassArray.isPrimitive(); }
        static boolean testNull() { return staticClassNull.isPrimitive(); }
    }

    /** Scenarios whose receiver is read from one of the volatile static fields. */
    static class NoConstants {
        static boolean testBoolean() { return classBoolean.isPrimitive(); }
        static boolean testByte() { return classByte.isPrimitive(); }
        static boolean testShort() { return classShort.isPrimitive(); }
        static boolean testChar() { return classChar.isPrimitive(); }
        static boolean testInt() { return classInt.isPrimitive(); }
        static boolean testFloat() { return classFloat.isPrimitive(); }
        static boolean testLong() { return classLong.isPrimitive(); }
        static boolean testDouble() { return classDouble.isPrimitive(); }
        static boolean testObject() { return classObject.isPrimitive(); }
        static boolean testArray() { return classArray.isPrimitive(); }
        static boolean testNull() { return classNull.isPrimitive(); }
    }

}
|
||||||
|
|
@ -0,0 +1,213 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8150465
|
||||||
|
* @summary Unsafe methods to produce uninitialized arrays
|
||||||
|
* @modules java.base/jdk.internal.misc
|
||||||
|
* @run main/othervm -ea -Diters=200 -Xint AllocateUninitializedArray
|
||||||
|
* @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=1 AllocateUninitializedArray
|
||||||
|
* @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=4 AllocateUninitializedArray
|
||||||
|
*/
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.lang.reflect.Array;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
|
||||||
|
public class AllocateUninitializedArray {
|
||||||
|
static final int ITERS = Integer.getInteger("iters", 1);
|
||||||
|
static final jdk.internal.misc.Unsafe UNSAFE;
|
||||||
|
|
||||||
|
static {
|
||||||
|
try {
|
||||||
|
Field f = jdk.internal.misc.Unsafe.class.getDeclaredField("theUnsafe");
|
||||||
|
f.setAccessible(true);
|
||||||
|
UNSAFE = (jdk.internal.misc.Unsafe) f.get(null);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException("Unable to get Unsafe instance.", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String... args) throws Exception {
|
||||||
|
testIAE(AllConstants::testObject);
|
||||||
|
testIAE(LengthIsConstant::testObject);
|
||||||
|
testIAE(ClassIsConstant::testObject);
|
||||||
|
testIAE(NothingIsConstant::testObject);
|
||||||
|
|
||||||
|
testIAE(AllConstants::testArray);
|
||||||
|
testIAE(LengthIsConstant::testArray);
|
||||||
|
testIAE(ClassIsConstant::testArray);
|
||||||
|
testIAE(NothingIsConstant::testArray);
|
||||||
|
|
||||||
|
testIAE(AllConstants::testNull);
|
||||||
|
testIAE(LengthIsConstant::testNull);
|
||||||
|
testIAE(ClassIsConstant::testNull);
|
||||||
|
testIAE(NothingIsConstant::testNull);
|
||||||
|
|
||||||
|
testOK(boolean[].class, 10, AllConstants::testBoolean);
|
||||||
|
testOK(byte[].class, 10, AllConstants::testByte);
|
||||||
|
testOK(short[].class, 10, AllConstants::testShort);
|
||||||
|
testOK(char[].class, 10, AllConstants::testChar);
|
||||||
|
testOK(int[].class, 10, AllConstants::testInt);
|
||||||
|
testOK(float[].class, 10, AllConstants::testFloat);
|
||||||
|
testOK(long[].class, 10, AllConstants::testLong);
|
||||||
|
testOK(double[].class, 10, AllConstants::testDouble);
|
||||||
|
|
||||||
|
testOK(boolean[].class, 10, LengthIsConstant::testBoolean);
|
||||||
|
testOK(byte[].class, 10, LengthIsConstant::testByte);
|
||||||
|
testOK(short[].class, 10, LengthIsConstant::testShort);
|
||||||
|
testOK(char[].class, 10, LengthIsConstant::testChar);
|
||||||
|
testOK(int[].class, 10, LengthIsConstant::testInt);
|
||||||
|
testOK(float[].class, 10, LengthIsConstant::testFloat);
|
||||||
|
testOK(long[].class, 10, LengthIsConstant::testLong);
|
||||||
|
testOK(double[].class, 10, LengthIsConstant::testDouble);
|
||||||
|
|
||||||
|
testOK(boolean[].class, 10, ClassIsConstant::testBoolean);
|
||||||
|
testOK(byte[].class, 10, ClassIsConstant::testByte);
|
||||||
|
testOK(short[].class, 10, ClassIsConstant::testShort);
|
||||||
|
testOK(char[].class, 10, ClassIsConstant::testChar);
|
||||||
|
testOK(int[].class, 10, ClassIsConstant::testInt);
|
||||||
|
testOK(float[].class, 10, ClassIsConstant::testFloat);
|
||||||
|
testOK(long[].class, 10, ClassIsConstant::testLong);
|
||||||
|
testOK(double[].class, 10, ClassIsConstant::testDouble);
|
||||||
|
|
||||||
|
testOK(boolean[].class, 10, NothingIsConstant::testBoolean);
|
||||||
|
testOK(byte[].class, 10, NothingIsConstant::testByte);
|
||||||
|
testOK(short[].class, 10, NothingIsConstant::testShort);
|
||||||
|
testOK(char[].class, 10, NothingIsConstant::testChar);
|
||||||
|
testOK(int[].class, 10, NothingIsConstant::testInt);
|
||||||
|
testOK(float[].class, 10, NothingIsConstant::testFloat);
|
||||||
|
testOK(long[].class, 10, NothingIsConstant::testLong);
|
||||||
|
testOK(double[].class, 10, NothingIsConstant::testDouble);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void testOK(Class<?> expectClass, int expectLen, Callable<Object> test) throws Exception {
|
||||||
|
for (int c = 0; c < ITERS; c++) {
|
||||||
|
Object res = test.call();
|
||||||
|
Class<?> actualClass = res.getClass();
|
||||||
|
if (!actualClass.equals(expectClass)) {
|
||||||
|
throw new IllegalStateException("Wrong class: expected = " + expectClass + ", but got " + actualClass);
|
||||||
|
}
|
||||||
|
int actualLen = Array.getLength(res);
|
||||||
|
if (actualLen != expectLen) {
|
||||||
|
throw new IllegalStateException("Wrong length: expected = " + expectLen + ", but got " + actualLen);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static volatile Object sink;
|
||||||
|
|
||||||
|
public static void testIAE(Callable<Object> test) throws Exception {
|
||||||
|
for (int c = 0; c < ITERS; c++) {
|
||||||
|
try {
|
||||||
|
sink = test.call();
|
||||||
|
throw new IllegalStateException("Expected IAE");
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static volatile int sampleLenNeg = -1;
|
||||||
|
static volatile int sampleLenZero = 0;
|
||||||
|
static volatile int sampleLen = 10;
|
||||||
|
|
||||||
|
|
||||||
|
static volatile Class<?> classBoolean = boolean.class;
|
||||||
|
static volatile Class<?> classByte = byte.class;
|
||||||
|
static volatile Class<?> classShort = short.class;
|
||||||
|
static volatile Class<?> classChar = char.class;
|
||||||
|
static volatile Class<?> classInt = int.class;
|
||||||
|
static volatile Class<?> classFloat = float.class;
|
||||||
|
static volatile Class<?> classLong = long.class;
|
||||||
|
static volatile Class<?> classDouble = double.class;
|
||||||
|
static volatile Class<?> classObject = Object.class;
|
||||||
|
static volatile Class<?> classArray = Object[].class;
|
||||||
|
static volatile Class<?> classNull = null;
|
||||||
|
|
||||||
|
static class AllConstants {
|
||||||
|
static Object testBoolean() { return UNSAFE.allocateUninitializedArray(boolean.class, 10); }
|
||||||
|
static Object testByte() { return UNSAFE.allocateUninitializedArray(byte.class, 10); }
|
||||||
|
static Object testShort() { return UNSAFE.allocateUninitializedArray(short.class, 10); }
|
||||||
|
static Object testChar() { return UNSAFE.allocateUninitializedArray(char.class, 10); }
|
||||||
|
static Object testInt() { return UNSAFE.allocateUninitializedArray(int.class, 10); }
|
||||||
|
static Object testFloat() { return UNSAFE.allocateUninitializedArray(float.class, 10); }
|
||||||
|
static Object testLong() { return UNSAFE.allocateUninitializedArray(long.class, 10); }
|
||||||
|
static Object testDouble() { return UNSAFE.allocateUninitializedArray(double.class, 10); }
|
||||||
|
static Object testObject() { return UNSAFE.allocateUninitializedArray(Object.class, 10); }
|
||||||
|
static Object testArray() { return UNSAFE.allocateUninitializedArray(Object[].class, 10); }
|
||||||
|
static Object testNull() { return UNSAFE.allocateUninitializedArray(null, 10); }
|
||||||
|
static Object testZero() { return UNSAFE.allocateUninitializedArray(int.class, 0); }
|
||||||
|
static Object testNeg() { return UNSAFE.allocateUninitializedArray(int.class, -1); }
|
||||||
|
}
|
||||||
|
|
||||||
|
static class ClassIsConstant {
|
||||||
|
static Object testBoolean() { return UNSAFE.allocateUninitializedArray(boolean.class, sampleLen); }
|
||||||
|
static Object testByte() { return UNSAFE.allocateUninitializedArray(byte.class, sampleLen); }
|
||||||
|
static Object testShort() { return UNSAFE.allocateUninitializedArray(short.class, sampleLen); }
|
||||||
|
static Object testChar() { return UNSAFE.allocateUninitializedArray(char.class, sampleLen); }
|
||||||
|
static Object testInt() { return UNSAFE.allocateUninitializedArray(int.class, sampleLen); }
|
||||||
|
static Object testFloat() { return UNSAFE.allocateUninitializedArray(float.class, sampleLen); }
|
||||||
|
static Object testLong() { return UNSAFE.allocateUninitializedArray(long.class, sampleLen); }
|
||||||
|
static Object testDouble() { return UNSAFE.allocateUninitializedArray(double.class, sampleLen); }
|
||||||
|
static Object testObject() { return UNSAFE.allocateUninitializedArray(Object.class, sampleLen); }
|
||||||
|
static Object testArray() { return UNSAFE.allocateUninitializedArray(Object[].class, sampleLen); }
|
||||||
|
static Object testNull() { return UNSAFE.allocateUninitializedArray(null, sampleLen); }
|
||||||
|
static Object testZero() { return UNSAFE.allocateUninitializedArray(int.class, sampleLenZero); }
|
||||||
|
static Object testNeg() { return UNSAFE.allocateUninitializedArray(int.class, sampleLenNeg); }
|
||||||
|
}
|
||||||
|
|
||||||
|
static class LengthIsConstant {
|
||||||
|
static Object testBoolean() { return UNSAFE.allocateUninitializedArray(classBoolean, 10); }
|
||||||
|
static Object testByte() { return UNSAFE.allocateUninitializedArray(classByte, 10); }
|
||||||
|
static Object testShort() { return UNSAFE.allocateUninitializedArray(classShort, 10); }
|
||||||
|
static Object testChar() { return UNSAFE.allocateUninitializedArray(classChar, 10); }
|
||||||
|
static Object testInt() { return UNSAFE.allocateUninitializedArray(classInt, 10); }
|
||||||
|
static Object testFloat() { return UNSAFE.allocateUninitializedArray(classFloat, 10); }
|
||||||
|
static Object testLong() { return UNSAFE.allocateUninitializedArray(classLong, 10); }
|
||||||
|
static Object testDouble() { return UNSAFE.allocateUninitializedArray(classDouble, 10); }
|
||||||
|
static Object testObject() { return UNSAFE.allocateUninitializedArray(classObject, 10); }
|
||||||
|
static Object testArray() { return UNSAFE.allocateUninitializedArray(classArray, 10); }
|
||||||
|
static Object testNull() { return UNSAFE.allocateUninitializedArray(classNull, 10); }
|
||||||
|
static Object testZero() { return UNSAFE.allocateUninitializedArray(classInt, 0); }
|
||||||
|
static Object testNeg() { return UNSAFE.allocateUninitializedArray(classInt, -1); }
|
||||||
|
}
|
||||||
|
|
||||||
|
static class NothingIsConstant {
|
||||||
|
static Object testBoolean() { return UNSAFE.allocateUninitializedArray(classBoolean, sampleLen); }
|
||||||
|
static Object testByte() { return UNSAFE.allocateUninitializedArray(classByte, sampleLen); }
|
||||||
|
static Object testShort() { return UNSAFE.allocateUninitializedArray(classShort, sampleLen); }
|
||||||
|
static Object testChar() { return UNSAFE.allocateUninitializedArray(classChar, sampleLen); }
|
||||||
|
static Object testInt() { return UNSAFE.allocateUninitializedArray(classInt, sampleLen); }
|
||||||
|
static Object testFloat() { return UNSAFE.allocateUninitializedArray(classFloat, sampleLen); }
|
||||||
|
static Object testLong() { return UNSAFE.allocateUninitializedArray(classLong, sampleLen); }
|
||||||
|
static Object testDouble() { return UNSAFE.allocateUninitializedArray(classDouble, sampleLen); }
|
||||||
|
static Object testObject() { return UNSAFE.allocateUninitializedArray(classObject, sampleLen); }
|
||||||
|
static Object testArray() { return UNSAFE.allocateUninitializedArray(classArray, sampleLen); }
|
||||||
|
static Object testNull() { return UNSAFE.allocateUninitializedArray(classNull, sampleLen); }
|
||||||
|
static Object testZero() { return UNSAFE.allocateUninitializedArray(classInt, sampleLenZero); }
|
||||||
|
static Object testNeg() { return UNSAFE.allocateUninitializedArray(classInt, sampleLenNeg); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user