Alejandro Murillo 2016-03-10 16:08:17 -08:00
commit b6ac98452d
70 changed files with 2976 additions and 1251 deletions

View File

@@ -3425,9 +3425,6 @@ const bool Matcher::misaligned_vectors_ok() {
 // false => size gets scaled to BytesPerLong, ok.
 const bool Matcher::init_array_count_is_in_bytes = false;

-// Threshold size for cleararray.
-const int Matcher::init_array_short_size = 18 * BytesPerLong;
-
 // Use conditional move (CMOVL)
 const int Matcher::long_cmove_cost() {
   // long cmoves are no more expensive than int cmoves

@@ -4135,14 +4132,14 @@ encode %{
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
+               Assembler::xword, /*acquire*/ false, /*release*/ true);
   %}

   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
+               Assembler::word, /*acquire*/ false, /*release*/ true);
   %}

@@ -4154,14 +4151,14 @@ encode %{
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
+               Assembler::xword, /*acquire*/ true, /*release*/ true);
   %}

   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
+               Assembler::word, /*acquire*/ true, /*release*/ true);
   %}

@@ -4679,8 +4676,14 @@ encode %{
     // Compare object markOop with mark and if equal exchange scratch1
     // with object markOop.
-    {
+    if (UseLSE) {
+      __ mov(tmp, disp_hdr);
+      __ casal(Assembler::xword, tmp, box, oop);
+      __ cmp(tmp, disp_hdr);
+      __ br(Assembler::EQ, cont);
+    } else {
       Label retry_load;
+      __ prfm(Address(oop), PSTL1STRM);
       __ bind(retry_load);
       __ ldaxr(tmp, oop);
       __ cmp(tmp, disp_hdr);

@@ -4729,8 +4732,13 @@ encode %{
     __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
     __ mov(disp_hdr, zr);
-    {
+    if (UseLSE) {
+      __ mov(rscratch1, disp_hdr);
+      __ casal(Assembler::xword, rscratch1, rthread, tmp);
+      __ cmp(rscratch1, disp_hdr);
+    } else {
       Label retry_load, fail;
+      __ prfm(Address(tmp), PSTL1STRM);
       __ bind(retry_load);
       __ ldaxr(rscratch1, tmp);
       __ cmp(disp_hdr, rscratch1);

@@ -4818,8 +4826,13 @@ encode %{
     // see the stack address of the basicLock in the markOop of the
     // object.
-    {
+    if (UseLSE) {
+      __ mov(tmp, box);
+      __ casl(Assembler::xword, tmp, disp_hdr, oop);
+      __ cmp(tmp, box);
+    } else {
       Label retry_load;
+      __ prfm(Address(oop), PSTL1STRM);
       __ bind(retry_load);
       __ ldxr(tmp, oop);
       __ cmp(box, tmp);
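The rewritten enc_class encodings above no longer pass load/compare/store member-function pointers; they hand MacroAssembler::cmpxchg an operand size plus acquire/release flags and let it pick a CAS instruction or an exclusive-load/store loop. As a rough sketch of the memory-ordering contract being requested, in standard C++ atomics rather than HotSpot code:

#include <atomic>
#include <cstdint>

// Sketch only: the /*acquire*/ true, /*release*/ true case of the _acq
// encodings, expressed with C++ memory orders. HotSpot emits the
// equivalent AArch64 instructions (CASAL with LSE, LDAXR/STLXR without).
bool cas_acq_rel(std::atomic<uint64_t>& mem,
                 uint64_t& oldval, uint64_t newval) {
  return mem.compare_exchange_strong(oldval, newval,
                                     std::memory_order_acq_rel,   // success
                                     std::memory_order_acquire);  // failure
}

The plain (non-_acq) encodings request /*acquire*/ false, /*release*/ true, i.e. release-only ordering on the store side.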

View File

@@ -972,7 +972,7 @@ public:
   // System
   void system(int op0, int op1, int CRn, int CRm, int op2,
-              Register rt = (Register)0b11111)
+              Register rt = dummy_reg)
   {
     starti;
     f(0b11010101000, 31, 21);

@@ -1082,7 +1082,7 @@ public:
 #define INSN(NAME, opc) \
   void NAME() { \
-    branch_reg((Register)0b11111, opc); \
+    branch_reg(dummy_reg, opc); \
   }

   INSN(eret, 0b0100);

@@ -1094,10 +1094,22 @@ public:
   enum operand_size { byte, halfword, word, xword };

   void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
-    Register Rn, enum operand_size sz, int op, int o0) {
+    Register Rn, enum operand_size sz, int op, bool ordered) {
     starti;
     f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
-    rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
+    rf(Rs, 16), f(ordered, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
+  }
+
+  void load_exclusive(Register dst, Register addr,
+                      enum operand_size sz, bool ordered) {
+    load_store_exclusive(dummy_reg, dst, dummy_reg, addr,
+                         sz, 0b010, ordered);
+  }
+
+  void store_exclusive(Register status, Register new_val, Register addr,
+                       enum operand_size sz, bool ordered) {
+    load_store_exclusive(status, new_val, dummy_reg, addr,
+                         sz, 0b000, ordered);
   }

 #define INSN4(NAME, sz, op, o0) /* Four registers */ \

@@ -1109,19 +1121,19 @@ public:
 #define INSN3(NAME, sz, op, o0) /* Three registers */ \
   void NAME(Register Rs, Register Rt, Register Rn) { \
     guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \
-    load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \
+    load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \
   }

 #define INSN2(NAME, sz, op, o0) /* Two registers */ \
   void NAME(Register Rt, Register Rn) { \
-    load_store_exclusive((Register)0b11111, Rt, (Register)0b11111, \
+    load_store_exclusive(dummy_reg, Rt, dummy_reg, \
                          Rn, sz, op, o0); \
   }

 #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
   void NAME(Register Rt1, Register Rt2, Register Rn) { \
     guarantee(Rt1 != Rt2, "unpredictable instruction"); \
-    load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0); \
+    load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0); \
   }

   // bytes

@@ -1169,6 +1181,46 @@ public:
 #undef INSN4
 #undef INSN_FOO

+  // 8.1 Compare and swap extensions
+  void lse_cas(Register Rs, Register Rt, Register Rn,
+               enum operand_size sz, bool a, bool r, bool not_pair) {
+    starti;
+    if (! not_pair) { // Pair
+      assert(sz == word || sz == xword, "invalid size");
+      /* The size bit is in bit 30, not 31 */
+      sz = (operand_size)(sz == word ? 0b00:0b01);
+    }
+    f(sz, 31, 30), f(0b001000, 29, 24), f(1, 23), f(a, 22), f(1, 21);
+    rf(Rs, 16), f(r, 15), f(0b11111, 14, 10), rf(Rn, 5), rf(Rt, 0);
+  }
+
+  // CAS
+#define INSN(NAME, a, r) \
+  void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) { \
+    assert(Rs != Rn && Rs != Rt, "unpredictable instruction"); \
+    lse_cas(Rs, Rt, Rn, sz, a, r, true); \
+  }
+  INSN(cas, false, false)
+  INSN(casa, true, false)
+  INSN(casl, false, true)
+  INSN(casal, true, true)
+#undef INSN
+
+  // CASP
+#define INSN(NAME, a, r) \
+  void NAME(operand_size sz, Register Rs, Register Rs1, \
+            Register Rt, Register Rt1, Register Rn) { \
+    assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 && \
+           Rs->successor() == Rs1 && Rt->successor() == Rt1 && \
+           Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers"); \
+    lse_cas(Rs, Rt, Rn, sz, a, r, false); \
+  }
+  INSN(casp, false, false)
+  INSN(caspa, true, false)
+  INSN(caspl, false, true)
+  INSN(caspal, true, true)
+#undef INSN
+
   // Load register (literal)
 #define INSN(NAME, opc, V) \
   void NAME(Register Rt, address dest) { \
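The bit packing lse_cas() performs for the non-pair case can be checked standalone. The following sketch is not HotSpot code: it takes raw register numbers in place of Register operands and simply reproduces the field layout written by the f()/rf() calls above.

#include <cstdint>

// Field layout of the 8.1 CAS instructions as emitted by lse_cas().
uint32_t encode_lse_cas(unsigned sz, bool a, bool r,
                        unsigned rs, unsigned rn, unsigned rt) {
  return (sz << 30)          // operand size (word = 0b10, xword = 0b11)
       | (0b001000u << 24)   // compare-and-swap instruction class
       | (1u << 23) | ((unsigned)a << 22) | (1u << 21)
       | (rs << 16)          // Rs: comparison value in, old value out
       | ((unsigned)r << 15) // release bit
       | (0b11111u << 10)    // Rt2 field is fixed to 0b11111
       | (rn << 5)           // Rn: memory address
       | rt;                 // Rt: new value to store
}
// e.g. casal x0, x1, [x2]  ==  encode_lse_cas(0b11, true, true, 0, 2, 1)
//      == 0xC8E0FC41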

View File

@@ -1556,38 +1556,54 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
 }

 void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
-  Label retry_load, nope;
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  __ bind(retry_load);
-  __ ldaxrw(rscratch1, addr);
-  __ cmpw(rscratch1, cmpval);
-  __ cset(rscratch1, Assembler::NE);
-  __ br(Assembler::NE, nope);
-  // if we store+flush with no intervening write rscratch1 wil be zero
-  __ stlxrw(rscratch1, newval, addr);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  __ cbnzw(rscratch1, retry_load);
-  __ bind(nope);
+  if (UseLSE) {
+    __ mov(rscratch1, cmpval);
+    __ casal(Assembler::word, rscratch1, newval, addr);
+    __ cmpw(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+  } else {
+    Label retry_load, nope;
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    __ prfm(Address(addr), PSTL1STRM);
+    __ bind(retry_load);
+    __ ldaxrw(rscratch1, addr);
+    __ cmpw(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+    __ br(Assembler::NE, nope);
+    // if we store+flush with no intervening write rscratch1 wil be zero
+    __ stlxrw(rscratch1, newval, addr);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    __ cbnzw(rscratch1, retry_load);
+    __ bind(nope);
+  }
   __ membar(__ AnyAny);
 }

 void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
-  Label retry_load, nope;
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  __ bind(retry_load);
-  __ ldaxr(rscratch1, addr);
-  __ cmp(rscratch1, cmpval);
-  __ cset(rscratch1, Assembler::NE);
-  __ br(Assembler::NE, nope);
-  // if we store+flush with no intervening write rscratch1 wil be zero
-  __ stlxr(rscratch1, newval, addr);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  __ cbnz(rscratch1, retry_load);
-  __ bind(nope);
+  if (UseLSE) {
+    __ mov(rscratch1, cmpval);
+    __ casal(Assembler::xword, rscratch1, newval, addr);
+    __ cmp(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+  } else {
+    Label retry_load, nope;
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    __ prfm(Address(addr), PSTL1STRM);
+    __ bind(retry_load);
+    __ ldaxr(rscratch1, addr);
+    __ cmp(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+    __ br(Assembler::NE, nope);
+    // if we store+flush with no intervening write rscratch1 wil be zero
+    __ stlxr(rscratch1, newval, addr);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    __ cbnz(rscratch1, retry_load);
+    __ bind(nope);
+  }
   __ membar(__ AnyAny);
 }

@@ -3156,6 +3172,7 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr
       }
       Label again;
       __ lea(tmp, addr);
+      __ prfm(Address(tmp), PSTL1STRM);
       __ bind(again);
       (_masm->*lda)(dst, tmp);
       (_masm->*add)(rscratch1, dst, inc);

@@ -3175,6 +3192,7 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr
       assert_different_registers(obj, addr.base(), tmp, rscratch2, dst);
       Label again;
       __ lea(tmp, addr);
+      __ prfm(Address(tmp), PSTL1STRM);
       __ bind(again);
       (_masm->*lda)(dst, tmp);
       (_masm->*stl)(rscratch2, obj, tmp);
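Both branches of the new casw leave the same convention behind: rscratch1 is 0 on success and 1 on failure. In plain C++ terms (a sketch of the UseLSE branch's logic, not HotSpot code):

#include <atomic>
#include <cstdint>

// What the LSE branch of casw computes: CASAL returns the old memory
// value in the register that held the comparison value, and the final
// compare-and-cset turns "old != expected" into the 0/1 failure flag.
int casw_sketch(std::atomic<uint32_t>& mem, uint32_t cmpval, uint32_t newval) {
  uint32_t old = cmpval;                    // mov  rscratch1, cmpval
  mem.compare_exchange_strong(old, newval,  // casal rscratch1, newval, [addr]
                              std::memory_order_acq_rel);
  return old != cmpval ? 1 : 0;             // cmpw + cset(NE)
}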

View File

@@ -76,6 +76,8 @@ define_pd_global(bool, CompactStrings, false);
 // avoid biased locking while we are bootstrapping the aarch64 build
 define_pd_global(bool, UseBiasedLocking, false);

+define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong);
+
 #if defined(COMPILER1) || defined(COMPILER2)
 define_pd_global(intx, InlineSmallCode, 1000);
 #endif

@@ -101,9 +103,13 @@ define_pd_global(intx, InlineSmallCode, 1000);
                                                       \
   product(bool, UseCRC32, false,                      \
           "Use CRC32 instructions for CRC32 computation") \
+                                                      \
+  product(bool, UseLSE, false,                        \
+          "Use LSE instructions")                     \

 // Don't attempt to use Neon on builtin sim until builtin sim supports it
 #define UseCRC32 false
+#define UseSIMDForMemoryOps false

 #else
 #define UseBuiltinSim false

@@ -121,6 +127,10 @@ define_pd_global(intx, InlineSmallCode, 1000);
           "Use Neon for CRC32 computation")           \
   product(bool, UseCRC32, false,                      \
           "Use CRC32 instructions for CRC32 computation") \
+  product(bool, UseSIMDForMemoryOps, false,           \
+          "Use SIMD instructions in generated memory move code") \
+  product(bool, UseLSE, false,                        \
+          "Use LSE instructions")                     \
   product(bool, TraceTraps, false, "Trace all traps the signal handler")

 #endif

View File

@@ -1638,6 +1638,7 @@ Address MacroAssembler::form_address(Register Rd, Register base, long byte_offse
 void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
   Label retry_load;
+  prfm(Address(counter_addr), PSTL1STRM);
   bind(retry_load);
   // flush and load exclusive from the memory location
   ldxrw(tmp, counter_addr);

@@ -2070,25 +2071,32 @@ void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Reg
   // oldv holds comparison value
   // newv holds value to write in exchange
   // addr identifies memory word to compare against/update
-  // tmp returns 0/1 for success/failure
-  Label retry_load, nope;
-
-  bind(retry_load);
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  ldaxr(tmp, addr);
-  cmp(tmp, oldv);
-  br(Assembler::NE, nope);
-  // if we store+flush with no intervening write tmp wil be zero
-  stlxr(tmp, newv, addr);
-  cbzw(tmp, succeed);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  b(retry_load);
-  // if the memory word differs we return it in oldv and signal a fail
-  bind(nope);
-  membar(AnyAny);
-  mov(oldv, tmp);
+  if (UseLSE) {
+    mov(tmp, oldv);
+    casal(Assembler::xword, oldv, newv, addr);
+    cmp(tmp, oldv);
+    br(Assembler::EQ, succeed);
+    membar(AnyAny);
+  } else {
+    Label retry_load, nope;
+    prfm(Address(addr), PSTL1STRM);
+    bind(retry_load);
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    ldaxr(tmp, addr);
+    cmp(tmp, oldv);
+    br(Assembler::NE, nope);
+    // if we store+flush with no intervening write tmp wil be zero
+    stlxr(tmp, newv, addr);
+    cbzw(tmp, succeed);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    b(retry_load);
+    // if the memory word differs we return it in oldv and signal a fail
+    bind(nope);
+    membar(AnyAny);
+    mov(oldv, tmp);
+  }
   if (fail)
     b(*fail);
 }

@@ -2099,28 +2107,64 @@ void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Regis
   // newv holds value to write in exchange
   // addr identifies memory word to compare against/update
   // tmp returns 0/1 for success/failure
-  Label retry_load, nope;
-
-  bind(retry_load);
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  ldaxrw(tmp, addr);
-  cmp(tmp, oldv);
-  br(Assembler::NE, nope);
-  // if we store+flush with no intervening write tmp wil be zero
-  stlxrw(tmp, newv, addr);
-  cbzw(tmp, succeed);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  b(retry_load);
-  // if the memory word differs we return it in oldv and signal a fail
-  bind(nope);
-  membar(AnyAny);
-  mov(oldv, tmp);
+  if (UseLSE) {
+    mov(tmp, oldv);
+    casal(Assembler::word, oldv, newv, addr);
+    cmp(tmp, oldv);
+    br(Assembler::EQ, succeed);
+    membar(AnyAny);
+  } else {
+    Label retry_load, nope;
+    prfm(Address(addr), PSTL1STRM);
+    bind(retry_load);
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    ldaxrw(tmp, addr);
+    cmp(tmp, oldv);
+    br(Assembler::NE, nope);
+    // if we store+flush with no intervening write tmp wil be zero
+    stlxrw(tmp, newv, addr);
+    cbzw(tmp, succeed);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    b(retry_load);
+    // if the memory word differs we return it in oldv and signal a fail
+    bind(nope);
+    membar(AnyAny);
+    mov(oldv, tmp);
+  }
   if (fail)
     b(*fail);
 }

+// A generic CAS; success or failure is in the EQ flag.
+void MacroAssembler::cmpxchg(Register addr, Register expected,
+                             Register new_val,
+                             enum operand_size size,
+                             bool acquire, bool release,
+                             Register tmp) {
+  if (UseLSE) {
+    mov(tmp, expected);
+    lse_cas(tmp, new_val, addr, size, acquire, release, /*not_pair*/ true);
+    cmp(tmp, expected);
+  } else {
+    BLOCK_COMMENT("cmpxchg {");
+    Label retry_load, done;
+    prfm(Address(addr), PSTL1STRM);
+    bind(retry_load);
+    load_exclusive(tmp, addr, size, acquire);
+    if (size == xword)
+      cmp(tmp, expected);
+    else
+      cmpw(tmp, expected);
+    br(Assembler::NE, done);
+    store_exclusive(tmp, new_val, addr, size, release);
+    cbnzw(tmp, retry_load);
+    bind(done);
+    BLOCK_COMMENT("} cmpxchg");
+  }
+}
+
 static bool different(Register a, RegisterOrConstant b, Register c) {
   if (b.is_constant())
     return a != c;

@@ -2135,6 +2179,7 @@ void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Registe
   result = different(prev, incr, addr) ? prev : rscratch2; \
                                                            \
   Label retry_load;                                        \
+  prfm(Address(addr), PSTL1STRM);                          \
   bind(retry_load);                                        \
   LDXR(result, addr);                                      \
   OP(rscratch1, result, incr);                             \

@@ -2157,6 +2202,7 @@ void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) {
   result = different(prev, newv, addr) ? prev : rscratch2; \
                                                            \
   Label retry_load;                                        \
+  prfm(Address(addr), PSTL1STRM);                          \
   bind(retry_load);                                        \
   LDXR(result, addr);                                      \
   STXR(rscratch1, newv, addr);                             \

View File

@@ -971,21 +971,10 @@ public:
   }

   // A generic CAS; success or failure is in the EQ flag.
-  template <typename T1, typename T2>
   void cmpxchg(Register addr, Register expected, Register new_val,
-               T1 load_insn,
-               void (MacroAssembler::*cmp_insn)(Register, Register),
-               T2 store_insn,
-               Register tmp = rscratch1) {
-    Label retry_load, done;
-    bind(retry_load);
-    (this->*load_insn)(tmp, addr);
-    (this->*cmp_insn)(tmp, expected);
-    br(Assembler::NE, done);
-    (this->*store_insn)(tmp, new_val, addr);
-    cbnzw(tmp, retry_load);
-    bind(done);
-  }
+               enum operand_size size,
+               bool acquire, bool release,
+               Register tmp = rscratch1);

   // Calls
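Call sites now select the operand size and memory ordering directly instead of supplying instruction pointers; for example, the aarch64.ad encodings earlier in this commit become:

// Release-only CAS (plain cmpxchg encoding) vs. acquire+release (_acq):
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
           Assembler::xword, /*acquire*/ false, /*release*/ true);
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
           Assembler::xword, /*acquire*/ true, /*release*/ true);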

View File

@@ -107,6 +107,9 @@ CONSTANT_REGISTER_DECLARATION(Register, r31_sp, (31));
 CONSTANT_REGISTER_DECLARATION(Register, zr, (32));
 CONSTANT_REGISTER_DECLARATION(Register, sp, (33));

+// Used as a filler in instructions where a register field is unused.
+const Register dummy_reg = r31_sp;
+
 // Use FloatRegister as shortcut
 class FloatRegisterImpl;
 typedef FloatRegisterImpl* FloatRegister;

View File

@@ -729,7 +729,7 @@ class StubGenerator: public StubCodeGenerator {
   //
   // count is a count of words.
   //
-  // Precondition: count >= 2
+  // Precondition: count >= 8
   //
   // Postconditions:
   //

@@ -741,6 +741,7 @@ class StubGenerator: public StubCodeGenerator {
   void generate_copy_longs(Label &start, Register s, Register d, Register count,
                            copy_direction direction) {
     int unit = wordSize * direction;
+    int bias = (UseSIMDForMemoryOps ? 4:2) * wordSize;
     int offset;
     const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6,

@@ -750,7 +751,7 @@ class StubGenerator: public StubCodeGenerator {
     assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7);
     assert_different_registers(s, d, count, rscratch1);

-    Label again, large, small;
+    Label again, drain;
     const char *stub_name;
     if (direction == copy_forwards)
       stub_name = "foward_copy_longs";

@@ -759,57 +760,35 @@ class StubGenerator: public StubCodeGenerator {
     StubCodeMark mark(this, "StubRoutines", stub_name);
     __ align(CodeEntryAlignment);
     __ bind(start);
-    __ cmp(count, 8);
-    __ br(Assembler::LO, small);
     if (direction == copy_forwards) {
-      __ sub(s, s, 2 * wordSize);
-      __ sub(d, d, 2 * wordSize);
-    }
-    __ subs(count, count, 16);
-    __ br(Assembler::GE, large);
-
-    // 8 <= count < 16 words.  Copy 8.
-    __ ldp(t0, t1, Address(s, 2 * unit));
-    __ ldp(t2, t3, Address(s, 4 * unit));
-    __ ldp(t4, t5, Address(s, 6 * unit));
-    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
-    __ stp(t0, t1, Address(d, 2 * unit));
-    __ stp(t2, t3, Address(d, 4 * unit));
-    __ stp(t4, t5, Address(d, 6 * unit));
-    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
-    if (direction == copy_forwards) {
-      __ add(s, s, 2 * wordSize);
-      __ add(d, d, 2 * wordSize);
+      __ sub(s, s, bias);
+      __ sub(d, d, bias);
     }

+#ifdef ASSERT
+    // Make sure we are never given < 8 words
     {
-      Label L1, L2;
-      __ bind(small);
-      __ tbz(count, exact_log2(4), L1);
-      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
-      __ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
-      __ bind(L1);
-
-      __ tbz(count, 1, L2);
-      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
-      __ bind(L2);
+      Label L;
+      __ cmp(count, 8);
+      __ br(Assembler::GE, L);
+      __ stop("genrate_copy_longs called with < 8 words");
+      __ bind(L);
     }
-
-    __ ret(lr);
-
-    __ align(CodeEntryAlignment);
-    __ bind(large);
+#endif

     // Fill 8 registers
-    __ ldp(t0, t1, Address(s, 2 * unit));
-    __ ldp(t2, t3, Address(s, 4 * unit));
-    __ ldp(t4, t5, Address(s, 6 * unit));
-    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+    if (UseSIMDForMemoryOps) {
+      __ ldpq(v0, v1, Address(s, 4 * unit));
+      __ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
+    } else {
+      __ ldp(t0, t1, Address(s, 2 * unit));
+      __ ldp(t2, t3, Address(s, 4 * unit));
+      __ ldp(t4, t5, Address(s, 6 * unit));
+      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+    }
+
+    __ subs(count, count, 16);
+    __ br(Assembler::LO, drain);

     int prefetch = PrefetchCopyIntervalInBytes;
     bool use_stride = false;

@@ -824,38 +803,56 @@ class StubGenerator: public StubCodeGenerator {
     if (PrefetchCopyIntervalInBytes > 0)
       __ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);

-    __ stp(t0, t1, Address(d, 2 * unit));
-    __ ldp(t0, t1, Address(s, 2 * unit));
-    __ stp(t2, t3, Address(d, 4 * unit));
-    __ ldp(t2, t3, Address(s, 4 * unit));
-    __ stp(t4, t5, Address(d, 6 * unit));
-    __ ldp(t4, t5, Address(s, 6 * unit));
-    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
-    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+    if (UseSIMDForMemoryOps) {
+      __ stpq(v0, v1, Address(d, 4 * unit));
+      __ ldpq(v0, v1, Address(s, 4 * unit));
+      __ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
+      __ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
+    } else {
+      __ stp(t0, t1, Address(d, 2 * unit));
+      __ ldp(t0, t1, Address(s, 2 * unit));
+      __ stp(t2, t3, Address(d, 4 * unit));
+      __ ldp(t2, t3, Address(s, 4 * unit));
+      __ stp(t4, t5, Address(d, 6 * unit));
+      __ ldp(t4, t5, Address(s, 6 * unit));
+      __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
+      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+    }

     __ subs(count, count, 8);
     __ br(Assembler::HS, again);

     // Drain
-    __ stp(t0, t1, Address(d, 2 * unit));
-    __ stp(t2, t3, Address(d, 4 * unit));
-    __ stp(t4, t5, Address(d, 6 * unit));
-    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
-
-    if (direction == copy_forwards) {
-      __ add(s, s, 2 * wordSize);
-      __ add(d, d, 2 * wordSize);
+    __ bind(drain);
+    if (UseSIMDForMemoryOps) {
+      __ stpq(v0, v1, Address(d, 4 * unit));
+      __ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
+    } else {
+      __ stp(t0, t1, Address(d, 2 * unit));
+      __ stp(t2, t3, Address(d, 4 * unit));
+      __ stp(t4, t5, Address(d, 6 * unit));
+      __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
     }

     {
       Label L1, L2;
       __ tbz(count, exact_log2(4), L1);
-      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
-      __ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
+      if (UseSIMDForMemoryOps) {
+        __ ldpq(v0, v1, Address(__ pre(s, 4 * unit)));
+        __ stpq(v0, v1, Address(__ pre(d, 4 * unit)));
+      } else {
+        __ ldp(t0, t1, Address(s, 2 * unit));
+        __ ldp(t2, t3, Address(__ pre(s, 4 * unit)));
+        __ stp(t0, t1, Address(d, 2 * unit));
+        __ stp(t2, t3, Address(__ pre(d, 4 * unit)));
+      }
       __ bind(L1);

+      if (direction == copy_forwards) {
+        __ add(s, s, 2 * wordSize);
+        __ add(d, d, 2 * wordSize);
+      }
+
       __ tbz(count, 1, L2);
       __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
       __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));

@@ -931,16 +928,135 @@ class StubGenerator: public StubCodeGenerator {
     int granularity = uabs(step);
     const Register t0 = r3, t1 = r4;

+    // <= 96 bytes do inline. Direction doesn't matter because we always
+    // load all the data before writing anything
+    Label copy4, copy8, copy16, copy32, copy80, copy128, copy_big, finish;
+    const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
+    const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
+    const Register send = r17, dend = r18;
+
+    if (PrefetchCopyIntervalInBytes > 0)
+      __ prfm(Address(s, 0), PLDL1KEEP);
+    __ cmp(count, (UseSIMDForMemoryOps ? 96:80)/granularity);
+    __ br(Assembler::HI, copy_big);
+
+    __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
+    __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
+
+    __ cmp(count, 16/granularity);
+    __ br(Assembler::LS, copy16);
+
+    __ cmp(count, 64/granularity);
+    __ br(Assembler::HI, copy80);
+
+    __ cmp(count, 32/granularity);
+    __ br(Assembler::LS, copy32);
+
+    // 33..64 bytes
+    if (UseSIMDForMemoryOps) {
+      __ ldpq(v0, v1, Address(s, 0));
+      __ ldpq(v2, v3, Address(send, -32));
+      __ stpq(v0, v1, Address(d, 0));
+      __ stpq(v2, v3, Address(dend, -32));
+    } else {
+      __ ldp(t0, t1, Address(s, 0));
+      __ ldp(t2, t3, Address(s, 16));
+      __ ldp(t4, t5, Address(send, -32));
+      __ ldp(t6, t7, Address(send, -16));
+
+      __ stp(t0, t1, Address(d, 0));
+      __ stp(t2, t3, Address(d, 16));
+      __ stp(t4, t5, Address(dend, -32));
+      __ stp(t6, t7, Address(dend, -16));
+    }
+    __ b(finish);
+
+    // 17..32 bytes
+    __ bind(copy32);
+    __ ldp(t0, t1, Address(s, 0));
+    __ ldp(t2, t3, Address(send, -16));
+    __ stp(t0, t1, Address(d, 0));
+    __ stp(t2, t3, Address(dend, -16));
+    __ b(finish);
+
+    // 65..80/96 bytes
+    // (96 bytes if SIMD because we do 32 byes per instruction)
+    __ bind(copy80);
+    if (UseSIMDForMemoryOps) {
+      __ ldpq(v0, v1, Address(s, 0));
+      __ ldpq(v2, v3, Address(s, 32));
+      __ ldpq(v4, v5, Address(send, -32));
+      __ stpq(v0, v1, Address(d, 0));
+      __ stpq(v2, v3, Address(d, 32));
+      __ stpq(v4, v5, Address(dend, -32));
+    } else {
+      __ ldp(t0, t1, Address(s, 0));
+      __ ldp(t2, t3, Address(s, 16));
+      __ ldp(t4, t5, Address(s, 32));
+      __ ldp(t6, t7, Address(s, 48));
+      __ ldp(t8, t9, Address(send, -16));
+
+      __ stp(t0, t1, Address(d, 0));
+      __ stp(t2, t3, Address(d, 16));
+      __ stp(t4, t5, Address(d, 32));
+      __ stp(t6, t7, Address(d, 48));
+      __ stp(t8, t9, Address(dend, -16));
+    }
+    __ b(finish);
+
+    // 0..16 bytes
+    __ bind(copy16);
+    __ cmp(count, 8/granularity);
+    __ br(Assembler::LO, copy8);
+
+    // 8..16 bytes
+    __ ldr(t0, Address(s, 0));
+    __ ldr(t1, Address(send, -8));
+    __ str(t0, Address(d, 0));
+    __ str(t1, Address(dend, -8));
+    __ b(finish);
+
+    if (granularity < 8) {
+      // 4..7 bytes
+      __ bind(copy8);
+      __ tbz(count, 2 - exact_log2(granularity), copy4);
+      __ ldrw(t0, Address(s, 0));
+      __ ldrw(t1, Address(send, -4));
+      __ strw(t0, Address(d, 0));
+      __ strw(t1, Address(dend, -4));
+      __ b(finish);
+      if (granularity < 4) {
+        // 0..3 bytes
+        __ bind(copy4);
+        __ cbz(count, finish); // get rid of 0 case
+        if (granularity == 2) {
+          __ ldrh(t0, Address(s, 0));
+          __ strh(t0, Address(d, 0));
+        } else { // granularity == 1
+          // Now 1..3 bytes. Handle the 1 and 2 byte case by copying
+          // the first and last byte.
+          // Handle the 3 byte case by loading and storing base + count/2
+          // (count == 1 (s+0)->(d+0), count == 2,3 (s+1) -> (d+1))
+          // This does means in the 1 byte case we load/store the same
+          // byte 3 times.
+          __ lsr(count, count, 1);
+          __ ldrb(t0, Address(s, 0));
+          __ ldrb(t1, Address(send, -1));
+          __ ldrb(t2, Address(s, count));
+          __ strb(t0, Address(d, 0));
+          __ strb(t1, Address(dend, -1));
+          __ strb(t2, Address(d, count));
+        }
+        __ b(finish);
+      }
+    }
+
+    __ bind(copy_big);
     if (is_backwards) {
       __ lea(s, Address(s, count, Address::lsl(exact_log2(-step))));
       __ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
     }

-    Label tail;
-
-    __ cmp(count, 16/granularity);
-    __ br(Assembler::LO, tail);
-
     // Now we've got the small case out of the way we can align the
     // source address on a 2-word boundary.

@@ -986,8 +1102,6 @@ class StubGenerator: public StubCodeGenerator {
 #endif
     }

-    __ cmp(count, 16/granularity);
-    __ br(Assembler::LT, tail);
-
     __ bind(aligned);

     // s is now 2-word-aligned.

@@ -1001,9 +1115,11 @@ class StubGenerator: public StubCodeGenerator {
       __ bl(copy_b);

     // And the tail.
-    __ bind(tail);
     copy_memory_small(s, d, count, tmp, step);
+
+    if (granularity >= 8) __ bind(copy8);
+    if (granularity >= 4) __ bind(copy4);
+    __ bind(finish);
   }
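The 1..3-byte tail case above is compact but subtle. Rewritten as plain C++ (a sketch of the same logic, not HotSpot code), with n the byte count:

#include <cstddef>
#include <cstdint>

// All loads happen before any store, so the copy is direction-agnostic.
// Byte s[n/2] covers the middle byte of the 3-byte case; for n == 1 the
// same byte is simply copied three times, as the original comment notes.
void copy_1_to_3(uint8_t* d, const uint8_t* s, size_t n) {
  uint8_t first = s[0];      // ldrb t0, [s, 0]
  uint8_t last  = s[n - 1];  // ldrb t1, [send, -1]
  uint8_t mid   = s[n / 2];  // ldrb t2, [s, count] after count >>= 1
  d[0]     = first;
  d[n - 1] = last;
  d[n / 2] = mid;
}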

View File

@@ -1984,6 +1984,7 @@ void TemplateInterpreterGenerator::count_bytecode() {
   __ push(rscratch3);
   Label L;
   __ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
+  __ prfm(Address(rscratch2), PSTL1STRM);
   __ bind(L);
   __ ldxr(rscratch1, rscratch2);
   __ add(rscratch1, rscratch1, 1);

View File

@@ -61,6 +61,10 @@
 #define HWCAP_CRC32 (1<<7)
 #endif

+#ifndef HWCAP_ATOMICS
+#define HWCAP_ATOMICS (1<<8)
+#endif
+
 int VM_Version::_cpu;
 int VM_Version::_model;
 int VM_Version::_model2;

@@ -172,6 +176,7 @@ void VM_Version::get_processor_features() {
   if (auxv & HWCAP_AES)   strcat(buf, ", aes");
   if (auxv & HWCAP_SHA1)  strcat(buf, ", sha1");
   if (auxv & HWCAP_SHA2)  strcat(buf, ", sha256");
+  if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");

   _features_string = os::strdup(buf);

@@ -191,6 +196,15 @@ void VM_Version::get_processor_features() {
     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
   }

+  if (auxv & HWCAP_ATOMICS) {
+    if (FLAG_IS_DEFAULT(UseLSE))
+      FLAG_SET_DEFAULT(UseLSE, true);
+  } else {
+    if (UseLSE) {
+      warning("UseLSE specified, but not supported on this CPU");
+    }
+  }
+
   if (auxv & HWCAP_AES) {
     UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
     UseAESIntrinsics =
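The auxv value tested here is the Linux hardware-capability bit vector. A minimal standalone probe for the same LSE bit (a sketch assuming a Linux/AArch64 host; HotSpot reads the same AT_HWCAP vector) looks like:

#include <cstdio>
#include <sys/auxv.h>

#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)  // same fallback definition the patch adds
#endif

int main() {
  unsigned long hwcap = getauxval(AT_HWCAP);  // kernel-reported CPU features
  std::printf("LSE atomics: %s\n",
              (hwcap & HWCAP_ATOMICS) ? "present" : "absent");
  return 0;
}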

View File

@@ -47,7 +47,7 @@ const bool CCallingConventionRequiresIntsAsLongs = true;
 // The expected size in bytes of a cache line, used to pad data structures.
 #define DEFAULT_CACHE_LINE_SIZE 128

-#if defined(COMPILER2) && defined(AIX)
+#if defined(COMPILER2) && (defined(AIX) || defined(linux))
 // Include Transactional Memory lock eliding optimization
 #define INCLUDE_RTM_OPT 1
 #endif

View File

@@ -76,6 +76,8 @@ define_pd_global(uintx, TypeProfileLevel, 111);

 define_pd_global(bool, CompactStrings, true);

+define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
+
 // Platform dependent flag handling: flags only defined on this platform.
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
                                                                                               \

View File

@@ -2137,8 +2137,6 @@ MachTypeNode *Matcher::make_decode_node() {
   return decode;
 }
 */

-// Threshold size for cleararray.
-const int Matcher::init_array_short_size = 8 * BytesPerLong;
-
 // false => size gets scaled to BytesPerLong, ok.
 const bool Matcher::init_array_count_is_in_bytes = false;

View File

@@ -255,7 +255,16 @@ void VM_Version::initialize() {
   }
 #endif
 #ifdef linux
-  // TODO: check kernel version (we currently have too old versions only)
+  // At least Linux kernel 4.2, as the problematic behavior of syscalls
+  // being called in the middle of a transaction has been addressed.
+  // Please, refer to commit b4b56f9ecab40f3b4ef53e130c9f6663be491894
+  // in Linux kernel source tree: https://goo.gl/Kc5i7A
+  if (os::Linux::os_version_is_known()) {
+    if (os::Linux::os_version() >= 0x040200)
+      os_too_old = false;
+  } else {
+    vm_exit_during_initialization("RTM can not be enabled: kernel version is unknown.");
+  }
 #endif
   if (os_too_old) {
     vm_exit_during_initialization("RTM is not supported on this OS version.");
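The 0x040200 constant reads as kernel 4.2.0, assuming os::Linux::os_version() packs one version component per byte. A sketch of that comparison under the same assumption (the helper name is illustrative, not HotSpot's):

#include <cstdint>

// Assumes the packed form (major << 16) | (minor << 8) | patch,
// so 0x040200 == 4.2.0.
static bool kernel_at_least(uint32_t packed, int major, int minor) {
  return packed >= (uint32_t)((major << 16) | (minor << 8));
}
// kernel_at_least(os::Linux::os_version(), 4, 2) mirrors the check above.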

View File

@@ -90,6 +90,8 @@ define_pd_global(uintx, TypeProfileLevel, 111);

 define_pd_global(bool, CompactStrings, true);

+define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
                                                                                               \
   product(intx, UseVIS, 99,                                                                   \

View File

@@ -1980,9 +1980,6 @@ const bool Matcher::isSimpleConstant64(jlong value) {
 // No scaling for the parameter the ClearArray node.
 const bool Matcher::init_array_count_is_in_bytes = true;

-// Threshold size for cleararray.
-const int Matcher::init_array_short_size = 8 * BytesPerLong;
-
 // No additional cost for CMOVL.
 const int Matcher::long_cmove_cost() { return 0; }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -777,6 +777,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case 0x6E: // movd case 0x6E: // movd
case 0x7E: // movd case 0x7E: // movd
case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
case 0xFE: // paddd
debug_only(has_disp32 = true); debug_only(has_disp32 = true);
break; break;
@ -926,6 +927,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
ip++; // skip P2, move to opcode ip++; // skip P2, move to opcode
// To find the end of instruction (which == end_pc_operand). // To find the end of instruction (which == end_pc_operand).
switch (0xFF & *ip) { switch (0xFF & *ip) {
case 0x22: // pinsrd r, r/a, #8
case 0x61: // pcmpestri r, r/a, #8 case 0x61: // pcmpestri r, r/a, #8
case 0x70: // pshufd r, r/a, #8 case 0x70: // pshufd r, r/a, #8
case 0x73: // psrldq r, #8 case 0x73: // psrldq r, #8
@ -3953,6 +3955,83 @@ void Assembler::setb(Condition cc, Register dst) {
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
} }
void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_ssse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x0F);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
}
void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x0E);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
}
void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0xCC);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8((unsigned char)imm8);
}
void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xC8);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xC9);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xCA);
emit_int8((unsigned char)(0xC0 | encode));
}
// xmm0 is implicit additional source to this instruction.
void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xCB);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xCC);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xCD);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::shll(Register dst, int imm8) { void Assembler::shll(Register dst, int imm8) {
assert(isShiftCount(imm8), "illegal shift count"); assert(isShiftCount(imm8), "illegal shift count");
int encode = prefix_and_encode(dst->encoding()); int encode = prefix_and_encode(dst->encoding());
@ -4931,6 +5010,15 @@ void Assembler::paddd(XMMRegister dst, XMMRegister src) {
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
} }
void Assembler::paddd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFE);
emit_operand(dst, src);
}
void Assembler::paddq(XMMRegister dst, XMMRegister src) { void Assembler::paddq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@ -5611,8 +5699,9 @@ void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_
} }
void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), ""); assert(VM_Version::supports_avx(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0; int nds_enc = nds->is_valid() ? nds->encoding() : 0;
@ -5621,11 +5710,12 @@ void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 128 bits // 0x00 - insert into lower 128 bits
// 0x01 - insert into upper 128 bits // 0x01 - insert into upper 128 bits
emit_int8(0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0; int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@ -5633,26 +5723,29 @@ void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src,
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 256 bits // 0x00 - insert into lower 256 bits
// 0x01 - insert into upper 256 bits // 0x01 - insert into upper 256 bits
emit_int8(value & 0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) { void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity"); assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
// swap src<->dst for encoding // swap src<->dst for encoding
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x1A); emit_int8(0x1A);
emit_operand(dst, src); emit_operand(dst, src);
// 0x00 - insert into lower 256 bits // 0x00 - insert into lower 256 bits
// 0x01 - insert into upper 128 bits // 0x01 - insert into upper 256 bits
emit_int8(value & 0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0; int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@ -5662,57 +5755,64 @@ void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src,
// 0x01 - insert into q1 128 bits (128..255) // 0x01 - insert into q1 128 bits (128..255)
// 0x02 - insert into q2 128 bits (256..383) // 0x02 - insert into q2 128 bits (256..383)
// 0x03 - insert into q3 128 bits (384..511) // 0x03 - insert into q3 128 bits (384..511)
emit_int8(value & 0x3); emit_int8(imm8 & 0x03);
} }
void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) { void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
assert(VM_Version::supports_avx(), ""); assert(VM_Version::supports_avx(), "");
assert(dst != xnoreg, "sanity"); assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x03, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
// swap src<->dst for encoding // swap src<->dst for encoding
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x18); emit_int8(0x18);
emit_operand(dst, src); emit_operand(dst, src);
// 0x00 - insert into q0 128 bits (0..127) // 0x00 - insert into q0 128 bits (0..127)
// 0x01 - insert into q1 128 bits (128..255) // 0x01 - insert into q1 128 bits (128..255)
// 0x02 - insert into q2 128 bits (256..383) // 0x02 - insert into q2 128 bits (256..383)
// 0x03 - insert into q3 128 bits (384..511) // 0x03 - insert into q3 128 bits (384..511)
emit_int8(value & 0x3); emit_int8(imm8 & 0x03);
} }
void Assembler::vinsertf128h(XMMRegister dst, Address src) { void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
assert(VM_Version::supports_avx(), ""); assert(VM_Version::supports_avx(), "");
assert(dst != xnoreg, "sanity"); assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
// swap src<->dst for encoding // swap src<->dst for encoding
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x18); emit_int8(0x18);
emit_operand(dst, src); emit_operand(dst, src);
// 0x00 - insert into lower 128 bits
// 0x01 - insert into upper 128 bits // 0x01 - insert into upper 128 bits
emit_int8(0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) { void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), ""); assert(VM_Version::supports_avx(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19); emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 128 bits // 0x00 - extract from lower 128 bits
// 0x01 - insert into upper 128 bits // 0x01 - extract from upper 128 bits
emit_int8(0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vextractf128h(Address dst, XMMRegister src) { void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), ""); assert(VM_Version::supports_avx(), "");
assert(src != xnoreg, "sanity"); assert(src != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
@ -5720,12 +5820,14 @@ void Assembler::vextractf128h(Address dst, XMMRegister src) {
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19); emit_int8(0x19);
emit_operand(src, dst); emit_operand(src, dst);
// 0x00 - extract from lower 128 bits
// 0x01 - extract from upper 128 bits // 0x01 - extract from upper 128 bits
emit_int8(0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx2(), ""); assert(VM_Version::supports_avx2(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0; int nds_enc = nds->is_valid() ? nds->encoding() : 0;
@@ -5734,11 +5836,12 @@ void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 128 bits // 0x00 - insert into lower 128 bits
// 0x01 - insert into upper 128 bits // 0x01 - insert into upper 128 bits
emit_int8(0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0; int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -5746,39 +5849,44 @@ void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src,
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 256 bits // 0x00 - insert into lower 256 bits
// 0x01 - insert into upper 256 bits // 0x01 - insert into upper 256 bits
emit_int8(value & 0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vinserti128h(XMMRegister dst, Address src) { void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
assert(VM_Version::supports_avx2(), ""); assert(VM_Version::supports_avx2(), "");
assert(dst != xnoreg, "sanity"); assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
// swap src<->dst for encoding // swap src<->dst for encoding
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x38); emit_int8(0x38);
emit_operand(dst, src); emit_operand(dst, src);
// 0x00 - insert into lower 128 bits
// 0x01 - insert into upper 128 bits // 0x01 - insert into upper 128 bits
emit_int8(0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) { void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), ""); assert(VM_Version::supports_avx(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39); emit_int8(0x39);
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 128 bits // 0x00 - extract from lower 128 bits
// 0x01 - insert into upper 128 bits // 0x01 - extract from upper 128 bits
emit_int8(0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vextracti128h(Address dst, XMMRegister src) { void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx2(), ""); assert(VM_Version::supports_avx2(), "");
assert(src != xnoreg, "sanity"); assert(src != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
@@ -5786,47 +5894,53 @@ void Assembler::vextracti128h(Address dst, XMMRegister src) {
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39); emit_int8(0x39);
emit_operand(src, dst); emit_operand(src, dst);
// 0x00 - extract from lower 128 bits
// 0x01 - extract from upper 128 bits // 0x01 - extract from upper 128 bits
emit_int8(0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) { void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x3B); emit_int8(0x3B);
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - extract from lower 256 bits // 0x00 - extract from lower 256 bits
// 0x01 - extract from upper 256 bits // 0x01 - extract from upper 256 bits
emit_int8(value & 0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) { void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39); emit_int8(0x39);
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - extract from bits 127:0
// 0x01 - extract from bits 255:128 // 0x01 - extract from bits 255:128
// 0x02 - extract from bits 383:256 // 0x02 - extract from bits 383:256
// 0x03 - extract from bits 511:384 // 0x03 - extract from bits 511:384
emit_int8(value & 0x3); emit_int8(imm8 & 0x03);
} }
void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) { void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x1B); emit_int8(0x1B);
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - extract from lower 256 bits // 0x00 - extract from lower 256 bits
// 0x01 - extract from upper 256 bits // 0x01 - extract from upper 256 bits
emit_int8(value & 0x1); emit_int8(imm8 & 0x01);
} }
void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) { void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity"); assert(src != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit); attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit);
@@ -5835,11 +5949,12 @@ void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) {
emit_operand(src, dst); emit_operand(src, dst);
// 0x00 - extract from lower 256 bits // 0x00 - extract from lower 256 bits
// 0x01 - extract from upper 256 bits // 0x01 - extract from upper 256 bits
emit_int8(value & 0x01); emit_int8(imm8 & 0x01);
} }
void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) { void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), ""); assert(VM_Version::supports_avx(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -5849,12 +5964,13 @@ void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
// 0x01 - extract from bits 255:128 // 0x01 - extract from bits 255:128
// 0x02 - extract from bits 383:256 // 0x02 - extract from bits 383:256
// 0x03 - extract from bits 511:384 // 0x03 - extract from bits 511:384
emit_int8(value & 0x3); emit_int8(imm8 & 0x03);
} }
void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) { void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity"); assert(src != xnoreg, "sanity");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
@@ -5865,19 +5981,21 @@ void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
// 0x01 - extract from bits 255:128 // 0x01 - extract from bits 255:128
// 0x02 - extract from bits 383:256 // 0x02 - extract from bits 383:256
// 0x03 - extract from bits 511:384 // 0x03 - extract from bits 511:384
emit_int8(value & 0x3); emit_int8(imm8 & 0x03);
} }
void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) { void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19); emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - extract from bits 127:0
// 0x01 - extract from bits 255:128 // 0x01 - extract from bits 255:128
// 0x02 - extract from bits 383:256 // 0x02 - extract from bits 383:256
// 0x03 - extract from bits 511:384 // 0x03 - extract from bits 511:384
emit_int8(value & 0x3); emit_int8(imm8 & 0x03);
} }
// duplicate 4-bytes integer data from src into 8 locations in dest // duplicate 4-bytes integer data from src into 8 locations in dest
View File
@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@@ -1672,6 +1672,18 @@ private:
void setb(Condition cc, Register dst); void setb(Condition cc, Register dst);
void palignr(XMMRegister dst, XMMRegister src, int imm8);
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
void sha1nexte(XMMRegister dst, XMMRegister src);
void sha1msg1(XMMRegister dst, XMMRegister src);
void sha1msg2(XMMRegister dst, XMMRegister src);
// xmm0 is implicit additional source to the following instruction.
void sha256rnds2(XMMRegister dst, XMMRegister src);
void sha256msg1(XMMRegister dst, XMMRegister src);
void sha256msg2(XMMRegister dst, XMMRegister src);
void shldl(Register dst, Register src); void shldl(Register dst, Register src);
void shldl(Register dst, Register src, int8_t imm8); void shldl(Register dst, Register src, int8_t imm8);
@@ -1868,6 +1880,7 @@ private:
void paddb(XMMRegister dst, XMMRegister src); void paddb(XMMRegister dst, XMMRegister src);
void paddw(XMMRegister dst, XMMRegister src); void paddw(XMMRegister dst, XMMRegister src);
void paddd(XMMRegister dst, XMMRegister src); void paddd(XMMRegister dst, XMMRegister src);
void paddd(XMMRegister dst, Address src);
void paddq(XMMRegister dst, XMMRegister src); void paddq(XMMRegister dst, XMMRegister src);
void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1958,33 +1971,31 @@ private:
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// Copy low 128bit into high 128bit of YMM registers. // 128bit copy from/to 256bit (YMM) vector registers
void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vextractf128h(XMMRegister dst, XMMRegister src); void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti128h(XMMRegister dst, XMMRegister src); void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vextractf128(Address dst, XMMRegister src, uint8_t imm8);
void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
// Load/store high 128bit of YMM registers which does not destroy other half. // 256bit copy from/to 512bit (ZMM) vector registers
void vinsertf128h(XMMRegister dst, Address src); void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti128h(XMMRegister dst, Address src); void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vextractf128h(Address dst, XMMRegister src); void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti128h(Address dst, XMMRegister src); void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
// Copy low 256bit into high 256bit of ZMM registers. // 128bit copy from/to 256bit (YMM) or 512bit (ZMM) vector registers
void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti64x4h(XMMRegister dst, XMMRegister src, int value); void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextractf64x4h(XMMRegister dst, XMMRegister src, int value); void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);
void vextractf64x4h(Address dst, XMMRegister src, int value); void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinsertf64x4h(XMMRegister dst, Address src, int value); void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
// Copy targeted 128bit segments of the ZMM registers
void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
void vextractf32x4h(Address dst, XMMRegister src, int value);
void vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
void vinsertf32x4h(XMMRegister dst, Address src, int value);
// duplicate 4-bytes integer data from src into 8 locations in dest // duplicate 4-bytes integer data from src into 8 locations in dest
void vpbroadcastd(XMMRegister dst, XMMRegister src); void vpbroadcastd(XMMRegister dst, XMMRegister src);
View File
@@ -97,6 +97,8 @@ define_pd_global(bool, CompactStrings, true); define_pd_global(bool, CompactStrings, true);
define_pd_global(bool, PreserveFramePointer, false); define_pd_global(bool, PreserveFramePointer, false);
define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \ #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
\ \
develop(bool, IEEEPrecision, true, \ develop(bool, IEEEPrecision, true, \
View File
@@ -3445,7 +3445,7 @@ void MacroAssembler::movptr(Address dst, Register src) {
void MacroAssembler::movdqu(Address dst, XMMRegister src) { void MacroAssembler::movdqu(Address dst, XMMRegister src) {
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
Assembler::vextractf32x4h(dst, src, 0); Assembler::vextractf32x4(dst, src, 0);
} else { } else {
Assembler::movdqu(dst, src); Assembler::movdqu(dst, src);
} }
@@ -3453,7 +3453,7 @@ void MacroAssembler::movdqu(Address dst, XMMRegister src) {
void MacroAssembler::movdqu(XMMRegister dst, Address src) { void MacroAssembler::movdqu(XMMRegister dst, Address src) {
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
Assembler::vinsertf32x4h(dst, src, 0); Assembler::vinsertf32x4(dst, dst, src, 0);
} else { } else {
Assembler::movdqu(dst, src); Assembler::movdqu(dst, src);
} }
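These movdqu changes encode the rule that plain SSE moves cannot reach xmm16..xmm31: on AVX-512 hardware without AVX512VL, the patch reroutes through the EVEX-encoded 128-bit insert/extract with imm8 = 0. A hedged sketch of the dispatch (standalone C++; CpuInfo and the returned strings are illustrative, not HotSpot types):

#include <cstdio>

struct CpuInfo { bool avx512; bool avx512vl; };

// movdqu cannot encode xmm16..xmm31 (encoding > 15); without AVX512VL the
// only 128-bit path to those registers is the EVEX insert/extract, lane 0.
static const char* pick_movdqu_form(const CpuInfo& cpu, int xmm_encoding) {
  if (cpu.avx512 && !cpu.avx512vl && xmm_encoding > 15) {
    return "vextractf32x4 / vinsertf32x4, imm8 = 0";
  }
  return "movdqu";
}

int main() {
  CpuInfo knl = {true, false};   // AVX-512F without VL, e.g. Knights Landing
  printf("xmm20: %s\n", pick_movdqu_form(knl, 20));
  printf("xmm7 : %s\n", pick_movdqu_form(knl, 7));
  return 0;
}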
@@ -3478,7 +3478,7 @@ void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::vmovdqu(Address dst, XMMRegister src) { void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
Assembler::vextractf64x4h(dst, src, 0); vextractf64x4_low(dst, src);
} else { } else {
Assembler::vmovdqu(dst, src); Assembler::vmovdqu(dst, src);
} }
@@ -3486,7 +3486,7 @@ void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
void MacroAssembler::vmovdqu(XMMRegister dst, Address src) { void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
Assembler::vinsertf64x4h(dst, src, 0); vinsertf64x4_low(dst, src);
} else { } else {
Assembler::vmovdqu(dst, src); Assembler::vmovdqu(dst, src);
} }
@@ -5649,14 +5649,14 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
// Save upper half of ZMM registers // Save upper half of ZMM registers
subptr(rsp, 32*num_xmm_regs); subptr(rsp, 32*num_xmm_regs);
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1); vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
} }
} }
assert(UseAVX > 0, "256 bit vectors are supported only with AVX"); assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
// Save upper half of YMM registers // Save upper half of YMM registers
subptr(rsp, 16*num_xmm_regs); subptr(rsp, 16*num_xmm_regs);
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
vextractf128h(Address(rsp, n*16), as_XMMRegister(n)); vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
} }
} }
#endif #endif
@@ -5665,7 +5665,7 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
#ifdef _LP64 #ifdef _LP64
if (VM_Version::supports_evex()) { if (VM_Version::supports_evex()) {
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0); vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
} }
} else { } else {
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
@@ -5753,7 +5753,7 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
#ifdef _LP64 #ifdef _LP64
if (VM_Version::supports_evex()) { if (VM_Version::supports_evex()) {
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0); vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
} }
} else { } else {
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
@@ -5771,12 +5771,12 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
if (MaxVectorSize > 16) { if (MaxVectorSize > 16) {
// Restore upper half of YMM registers. // Restore upper half of YMM registers.
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
vinsertf128h(as_XMMRegister(n), Address(rsp, n*16)); vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
} }
addptr(rsp, 16*num_xmm_regs); addptr(rsp, 16*num_xmm_regs);
if(UseAVX > 2) { if(UseAVX > 2) {
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1); vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
} }
addptr(rsp, 32*num_xmm_regs); addptr(rsp, 32*num_xmm_regs);
} }
@@ -7198,21 +7198,50 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_
} }
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) { void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, bool is_large) {
// cnt - number of qwords (8-byte words). // cnt - number of qwords (8-byte words).
// base - start address, qword aligned. // base - start address, qword aligned.
// is_large - if optimizers know cnt is larger than InitArrayShortSize
assert(base==rdi, "base register must be edi for rep stos"); assert(base==rdi, "base register must be edi for rep stos");
assert(tmp==rax, "tmp register must be eax for rep stos"); assert(tmp==rax, "tmp register must be eax for rep stos");
assert(cnt==rcx, "cnt register must be ecx for rep stos"); assert(cnt==rcx, "cnt register must be ecx for rep stos");
assert(InitArrayShortSize % BytesPerLong == 0,
"InitArrayShortSize should be the multiple of BytesPerLong");
Label DONE;
xorptr(tmp, tmp); xorptr(tmp, tmp);
if (!is_large) {
Label LOOP, LONG;
cmpptr(cnt, InitArrayShortSize/BytesPerLong);
jccb(Assembler::greater, LONG);
NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
decrement(cnt);
jccb(Assembler::negative, DONE); // Zero length
// Use individual pointer-sized stores for small counts:
BIND(LOOP);
movptr(Address(base, cnt, Address::times_ptr), tmp);
decrement(cnt);
jccb(Assembler::greaterEqual, LOOP);
jmpb(DONE);
BIND(LONG);
}
// Use longer rep-prefixed ops for non-small counts:
if (UseFastStosb) { if (UseFastStosb) {
shlptr(cnt,3); // convert to number of bytes shlptr(cnt, 3); // convert to number of bytes
rep_stosb(); rep_stosb();
} else { } else {
NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
rep_stos(); rep_stos();
} }
BIND(DONE);
} }
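Functionally, the new clear_mem amounts to the strategy below: short, statically-unknown lengths take an inline store loop capped at InitArrayShortSize, everything else falls through to the rep-stos path. A standalone C++ model (assuming the 64-bit VM and the 8*BytesPerLong default from the new globals entry; clear_mem_model is an illustrative name):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

static const size_t kInitArrayShortSize = 8 * 8;  // 8 * BytesPerLong, per the new globals entry

// base: qword-aligned start; cnt_qwords: length in 8-byte words;
// is_large: set when the optimizer already knows the array is long.
static void clear_mem_model(uint64_t* base, size_t cnt_qwords, bool is_large) {
  if (!is_large && cnt_qwords <= kInitArrayShortSize / 8) {
    for (size_t i = 0; i < cnt_qwords; i++) {  // the movptr store loop
      base[i] = 0;                             // handles cnt == 0 naturally
    }
    return;
  }
  memset(base, 0, cnt_qwords * 8);             // the rep_stosb / rep_stos path
}

int main() {
  uint64_t buf[16] = {1, 2, 3};
  clear_mem_model(buf, 3, /*is_large*/ false);  // short: store loop
  clear_mem_model(buf, 16, /*is_large*/ true);  // large: rep-stos path
  printf("buf[0] = %llu\n", (unsigned long long)buf[0]);
  return 0;
}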
#ifdef COMPILER2 #ifdef COMPILER2
View File
@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@@ -48,7 +48,6 @@ class MacroAssembler: public Assembler {
// This is the base routine called by the different versions of call_VM_leaf. The interpreter // This is the base routine called by the different versions of call_VM_leaf. The interpreter
// may customize this version by overriding it for its purposes (e.g., to save/restore // may customize this version by overriding it for its purposes (e.g., to save/restore
// additional registers when doing a VM call). // additional registers when doing a VM call).
#define COMMA ,
virtual void call_VM_leaf_base( virtual void call_VM_leaf_base(
address entry_point, // the entry point address entry_point, // the entry point
@@ -903,35 +902,66 @@ class MacroAssembler: public Assembler {
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
void ldmxcsr(AddressLiteral src); void ldmxcsr(AddressLiteral src);
void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
Register buf, Register state, Register ofs, Register limit, Register rsp,
bool multi_block);
#ifdef _LP64
void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
Register buf, Register state, Register ofs, Register limit, Register rsp,
bool multi_block, XMMRegister shuf_mask);
#else
void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
Register buf, Register state, Register ofs, Register limit, Register rsp,
bool multi_block);
#endif
void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp); Register rax, Register rcx, Register rdx, Register tmp);
#ifdef _LP64
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1 LP64_ONLY(COMMA Register tmp2)); Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
Register rdx NOT_LP64(COMMA Register tmp) LP64_ONLY(COMMA Register tmp1) Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
LP64_ONLY(COMMA Register tmp2) LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4));
void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rbx LP64_ONLY(COMMA Register rcx), Register rdx Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2,
LP64_ONLY(COMMA Register tmp1) LP64_ONLY(COMMA Register tmp2) Register tmp3, Register tmp4);
LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4));
void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx NOT_LP64(COMMA Register tmp) Register rax, Register rcx, Register rdx, Register tmp1,
LP64_ONLY(COMMA Register r8) LP64_ONLY(COMMA Register r9) Register tmp2, Register tmp3, Register tmp4);
LP64_ONLY(COMMA Register r10) LP64_ONLY(COMMA Register r11)); #else
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
Register rdx, Register tmp);
void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rbx, Register rdx);
void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp);
#ifndef _LP64
void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
Register edx, Register ebx, Register esi, Register edi, Register edx, Register ebx, Register esi, Register edi,
Register ebp, Register esp); Register ebp, Register esp);
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
Register esi, Register edi, Register ebp, Register esp); Register esi, Register edi, Register ebp, Register esp);
#endif #endif
@@ -1185,14 +1215,131 @@ public:
void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); } void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); } void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
// Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector. void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { if (UseAVX > 1) { // vinserti128 is available only in AVX2
if (UseAVX > 1) // vinserti128h is available only in AVX2 Assembler::vinserti128(dst, nds, src, imm8);
Assembler::vinserti128h(dst, nds, src); } else {
else Assembler::vinsertf128(dst, nds, src, imm8);
Assembler::vinsertf128h(dst, nds, src); }
} }
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
if (UseAVX > 1) { // vinserti128 is available only in AVX2
Assembler::vinserti128(dst, nds, src, imm8);
} else {
Assembler::vinsertf128(dst, nds, src, imm8);
}
}
void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
if (UseAVX > 1) { // vextracti128 is available only in AVX2
Assembler::vextracti128(dst, src, imm8);
} else {
Assembler::vextractf128(dst, src, imm8);
}
}
void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
if (UseAVX > 1) { // vextracti128 is available only in AVX2
Assembler::vextracti128(dst, src, imm8);
} else {
Assembler::vextractf128(dst, src, imm8);
}
}
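These wrappers exist because the integer forms (vinserti128/vextracti128) are AVX2-only, while the float forms move the same 128 bits of raw register state, so AVX1 hardware can substitute them. A small model of the dispatch (standalone C++; UseAVX is mimicked as a plain int, not the real VM flag):

#include <cstdio>

static int UseAVX = 1;  // pretend we run on an AVX1-only part

// vinserti128 needs AVX2; vinsertf128 moves the identical 128 bits of raw
// state, so it is a safe stand-in on older hardware.
static const char* insert128_opcode() {
  return (UseAVX > 1) ? "vinserti128" : "vinsertf128";
}

int main() {
  printf("selected: %s\n", insert128_opcode());  // vinsertf128 on AVX1
  UseAVX = 2;
  printf("selected: %s\n", insert128_opcode());  // vinserti128 on AVX2
  return 0;
}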
// 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
void vinserti128_high(XMMRegister dst, XMMRegister src) {
vinserti128(dst, dst, src, 1);
}
void vinserti128_high(XMMRegister dst, Address src) {
vinserti128(dst, dst, src, 1);
}
void vextracti128_high(XMMRegister dst, XMMRegister src) {
vextracti128(dst, src, 1);
}
void vextracti128_high(Address dst, XMMRegister src) {
vextracti128(dst, src, 1);
}
void vinsertf128_high(XMMRegister dst, XMMRegister src) {
vinsertf128(dst, dst, src, 1);
}
void vinsertf128_high(XMMRegister dst, Address src) {
vinsertf128(dst, dst, src, 1);
}
void vextractf128_high(XMMRegister dst, XMMRegister src) {
vextractf128(dst, src, 1);
}
void vextractf128_high(Address dst, XMMRegister src) {
vextractf128(dst, src, 1);
}
// 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
vinserti64x4(dst, dst, src, 1);
}
void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
vinsertf64x4(dst, dst, src, 1);
}
void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
vextracti64x4(dst, src, 1);
}
void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
vextractf64x4(dst, src, 1);
}
void vextractf64x4_high(Address dst, XMMRegister src) {
vextractf64x4(dst, src, 1);
}
void vinsertf64x4_high(XMMRegister dst, Address src) {
vinsertf64x4(dst, dst, src, 1);
}
// 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
void vinserti128_low(XMMRegister dst, XMMRegister src) {
vinserti128(dst, dst, src, 0);
}
void vinserti128_low(XMMRegister dst, Address src) {
vinserti128(dst, dst, src, 0);
}
void vextracti128_low(XMMRegister dst, XMMRegister src) {
vextracti128(dst, src, 0);
}
void vextracti128_low(Address dst, XMMRegister src) {
vextracti128(dst, src, 0);
}
void vinsertf128_low(XMMRegister dst, XMMRegister src) {
vinsertf128(dst, dst, src, 0);
}
void vinsertf128_low(XMMRegister dst, Address src) {
vinsertf128(dst, dst, src, 0);
}
void vextractf128_low(XMMRegister dst, XMMRegister src) {
vextractf128(dst, src, 0);
}
void vextractf128_low(Address dst, XMMRegister src) {
vextractf128(dst, src, 0);
}
// 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
vinserti64x4(dst, dst, src, 0);
}
void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
vinsertf64x4(dst, dst, src, 0);
}
void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
vextracti64x4(dst, src, 0);
}
void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
vextractf64x4(dst, src, 0);
}
void vextractf64x4_low(Address dst, XMMRegister src) {
vextractf64x4(dst, src, 0);
}
void vinsertf64x4_low(XMMRegister dst, Address src) {
vinsertf64x4(dst, dst, src, 0);
}
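The _high/_low suffix convention introduced here is pure sugar: each helper pins imm8 to 1 or 0 so call sites state intent instead of passing a magic immediate. A sketch of the pattern outside HotSpot (illustrative Asm struct, not the real MacroAssembler):

#include <cstdio>

struct Asm {
  void vextractf128(const char* dst, const char* src, int imm8) {
    printf("vextractf128 %s, %s, %d\n", dst, src, imm8);
  }
  // 128-bit copy from the high half of a YMM register (imm8 pinned to 1).
  void vextractf128_high(const char* dst, const char* src) { vextractf128(dst, src, 1); }
  // 128-bit copy from the low half (imm8 pinned to 0).
  void vextractf128_low(const char* dst, const char* src) { vextractf128(dst, src, 0); }
};

int main() {
  Asm a;
  a.vextractf128_high("Address(rsp, 0)", "xmm0");   // spills bits 255:128
  a.vextractf128_low("Address(rsp, 16)", "xmm0");   // spills bits 127:0
  return 0;
}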
// Carry-Less Multiplication Quadword // Carry-Less Multiplication Quadword
void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) { void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
// 0x00 - multiply lower 64 bits [0:63] // 0x00 - multiply lower 64 bits [0:63]
@@ -1284,8 +1431,9 @@ public:
// C2 compiled method's prolog code. // C2 compiled method's prolog code.
void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b); void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b);
// clear memory of size 'cnt' qwords, starting at 'base'. // clear memory of size 'cnt' qwords, starting at 'base';
void clear_mem(Register base, Register cnt, Register rtmp); // if 'is_large' is set, do not try to produce short loop
void clear_mem(Register base, Register cnt, Register rtmp, bool is_large);
#ifdef COMPILER2 #ifdef COMPILER2
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
View File
@@ -0,0 +1,495 @@
/*
* Copyright (c) 2016, Intel Corporation.
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
// ofs and limit are used for multi-block byte array.
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
void MacroAssembler::fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block) {
Label start, done_hash, loop0;
address upper_word_mask = StubRoutines::x86::upper_word_mask_addr();
address shuffle_byte_flip_mask = StubRoutines::x86::shuffle_byte_flip_mask_addr();
bind(start);
movdqu(abcd, Address(state, 0));
pinsrd(e0, Address(state, 16), 3);
movdqu(shuf_mask, ExternalAddress(upper_word_mask)); // 0xFFFFFFFF000000000000000000000000
pand(e0, shuf_mask);
pshufd(abcd, abcd, 0x1B);
movdqu(shuf_mask, ExternalAddress(shuffle_byte_flip_mask)); //0x000102030405060708090a0b0c0d0e0f
bind(loop0);
// Save hash values for addition after rounds
movdqu(Address(rsp, 0), e0);
movdqu(Address(rsp, 16), abcd);
// Rounds 0 - 3
movdqu(msg0, Address(buf, 0));
pshufb(msg0, shuf_mask);
paddd(e0, msg0);
movdqa(e1, abcd);
sha1rnds4(abcd, e0, 0);
// Rounds 4 - 7
movdqu(msg1, Address(buf, 16));
pshufb(msg1, shuf_mask);
sha1nexte(e1, msg1);
movdqa(e0, abcd);
sha1rnds4(abcd, e1, 0);
sha1msg1(msg0, msg1);
// Rounds 8 - 11
movdqu(msg2, Address(buf, 32));
pshufb(msg2, shuf_mask);
sha1nexte(e0, msg2);
movdqa(e1, abcd);
sha1rnds4(abcd, e0, 0);
sha1msg1(msg1, msg2);
pxor(msg0, msg2);
// Rounds 12 - 15
movdqu(msg3, Address(buf, 48));
pshufb(msg3, shuf_mask);
sha1nexte(e1, msg3);
movdqa(e0, abcd);
sha1msg2(msg0, msg3);
sha1rnds4(abcd, e1, 0);
sha1msg1(msg2, msg3);
pxor(msg1, msg3);
// Rounds 16 - 19
sha1nexte(e0, msg0);
movdqa(e1, abcd);
sha1msg2(msg1, msg0);
sha1rnds4(abcd, e0, 0);
sha1msg1(msg3, msg0);
pxor(msg2, msg0);
// Rounds 20 - 23
sha1nexte(e1, msg1);
movdqa(e0, abcd);
sha1msg2(msg2, msg1);
sha1rnds4(abcd, e1, 1);
sha1msg1(msg0, msg1);
pxor(msg3, msg1);
// Rounds 24 - 27
sha1nexte(e0, msg2);
movdqa(e1, abcd);
sha1msg2(msg3, msg2);
sha1rnds4(abcd, e0, 1);
sha1msg1(msg1, msg2);
pxor(msg0, msg2);
// Rounds 28 - 31
sha1nexte(e1, msg3);
movdqa(e0, abcd);
sha1msg2(msg0, msg3);
sha1rnds4(abcd, e1, 1);
sha1msg1(msg2, msg3);
pxor(msg1, msg3);
// Rounds 32 - 35
sha1nexte(e0, msg0);
movdqa(e1, abcd);
sha1msg2(msg1, msg0);
sha1rnds4(abcd, e0, 1);
sha1msg1(msg3, msg0);
pxor(msg2, msg0);
// Rounds 36 - 39
sha1nexte(e1, msg1);
movdqa(e0, abcd);
sha1msg2(msg2, msg1);
sha1rnds4(abcd, e1, 1);
sha1msg1(msg0, msg1);
pxor(msg3, msg1);
// Rounds 40 - 43
sha1nexte(e0, msg2);
movdqa(e1, abcd);
sha1msg2(msg3, msg2);
sha1rnds4(abcd, e0, 2);
sha1msg1(msg1, msg2);
pxor(msg0, msg2);
// Rounds 44 - 47
sha1nexte(e1, msg3);
movdqa(e0, abcd);
sha1msg2(msg0, msg3);
sha1rnds4(abcd, e1, 2);
sha1msg1(msg2, msg3);
pxor(msg1, msg3);
// Rounds 48 - 51
sha1nexte(e0, msg0);
movdqa(e1, abcd);
sha1msg2(msg1, msg0);
sha1rnds4(abcd, e0, 2);
sha1msg1(msg3, msg0);
pxor(msg2, msg0);
// Rounds 52 - 55
sha1nexte(e1, msg1);
movdqa(e0, abcd);
sha1msg2(msg2, msg1);
sha1rnds4(abcd, e1, 2);
sha1msg1(msg0, msg1);
pxor(msg3, msg1);
// Rounds 56 - 59
sha1nexte(e0, msg2);
movdqa(e1, abcd);
sha1msg2(msg3, msg2);
sha1rnds4(abcd, e0, 2);
sha1msg1(msg1, msg2);
pxor(msg0, msg2);
// Rounds 60 - 63
sha1nexte(e1, msg3);
movdqa(e0, abcd);
sha1msg2(msg0, msg3);
sha1rnds4(abcd, e1, 3);
sha1msg1(msg2, msg3);
pxor(msg1, msg3);
// Rounds 64 - 67
sha1nexte(e0, msg0);
movdqa(e1, abcd);
sha1msg2(msg1, msg0);
sha1rnds4(abcd, e0, 3);
sha1msg1(msg3, msg0);
pxor(msg2, msg0);
// Rounds 68 - 71
sha1nexte(e1, msg1);
movdqa(e0, abcd);
sha1msg2(msg2, msg1);
sha1rnds4(abcd, e1, 3);
pxor(msg3, msg1);
// Rounds 72 - 75
sha1nexte(e0, msg2);
movdqa(e1, abcd);
sha1msg2(msg3, msg2);
sha1rnds4(abcd, e0, 3);
// Rounds 76 - 79
sha1nexte(e1, msg3);
movdqa(e0, abcd);
sha1rnds4(abcd, e1, 3);
// add current hash values with previously saved
movdqu(msg0, Address(rsp, 0));
sha1nexte(e0, msg0);
movdqu(msg0, Address(rsp, 16));
paddd(abcd, msg0);
if (multi_block) {
// increment data pointer and loop if more to process
addptr(buf, 64);
addptr(ofs, 64);
cmpptr(ofs, limit);
jcc(Assembler::belowEqual, loop0);
movptr(rax, ofs); //return ofs
}
// write hash values back in the correct order
pshufd(abcd, abcd, 0x1b);
movdqu(Address(state, 0), abcd);
pextrd(Address(state, 16), e0, 3);
bind(done_hash);
}
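The multi_block tail of fast_sha1 above implements the DigestBase.implCompressMultiBlock contract: consume 64-byte blocks while ofs <= limit, then return the updated ofs in rax. A standalone C++ model of that driver loop (compress_one_block is a placeholder, not the real compression):

#include <cstdint>
#include <cstdio>

// Placeholder for the 80 SHA-1 rounds; only the loop contract matters here.
static void compress_one_block(uint32_t state[5], const uint8_t* block) {
  state[0] ^= block[0];
}

static int impl_compress_multi_block(uint32_t state[5], const uint8_t* buf,
                                     int ofs, int limit) {
  do {
    compress_one_block(state, buf + ofs);
    ofs += 64;                 // addptr(buf, 64); addptr(ofs, 64)
  } while (ofs <= limit);      // cmpptr(ofs, limit); jcc(belowEqual, loop0)
  return ofs;                  // movptr(rax, ofs)
}

int main() {
  uint32_t st[5] = {0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0};
  uint8_t data[192] = {0};
  printf("next ofs = %d\n", impl_compress_multi_block(st, data, 0, 128));  // 192
  return 0;
}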
// xmm0 (msg) is used as an implicit argument to sha256rnds2,
// so state0 and state1 can never use the xmm0 register.
// ofs and limit are used for multi-block byte array.
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
#ifdef _LP64
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
Register buf, Register state, Register ofs, Register limit, Register rsp,
bool multi_block, XMMRegister shuf_mask) {
#else
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
Register buf, Register state, Register ofs, Register limit, Register rsp,
bool multi_block) {
#endif
Label start, done_hash, loop0;
address K256 = StubRoutines::x86::k256_addr();
address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr();
bind(start);
movdqu(state0, Address(state, 0));
movdqu(state1, Address(state, 16));
pshufd(state0, state0, 0xB1);
pshufd(state1, state1, 0x1B);
movdqa(msgtmp4, state0);
palignr(state0, state1, 8);
pblendw(state1, msgtmp4, 0xF0);
#ifdef _LP64
movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask));
#endif
lea(rax, ExternalAddress(K256));
bind(loop0);
movdqu(Address(rsp, 0), state0);
movdqu(Address(rsp, 16), state1);
// Rounds 0-3
movdqu(msg, Address(buf, 0));
#ifdef _LP64
pshufb(msg, shuf_mask);
#else
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
movdqa(msgtmp0, msg);
paddd(msg, Address(rax, 0));
sha256rnds2(state1, state0);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
// Rounds 4-7
movdqu(msg, Address(buf, 16));
#ifdef _LP64
pshufb(msg, shuf_mask);
#else
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
movdqa(msgtmp1, msg);
paddd(msg, Address(rax, 16));
sha256rnds2(state1, state0);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp0, msgtmp1);
// Rounds 8-11
movdqu(msg, Address(buf, 32));
#ifdef _LP64
pshufb(msg, shuf_mask);
#else
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
movdqa(msgtmp2, msg);
paddd(msg, Address(rax, 32));
sha256rnds2(state1, state0);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp1, msgtmp2);
// Rounds 12-15
movdqu(msg, Address(buf, 48));
#ifdef _LP64
pshufb(msg, shuf_mask);
#else
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
movdqa(msgtmp3, msg);
paddd(msg, Address(rax, 48));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp3);
palignr(msgtmp4, msgtmp2, 4);
paddd(msgtmp0, msgtmp4);
sha256msg2(msgtmp0, msgtmp3);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp2, msgtmp3);
// Rounds 16-19
movdqa(msg, msgtmp0);
paddd(msg, Address(rax, 64));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp0);
palignr(msgtmp4, msgtmp3, 4);
paddd(msgtmp1, msgtmp4);
sha256msg2(msgtmp1, msgtmp0);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp3, msgtmp0);
// Rounds 20-23
movdqa(msg, msgtmp1);
paddd(msg, Address(rax, 80));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp1);
palignr(msgtmp4, msgtmp0, 4);
paddd(msgtmp2, msgtmp4);
sha256msg2(msgtmp2, msgtmp1);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp0, msgtmp1);
// Rounds 24-27
movdqa(msg, msgtmp2);
paddd(msg, Address(rax, 96));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp2);
palignr(msgtmp4, msgtmp1, 4);
paddd(msgtmp3, msgtmp4);
sha256msg2(msgtmp3, msgtmp2);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp1, msgtmp2);
// Rounds 28-31
movdqa(msg, msgtmp3);
paddd(msg, Address(rax, 112));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp3);
palignr(msgtmp4, msgtmp2, 4);
paddd(msgtmp0, msgtmp4);
sha256msg2(msgtmp0, msgtmp3);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp2, msgtmp3);
// Rounds 32-35
movdqa(msg, msgtmp0);
paddd(msg, Address(rax, 128));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp0);
palignr(msgtmp4, msgtmp3, 4);
paddd(msgtmp1, msgtmp4);
sha256msg2(msgtmp1, msgtmp0);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp3, msgtmp0);
// Rounds 36-39
movdqa(msg, msgtmp1);
paddd(msg, Address(rax, 144));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp1);
palignr(msgtmp4, msgtmp0, 4);
paddd(msgtmp2, msgtmp4);
sha256msg2(msgtmp2, msgtmp1);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp0, msgtmp1);
// Rounds 40-43
movdqa(msg, msgtmp2);
paddd(msg, Address(rax, 160));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp2);
palignr(msgtmp4, msgtmp1, 4);
paddd(msgtmp3, msgtmp4);
sha256msg2(msgtmp3, msgtmp2);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp1, msgtmp2);
// Rounds 44-47
movdqa(msg, msgtmp3);
paddd(msg, Address(rax, 176));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp3);
palignr(msgtmp4, msgtmp2, 4);
paddd(msgtmp0, msgtmp4);
sha256msg2(msgtmp0, msgtmp3);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp2, msgtmp3);
// Rounds 48-51
movdqa(msg, msgtmp0);
paddd(msg, Address(rax, 192));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp0);
palignr(msgtmp4, msgtmp3, 4);
paddd(msgtmp1, msgtmp4);
sha256msg2(msgtmp1, msgtmp0);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
sha256msg1(msgtmp3, msgtmp0);
// Rounds 52-55
movdqa(msg, msgtmp1);
paddd(msg, Address(rax, 208));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp1);
palignr(msgtmp4, msgtmp0, 4);
paddd(msgtmp2, msgtmp4);
sha256msg2(msgtmp2, msgtmp1);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
// Rounds 56-59
movdqa(msg, msgtmp2);
paddd(msg, Address(rax, 224));
sha256rnds2(state1, state0);
movdqa(msgtmp4, msgtmp2);
palignr(msgtmp4, msgtmp1, 4);
paddd(msgtmp3, msgtmp4);
sha256msg2(msgtmp3, msgtmp2);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
// Rounds 60-63
movdqa(msg, msgtmp3);
paddd(msg, Address(rax, 240));
sha256rnds2(state1, state0);
pshufd(msg, msg, 0x0E);
sha256rnds2(state0, state1);
movdqu(msg, Address(rsp, 0));
paddd(state0, msg);
movdqu(msg, Address(rsp, 16));
paddd(state1, msg);
if (multi_block) {
// increment data pointer and loop if more to process
addptr(buf, 64);
addptr(ofs, 64);
cmpptr(ofs, limit);
jcc(Assembler::belowEqual, loop0);
movptr(rax, ofs); //return ofs
}
pshufd(state0, state0, 0x1B);
pshufd(state1, state1, 0xB1);
movdqa(msgtmp4, state0);
pblendw(state0, state1, 0xF0);
palignr(state1, msgtmp4, 8);
movdqu(Address(state, 0), state0);
movdqu(Address(state, 16), state1);
bind(done_hash);
}
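The pshufd/palignr/pblendw pairs at the top and bottom of fast_sha256 convert between the linear A..H layout of the Java state array and the ABEF/CDGH split that sha256rnds2 consumes. An array-level sketch of that round trip (element order within each XMM register is abstracted away here):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t h[8] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'};
  // Prologue (the pshufd/palignr/pblendw above): split into the working pair.
  uint32_t abef[4] = {h[0], h[1], h[4], h[5]};
  uint32_t cdgh[4] = {h[2], h[3], h[6], h[7]};
  // Epilogue: scatter back to the linear layout the Java state array uses.
  uint32_t out[8] = {abef[0], abef[1], cdgh[0], cdgh[1],
                     abef[2], abef[3], cdgh[2], cdgh[3]};
  for (int i = 0; i < 8; i++) printf("%c", (char)out[i]);  // prints ABCDEFGH
  printf("\n");
  return 0;
}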
View File
@@ -208,13 +208,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
__ subptr(rsp, ymm_bytes); __ subptr(rsp, ymm_bytes);
// Save upper half of YMM registers // Save upper half of YMM registers
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf128h(Address(rsp, n*16), as_XMMRegister(n)); __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
} }
if (UseAVX > 2) { if (UseAVX > 2) {
__ subptr(rsp, zmm_bytes); __ subptr(rsp, zmm_bytes);
// Save upper half of ZMM registers // Save upper half of ZMM registers
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1); __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
} }
} }
} }
@@ -304,13 +304,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
if (UseAVX > 2) { if (UseAVX > 2) {
// Restore upper half of ZMM registers. // Restore upper half of ZMM registers.
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1); __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
} }
__ addptr(rsp, zmm_bytes); __ addptr(rsp, zmm_bytes);
} }
// Restore upper half of YMM registers. // Restore upper half of YMM registers.
for (int n = 0; n < num_xmm_regs; n++) { for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16)); __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
} }
__ addptr(rsp, ymm_bytes); __ addptr(rsp, ymm_bytes);
} }
View File
@@ -179,13 +179,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// Save upper half of YMM registers(0..15) // Save upper half of YMM registers(0..15)
int base_addr = XSAVE_AREA_YMM_BEGIN; int base_addr = XSAVE_AREA_YMM_BEGIN;
for (int n = 0; n < 16; n++) { for (int n = 0; n < 16; n++) {
__ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n)); __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n));
} }
if (VM_Version::supports_evex()) { if (VM_Version::supports_evex()) {
// Save upper half of ZMM registers(0..15) // Save upper half of ZMM registers(0..15)
base_addr = XSAVE_AREA_ZMM_BEGIN; base_addr = XSAVE_AREA_ZMM_BEGIN;
for (int n = 0; n < 16; n++) { for (int n = 0; n < 16; n++) {
__ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1); __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
} }
// Save full ZMM registers(16..num_xmm_regs) // Save full ZMM registers(16..num_xmm_regs)
base_addr = XSAVE_AREA_UPPERBANK; base_addr = XSAVE_AREA_UPPERBANK;
@@ -333,13 +333,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
// Restore upper half of YMM registers (0..15) // Restore upper half of YMM registers (0..15)
int base_addr = XSAVE_AREA_YMM_BEGIN; int base_addr = XSAVE_AREA_YMM_BEGIN;
for (int n = 0; n < 16; n++) { for (int n = 0; n < 16; n++) {
__ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16)); __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
} }
if (VM_Version::supports_evex()) { if (VM_Version::supports_evex()) {
// Restore upper half of ZMM registers (0..15) // Restore upper half of ZMM registers (0..15)
base_addr = XSAVE_AREA_ZMM_BEGIN; base_addr = XSAVE_AREA_ZMM_BEGIN;
for (int n = 0; n < 16; n++) { for (int n = 0; n < 16; n++) {
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1); __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
} }
// Restore full ZMM registers(16..num_xmm_regs) // Restore full ZMM registers(16..num_xmm_regs)
base_addr = XSAVE_AREA_UPPERBANK; base_addr = XSAVE_AREA_UPPERBANK;
View File
@@ -3068,6 +3068,136 @@ class StubGenerator: public StubCodeGenerator {
return start; return start;
} }
address generate_upper_word_mask() {
__ align(64);
StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
address start = __ pc();
__ emit_data(0x00000000, relocInfo::none, 0);
__ emit_data(0x00000000, relocInfo::none, 0);
__ emit_data(0x00000000, relocInfo::none, 0);
__ emit_data(0xFFFFFFFF, relocInfo::none, 0);
return start;
}
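A quick host-side check of the constant just emitted: the four data words form 0xFFFFFFFF000000000000000000000000, so the pand in fast_sha1 keeps only the top dword, where pinsrd parked the E state word (standalone C++, for illustration only):

#include <cstdint>
#include <cstdio>

int main() {
  // The four emit_data words above, in emission order.
  const uint32_t mask[4] = {0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF};
  uint32_t e0[4] = {0x11111111, 0x22222222, 0x33333333, 0x44444444};
  for (int i = 0; i < 4; i++) e0[i] &= mask[i];   // pand(e0, shuf_mask)
  printf("%08x %08x %08x %08x\n", e0[0], e0[1], e0[2], e0[3]);
  // prints: 00000000 00000000 00000000 44444444 -- only the E word survives
  return 0;
}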
address generate_shuffle_byte_flip_mask() {
__ align(64);
StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
address start = __ pc();
__ emit_data(0x0c0d0e0f, relocInfo::none, 0);
__ emit_data(0x08090a0b, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
return start;
}
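Likewise, the shuffle_byte_flip_mask words assemble (little-endian) into the pshufb control 0f 0e .. 00, a full byte reversal that converts the big-endian SHA message words. A standalone check, with pshufb modeled as a table lookup (illustration only):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // The four emit_data words above, in emission order.
  const uint32_t words[4] = {0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203};
  uint8_t mask[16];
  memcpy(mask, words, 16);        // little-endian: mask = 15, 14, ..., 1, 0
  uint8_t in[16], out[16];
  for (int i = 0; i < 16; i++) in[i] = (uint8_t)i;
  for (int i = 0; i < 16; i++) out[i] = in[mask[i]];  // pshufb as a table lookup
  printf("out[0]=%u out[15]=%u\n", (unsigned)out[0], (unsigned)out[15]);  // 15 and 0
  return 0;
}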
// ofs and limit are used for multi-block byte array.
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
address generate_sha1_implCompress(bool multi_block, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Register buf = rax;
Register state = rdx;
Register ofs = rcx;
Register limit = rdi;
const Address buf_param(rbp, 8 + 0);
const Address state_param(rbp, 8 + 4);
const Address ofs_param(rbp, 8 + 8);
const Address limit_param(rbp, 8 + 12);
const XMMRegister abcd = xmm0;
const XMMRegister e0 = xmm1;
const XMMRegister e1 = xmm2;
const XMMRegister msg0 = xmm3;
const XMMRegister msg1 = xmm4;
const XMMRegister msg2 = xmm5;
const XMMRegister msg3 = xmm6;
const XMMRegister shuf_mask = xmm7;
__ enter();
__ subptr(rsp, 8 * wordSize);
if (multi_block) {
__ push(limit);
}
__ movptr(buf, buf_param);
__ movptr(state, state_param);
if (multi_block) {
__ movptr(ofs, ofs_param);
__ movptr(limit, limit_param);
}
__ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
buf, state, ofs, limit, rsp, multi_block);
if (multi_block) {
__ pop(limit);
}
__ addptr(rsp, 8 * wordSize);
__ leave();
__ ret(0);
return start;
}
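Editor's note: a hedged model of the multi-block contract these stubs implement, mirroring the DigestBase.implCompressMultiBlock loop named in the comment above: compress 64-byte blocks starting at buf+ofs while ofs has not passed limit, then return the updated offset. Illustrative C++, not the stub itself:

  #include <cstdint>

  int implCompressMultiBlock(const uint8_t* buf, int ofs, int limit,
                             void (*compress)(const uint8_t* block)) {
    for (; ofs <= limit; ofs += 64) {
      compress(buf + ofs);   // one 64-byte SHA block
    }
    return ofs;              // the caller resumes from the returned offset
  }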
address generate_pshuffle_byte_flip_mask() {
__ align(64);
StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
address start = __ pc();
__ emit_data(0x00010203, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x08090a0b, relocInfo::none, 0);
__ emit_data(0x0c0d0e0f, relocInfo::none, 0);
return start;
}
// ofs and limit are used for multi-block byte array.
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
address generate_sha256_implCompress(bool multi_block, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Register buf = rbx;
Register state = rsi;
Register ofs = rdx;
Register limit = rcx;
const Address buf_param(rbp, 8 + 0);
const Address state_param(rbp, 8 + 4);
const Address ofs_param(rbp, 8 + 8);
const Address limit_param(rbp, 8 + 12);
const XMMRegister msg = xmm0;
const XMMRegister state0 = xmm1;
const XMMRegister state1 = xmm2;
const XMMRegister msgtmp0 = xmm3;
const XMMRegister msgtmp1 = xmm4;
const XMMRegister msgtmp2 = xmm5;
const XMMRegister msgtmp3 = xmm6;
const XMMRegister msgtmp4 = xmm7;
__ enter();
__ subptr(rsp, 8 * wordSize);
handleSOERegisters(true /*saving*/);
__ movptr(buf, buf_param);
__ movptr(state, state_param);
if (multi_block) {
__ movptr(ofs, ofs_param);
__ movptr(limit, limit_param);
}
__ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
buf, state, ofs, limit, rsp, multi_block);
handleSOERegisters(false);
__ addptr(rsp, 8 * wordSize);
__ leave();
__ ret(0);
return start;
}
  // byte swap x86 long
  address generate_ghash_long_swap_mask() {
@@ -3772,6 +3902,19 @@ class StubGenerator: public StubCodeGenerator {
      StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
    }
if (UseSHA1Intrinsics) {
StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
}
if (UseSHA256Intrinsics) {
StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
}
    // Generate GHASH intrinsics code
    if (UseGHASHIntrinsics) {
      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();


@@ -275,7 +275,7 @@ class StubGenerator: public StubCodeGenerator {
    }
    if (VM_Version::supports_evex()) {
      for (int i = xmm_save_first; i <= last_reg; i++) {
-       __ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0);
+       __ vextractf32x4(xmm_save(i), as_XMMRegister(i), 0);
      }
    } else {
      for (int i = xmm_save_first; i <= last_reg; i++) {
@@ -393,7 +393,7 @@ class StubGenerator: public StubCodeGenerator {
    // emit the restores for xmm regs
    if (VM_Version::supports_evex()) {
      for (int i = xmm_save_first; i <= last_reg; i++) {
-       __ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0);
+       __ vinsertf32x4(as_XMMRegister(i), as_XMMRegister(i), xmm_save(i), 0);
      }
    } else {
      for (int i = xmm_save_first; i <= last_reg; i++) {
@@ -3695,6 +3695,133 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }
address generate_upper_word_mask() {
__ align(64);
StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
address start = __ pc();
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0xFFFFFFFF00000000, relocInfo::none);
return start;
}
address generate_shuffle_byte_flip_mask() {
__ align(64);
StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
address start = __ pc();
__ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
__ emit_data64(0x0001020304050607, relocInfo::none);
return start;
}
// ofs and limit are used for multi-block byte array.
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
address generate_sha1_implCompress(bool multi_block, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
Register ofs = c_rarg2;
Register limit = c_rarg3;
const XMMRegister abcd = xmm0;
const XMMRegister e0 = xmm1;
const XMMRegister e1 = xmm2;
const XMMRegister msg0 = xmm3;
const XMMRegister msg1 = xmm4;
const XMMRegister msg2 = xmm5;
const XMMRegister msg3 = xmm6;
const XMMRegister shuf_mask = xmm7;
__ enter();
#ifdef _WIN64
// save xmm registers 6 and 7, which must be preserved on Win64
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif
__ subptr(rsp, 4 * wordSize);
__ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
buf, state, ofs, limit, rsp, multi_block);
__ addptr(rsp, 4 * wordSize);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
#endif
__ leave();
__ ret(0);
return start;
}
address generate_pshuffle_byte_flip_mask() {
__ align(64);
StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
address start = __ pc();
__ emit_data64(0x0405060700010203, relocInfo::none);
__ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
return start;
}
// ofs and limit are used for multi-block byte array.
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
address generate_sha256_implCompress(bool multi_block, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
Register ofs = c_rarg2;
Register limit = c_rarg3;
const XMMRegister msg = xmm0;
const XMMRegister state0 = xmm1;
const XMMRegister state1 = xmm2;
const XMMRegister msgtmp0 = xmm3;
const XMMRegister msgtmp1 = xmm4;
const XMMRegister msgtmp2 = xmm5;
const XMMRegister msgtmp3 = xmm6;
const XMMRegister msgtmp4 = xmm7;
const XMMRegister shuf_mask = xmm8;
__ enter();
#ifdef _WIN64
// save xmm registers 6-8, which must be preserved on Win64
__ subptr(rsp, 6 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
__ movdqu(Address(rsp, 4 * wordSize), xmm8);
#endif
__ subptr(rsp, 4 * wordSize);
__ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
buf, state, ofs, limit, rsp, multi_block, shuf_mask);
__ addptr(rsp, 4 * wordSize);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ movdqu(xmm8, Address(rsp, 4 * wordSize));
__ addptr(rsp, 6 * wordSize);
#endif
__ leave();
__ ret(0);
return start;
}
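Editor's note: the _WIN64-only spill/reload blocks in both stubs exist because the Windows x64 ABI treats xmm6 through xmm15 as callee-saved; the SHA-1 stub clobbers xmm6-xmm7 and the SHA-256 stub additionally uses xmm8 as shuf_mask, so those registers must be preserved across the call. The System V ABI used on Linux and macOS treats all xmm registers as caller-saved, which is why no saves are emitted outside _WIN64.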
  // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
  // to hide instruction latency
  //
@@ -4974,6 +5101,19 @@ class StubGenerator: public StubCodeGenerator {
      StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
    }
if (UseSHA1Intrinsics) {
StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
}
if (UseSHA256Intrinsics) {
StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
}
    // Generate GHASH intrinsics code
    if (UseGHASHIntrinsics) {
      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();


@@ -29,6 +29,12 @@
#include "runtime/thread.inline.hpp"
#include "crc32c.h"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
// Implementation of the platform-specific part of StubRoutines - for
// a description of how to extend it, see the stubRoutines.hpp file.

@@ -37,6 +43,10 @@ address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
address StubRoutines::x86::_upper_word_mask_addr = NULL;
address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
address StubRoutines::x86::_k256_adr = NULL;
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
uint64_t StubRoutines::x86::_crc_by128_masks[] =
{
@@ -236,3 +246,23 @@ void StubRoutines::x86::generate_CRC32C_table(bool is_pclmulqdq_table_supported)
    _crc32c_table = (juint*)pclmulqdq_table;
  }
}
ALIGNED_(64) juint StubRoutines::x86::_k256[] =
{
0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
};
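Editor's note: the _k256 entries are the SHA-256 round constants from FIPS 180-4, i.e. the first 32 bits of the fractional parts of the cube roots of the first 64 primes. A hedged verification sketch (standalone C++, not part of the commit; double precision should reproduce all 64 values, barring a boundary rounding case):

  #include <cmath>
  #include <cstdint>
  #include <cstdio>

  int main() {
    int primes[64], n = 0;
    for (int c = 2; n < 64; c++) {             // collect the first 64 primes
      bool is_prime = true;
      for (int d = 2; d * d <= c; d++) if (c % d == 0) { is_prime = false; break; }
      if (is_prime) primes[n++] = c;
    }
    for (int i = 0; i < 64; i++) {
      double r = std::cbrt((double)primes[i]);
      uint32_t k = (uint32_t)((r - std::floor(r)) * 4294967296.0);  // frac * 2^32
      printf("0x%08xUL%s", k, (i % 4 == 3) ? ",\n" : ", ");          // matches _k256
    }
    return 0;
  }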


@@ -46,6 +46,17 @@
  static address _ghash_long_swap_mask_addr;
  static address _ghash_byte_swap_mask_addr;
// upper word mask for sha1
static address _upper_word_mask_addr;
// byte flip mask for sha1
static address _shuffle_byte_flip_mask_addr;
// k256 table for sha256
static juint _k256[];
static address _k256_adr;
// byte flip mask for sha256
static address _pshuffle_byte_flip_mask_addr;
 public:
  static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
  static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
@@ -53,5 +64,9 @@
  static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
  static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
  static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
static address upper_word_mask_addr() { return _upper_word_mask_addr; }
static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; }
static address k256_addr() { return _k256_adr; }
static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; }
  static void generate_CRC32C_table(bool is_pclmulqdq_supported);
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP


@@ -68,10 +68,11 @@
  declare_constant(VM_Version::CPU_AVX512DQ) \
  declare_constant(VM_Version::CPU_AVX512PF) \
  declare_constant(VM_Version::CPU_AVX512ER) \
- declare_constant(VM_Version::CPU_AVX512CD) \
- declare_constant(VM_Version::CPU_AVX512BW)
+ declare_constant(VM_Version::CPU_AVX512CD)

#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
- declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL)
+ declare_preprocessor_constant("VM_Version::CPU_AVX512BW", CPU_AVX512BW) \
+ declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) \
+ declare_preprocessor_constant("VM_Version::CPU_SHA", CPU_SHA)

#endif // CPU_X86_VM_VMSTRUCTS_X86_HPP


@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -385,7 +385,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
-   __ vinsertf128h(xmm0, xmm0, xmm0);
+   __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
@@ -577,7 +577,7 @@ void VM_Version::get_processor_features() {
  }

  char buf[256];
- jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               cores_per_cpu(), threads_per_core(),
               cpu_family(), _model, _stepping,
               (supports_cmov() ? ", cmov" : ""),
@@ -608,7 +608,8 @@ void VM_Version::get_processor_features() {
               (supports_bmi1() ? ", bmi1" : ""),
               (supports_bmi2() ? ", bmi2" : ""),
               (supports_adx() ? ", adx" : ""),
-              (supports_evex() ? ", evex" : ""));
+              (supports_evex() ? ", evex" : ""),
+              (supports_sha() ? ", sha" : ""));
  _features_string = os::strdup(buf);

  // UseSSE is set to the smaller of what hardware supports and what
@@ -730,17 +731,29 @@ void VM_Version::get_processor_features() {
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

- if (UseSHA) {
+ if (supports_sha()) {
+   if (FLAG_IS_DEFAULT(UseSHA)) {
+     UseSHA = true;
+   }
+ } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

- if (UseSHA1Intrinsics) {
+ if (UseSHA) {
+   if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
+     FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
+   }
+ } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

- if (UseSHA256Intrinsics) {
+ if (UseSHA) {
+   if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+     FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+   }
+ } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }
@@ -750,6 +763,10 @@ void VM_Version::get_processor_features() {
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

+ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+   FLAG_SET_DEFAULT(UseSHA, false);
+ }
+
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
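Editor's note: a hedged model of the precedence the hunk above implements (plain C++, not HotSpot source): hardware SHA support defaults UseSHA on; UseSHA in turn defaults the per-algorithm intrinsic flags on; an explicit -XX:-... on the command line always wins; and UseSHA is cleared again when no intrinsic flag survives:

  struct ShaFlags { bool use_sha, sha1, sha256, sha512; };

  // explicit_* is true when the user set that flag on the command line.
  ShaFlags resolve(bool cpu_sha, ShaFlags f, bool explicit_use_sha,
                   bool explicit_sha1, bool explicit_sha256) {
    if (cpu_sha && !explicit_use_sha) f.use_sha = true;
    if (!cpu_sha)                     f.use_sha = false; // warns if requested
    if (f.use_sha && !explicit_sha1)   f.sha1   = true;
    if (!f.use_sha)                    f.sha1   = false; // warns if requested
    if (f.use_sha && !explicit_sha256) f.sha256 = true;
    if (!f.use_sha)                    f.sha256 = false; // warns if requested
    f.sha512 = false;                 // no x86 SHA-512 stubs in this change
    if (!(f.sha1 || f.sha256 || f.sha512)) f.use_sha = false;
    return f;
  }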


@@ -221,7 +221,7 @@ class VM_Version : public Abstract_VM_Version {
               avx512pf : 1,
               avx512er : 1,
               avx512cd : 1,
-                        : 1,
+               sha      : 1,
               avx512bw : 1,
               avx512vl : 1;
    } bits;
@@ -282,11 +282,13 @@ protected:
    CPU_AVX512DQ = (1 << 27),
    CPU_AVX512PF = (1 << 28),
    CPU_AVX512ER = (1 << 29),
-   CPU_AVX512CD = (1 << 30),
-   CPU_AVX512BW = (1 << 31)
+   CPU_AVX512CD = (1 << 30)
+   // Keeping sign bit 31 unassigned.
  };

-#define CPU_AVX512VL UCONST64(0x100000000) // EVEX instructions with smaller vector length : enums are limited to 32bit
+#define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // enums are limited to 31 bit
+#define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // EVEX instructions with smaller vector length
+#define CPU_SHA ((uint64_t)UCONST64(0x400000000))      // SHA instructions

  enum Extended_Family {
    // AMD
@@ -516,6 +518,8 @@ protected:
      result |= CPU_ADX;
    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
      result |= CPU_BMI2;
+   if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+     result |= CPU_SHA;
    if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
      result |= CPU_LZCNT;
    // for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
@@ -721,6 +725,7 @@ public:
  static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
  static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
  static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
+ static bool supports_sha() { return (_features & CPU_SHA) != 0; }
  // Intel features
  static bool is_intel_family_core() { return is_intel() &&
                                       extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
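Editor's note: the new sha bitfield decoded above corresponds to CPUID leaf 7 (sub-leaf 0), EBX bit 29, which is where Intel advertises the SHA extensions. A standalone detection sketch using the GCC/Clang cpuid.h helper, for illustration only:

  #include <cpuid.h>
  #include <cstdio>

  int main() {
    unsigned eax, ebx, ecx, edx;
    if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
      printf("SHA extensions: %s\n", (ebx & (1u << 29)) ? "yes" : "no");
    }
    return 0;
  }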


@@ -3179,13 +3179,13 @@ instruct Repl32B(vecY dst, rRegI src) %{
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+           "vinserti128_high $dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3196,12 +3196,12 @@ instruct Repl32B_mem(vecY dst, memory mem) %{
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+           "vinserti128_high $dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3223,11 +3223,11 @@ instruct Repl32B_imm(vecY dst, immI con) %{
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
+           "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3298,12 +3298,12 @@ instruct Repl16S(vecY dst, rRegI src) %{
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+           "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3313,11 +3313,11 @@ instruct Repl16S_mem(vecY dst, memory mem) %{
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+           "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3327,11 +3327,11 @@ instruct Repl16S_imm(vecY dst, immI con) %{
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
+           "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3363,11 +3363,11 @@ instruct Repl8I(vecY dst, rRegI src) %{
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+           "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3376,10 +3376,10 @@ instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+           "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3401,11 +3401,11 @@ instruct Repl8I_imm(vecY dst, immI con) %{
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst" %}
+           "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3430,11 +3430,11 @@ instruct Repl4L(vecY dst, rRegL src) %{
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+           "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3447,13 +3447,13 @@ instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+           "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3464,11 +3464,11 @@ instruct Repl4L_imm(vecY dst, immL con) %{
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
+           "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3478,11 +3478,11 @@ instruct Repl4L_mem(vecY dst, memory mem) %{
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
-           "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+           "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-   __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3511,10 +3511,10 @@ instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
-           "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+           "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-   __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3523,10 +3523,10 @@ instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
-           "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+           "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
-   __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3576,10 +3576,10 @@ instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
-           "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+           "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
-   __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -3588,10 +3588,10 @@ instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
-           "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+           "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
-   __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
@@ -4791,7 +4791,7 @@ instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
-           "vextracti128 $tmp2,$tmp\n\t"
+           "vextracti128_high $tmp2,$tmp\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
@@ -4800,7 +4800,7 @@ instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
    int vector_len = 1;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
-   __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
+   __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4813,7 +4813,7 @@ instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp,
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti128 $tmp,$src2\n\t"
+ format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
@@ -4824,7 +4824,7 @@ instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp,
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
-   __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+   __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
@@ -4841,9 +4841,9 @@ instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp,
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
+ format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpaddd $tmp3,$tmp3,$src2\n\t"
-           "vextracti128 $tmp,$tmp3\n\t"
+           "vextracti128_high $tmp,$tmp3\n\t"
            "vpaddd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
@@ -4853,9 +4853,9 @@ instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp,
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
-   __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
+   __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
-   __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
+   __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
@@ -4892,7 +4892,7 @@ instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti128 $tmp,$src2\n\t"
+ format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
@@ -4900,7 +4900,7 @@ instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
-   __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+   __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4915,9 +4915,9 @@ instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
+ format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
-           "vextracti128 $tmp,$tmp2\n\t"
+           "vextracti128_high $tmp,$tmp2\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
@@ -4925,9 +4925,9 @@ instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction8L" %}
  ins_encode %{
-   __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
+   __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
-   __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
+   __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5026,7 +5026,7 @@ instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
-           "vextractf128 $tmp2,$src2\n\t"
+           "vextractf128_high $tmp2,$src2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
@@ -5042,7 +5042,7 @@ instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-   __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+   __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5065,7 +5065,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
-           "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+           "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
@@ -5073,7 +5073,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
-           "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+           "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
@@ -5081,7 +5081,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
-           "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+           "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
@@ -5097,7 +5097,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-   __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+   __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5105,7 +5105,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-   __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+   __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5113,7 +5113,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-   __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+   __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5162,7 +5162,7 @@ instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
-           "vextractf32x4h $tmp2,$src2, 0x1\n\t"
+           "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
@@ -5170,7 +5170,7 @@ instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-   __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+   __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5185,15 +5185,15 @@ instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
-           "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+           "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
-           "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+           "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
-           "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+           "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
@@ -5201,15 +5201,15 @@ instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-   __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+   __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-   __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+   __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-   __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+   __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5307,7 +5307,7 @@ instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti128 $tmp,$src2\n\t"
+ format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
@@ -5318,7 +5318,7 @@ instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
-   __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+   __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
@@ -5335,9 +5335,9 @@ instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
+ format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
-           "vextracti128 $tmp,$tmp3\n\t"
+           "vextracti128_high $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
@@ -5347,9 +5347,9 @@ instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
-   __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
+   __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
-   __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
+   __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
@ -5386,7 +5386,7 @@ instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2)); match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2); effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti128 $tmp,$src2\n\t" format %{ "vextracti128_high $tmp,$src2\n\t"
"vpmullq $tmp2,$tmp,$src2\n\t" "vpmullq $tmp2,$tmp,$src2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t" "pshufd $tmp,$tmp2,0xE\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t"
@ -5394,7 +5394,7 @@ instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
"vpmullq $tmp2,$tmp2,$tmp\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! mul reduction4L" %} "movdq $dst,$tmp2\t! mul reduction4L" %}
ins_encode %{ ins_encode %{
__ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@ -5409,9 +5409,9 @@ instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2)); match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2); effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" format %{ "vextracti64x4_high $tmp2,$src2\n\t"
"vpmullq $tmp2,$tmp2,$src2\n\t" "vpmullq $tmp2,$tmp2,$src2\n\t"
"vextracti128 $tmp,$tmp2\n\t" "vextracti128_high $tmp,$tmp2\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp2,0xE\n\t" "pshufd $tmp,$tmp2,0xE\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t"
@ -5419,9 +5419,9 @@ instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
"vpmullq $tmp2,$tmp2,$tmp\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! mul reduction8L" %} "movdq $dst,$tmp2\t! mul reduction8L" %}
ins_encode %{ ins_encode %{
__ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
__ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@ -5520,7 +5520,7 @@ instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t" "pshufd $tmp,$src2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
"vextractf128 $tmp2,$src2\n\t" "vextractf128_high $tmp2,$src2\n\t"
"vmulss $dst,$dst,$tmp2\n\t" "vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t" "pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
@ -5536,7 +5536,7 @@ instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@ -5559,7 +5559,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t" "pshufd $tmp,$src2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
"vextractf32x4 $tmp2,$src2, 0x1\n\t" "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vmulss $dst,$dst,$tmp2\n\t" "vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t" "pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
@ -5567,7 +5567,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t" "pshufd $tmp,$tmp2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
"vextractf32x4 $tmp2,$src2, 0x2\n\t" "vextractf32x4 $tmp2,$src2,0x2\n\t"
"vmulss $dst,$dst,$tmp2\n\t" "vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t" "pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
@ -5575,7 +5575,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t" "pshufd $tmp,$tmp2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
"vextractf32x4 $tmp2,$src2, 0x3\n\t" "vextractf32x4 $tmp2,$src2,0x3\n\t"
"vmulss $dst,$dst,$tmp2\n\t" "vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t" "pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t" "vmulss $dst,$dst,$tmp\n\t"
@ -5591,7 +5591,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@ -5599,7 +5599,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@ -5607,7 +5607,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@ -5656,7 +5656,7 @@ instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
format %{ "vmulsd $dst,$dst,$src2\n\t" format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t" "pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t" "vmulsd $dst,$dst,$tmp\n\t"
"vextractf128 $tmp2,$src2\n\t" "vextractf128_high $tmp2,$src2\n\t"
"vmulsd $dst,$dst,$tmp2\n\t" "vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t" "pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
@ -5664,7 +5664,7 @@ instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@ -5679,15 +5679,15 @@ instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
format %{ "vmulsd $dst,$dst,$src2\n\t" format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t" "pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t" "vmulsd $dst,$dst,$tmp\n\t"
"vextractf32x4 $tmp2,$src2, 0x1\n\t" "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vmulsd $dst,$dst,$tmp2\n\t" "vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$src2,0xE\n\t" "pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t" "vmulsd $dst,$dst,$tmp\n\t"
"vextractf32x4 $tmp2,$src2, 0x2\n\t" "vextractf32x4 $tmp2,$src2,0x2\n\t"
"vmulsd $dst,$dst,$tmp2\n\t" "vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t" "pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t" "vmulsd $dst,$dst,$tmp\n\t"
"vextractf32x4 $tmp2,$src2, 0x3\n\t" "vextractf32x4 $tmp2,$src2,0x3\n\t"
"vmulsd $dst,$dst,$tmp2\n\t" "vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t" "pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
@ -5695,15 +5695,15 @@ instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);

@ -1420,9 +1420,6 @@ const bool Matcher::isSimpleConstant64(jlong value) {
// The ecx parameter to rep stos for the ClearArray node is in dwords. // The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false; const bool Matcher::init_array_count_is_in_bytes = false;
// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;
// Needs 2 CMOV's for longs. // Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; } const int Matcher::long_cmove_cost() { return 1; }
@ -11369,27 +11366,54 @@ instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
// ======================================================================= // =======================================================================
// fast clearing of an array // fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
predicate(!UseFastStosb); predicate(!((ClearArrayNode*)n)->is_large());
match(Set dummy (ClearArray cnt base)); match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
"SHL ECX,1\t# Convert doublewords to words\n\t" format %{ $$template
"REP STOS\t# store EAX into [EDI++] while ECX--" %} $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
$$emit$$"CMP InitArrayShortSize,rcx\n\t"
$$emit$$"JG LARGE\n\t"
$$emit$$"SHL ECX, 1\n\t"
$$emit$$"DEC ECX\n\t"
$$emit$$"JS DONE\t# Zero length\n\t"
$$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
$$emit$$"DEC ECX\n\t"
$$emit$$"JGE LOOP\n\t"
$$emit$$"JMP DONE\n\t"
$$emit$$"# LARGE:\n\t"
if (UseFastStosb) {
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
} else {
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
}
$$emit$$"# DONE"
%}
ins_encode %{ ins_encode %{
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
predicate(UseFastStosb); predicate(((ClearArrayNode*)n)->is_large());
match(Set dummy (ClearArray cnt base)); match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
format %{ "XOR EAX,EAX\t# ClearArray:\n\t" format %{ $$template
"SHL ECX,3\t# Convert doublewords to bytes\n\t" $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
"REP STOSB\t# store EAX into [EDI++] while ECX--" %} if (UseFastStosb) {
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
} else {
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
}
$$emit$$"# DONE"
%}
ins_encode %{ ins_encode %{
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}

@ -1637,9 +1637,6 @@ const bool Matcher::isSimpleConstant64(jlong value) {
// The ecx parameter to rep stosq for the ClearArray node is in words. // The ecx parameter to rep stosq for the ClearArray node is in words.
const bool Matcher::init_array_count_is_in_bytes = false; const bool Matcher::init_array_count_is_in_bytes = false;
// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;
// No additional cost for CMOVL. // No additional cost for CMOVL.
const int Matcher::long_cmove_cost() { return 0; } const int Matcher::long_cmove_cost() { return 0; }
@ -10460,31 +10457,55 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy, instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
rFlagsReg cr) rFlagsReg cr)
%{ %{
predicate(!UseFastStosb); predicate(!((ClearArrayNode*)n)->is_large());
match(Set dummy (ClearArray cnt base)); match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
format %{ "xorq rax, rax\t# ClearArray:\n\t" format %{ $$template
"rep stosq\t# Store rax to *rdi++ while rcx--" %} $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
$$emit$$"cmp InitArrayShortSize,rcx\n\t"
$$emit$$"jg LARGE\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"js DONE\t# Zero length\n\t"
$$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"jge LOOP\n\t"
$$emit$$"jmp DONE\n\t"
$$emit$$"# LARGE:\n\t"
if (UseFastStosb) {
$$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
} else {
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
}
$$emit$$"# DONE"
%}
ins_encode %{ ins_encode %{
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
%} %}
ins_pipe(pipe_slow); ins_pipe(pipe_slow);
%} %}
instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy, instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
rFlagsReg cr) rFlagsReg cr)
%{ %{
predicate(UseFastStosb); predicate(((ClearArrayNode*)n)->is_large());
match(Set dummy (ClearArray cnt base)); match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
format %{ "xorq rax, rax\t# ClearArray:\n\t"
"shlq rcx,3\t# Convert doublewords to bytes\n\t" format %{ $$template
"rep stosb\t# Store rax to *rdi++ while rcx--" %} $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
ins_encode %{ if (UseFastStosb) {
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
} else {
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
}
%} %}
ins_pipe( pipe_slow ); ins_encode %{
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
%}
ins_pipe(pipe_slow);
%} %}
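The pair of instructs above now keys matching off the ClearArrayNode's is_large flag rather than UseFastStosb alone. The sketch below shows the rough shape the four-argument clear_mem helper plausibly takes on x86_64; the label names and the exact small-count loop are assumptions for illustration, not the committed implementation.

// Sketch only: small clears become an inline qword store loop, large ones
// fall through to rep stos; is_large skips the threshold check because
// ClearArrayNode::Ideal has already proven the count is big.
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, bool is_large) {
  Label DONE, LARGE, LOOP;
  xorptr(tmp, tmp);                               // rax := 0, the store value
  if (!is_large) {
    cmpptr(cnt, InitArrayShortSize / BytesPerLong);
    jcc(Assembler::greater, LARGE);               // above threshold: bulk path
    decrement(cnt);
    jcc(Assembler::negative, DONE);               // zero-length clear
    bind(LOOP);                                   // discrete 8-byte stores
    movptr(Address(base, cnt, Address::times_8), tmp);
    decrement(cnt);
    jcc(Assembler::greaterEqual, LOOP);
    jmp(DONE);
  }
  bind(LARGE);
  if (UseFastStosb) {
    shlptr(cnt, 3);                               // qwords -> bytes
    rep_stosb();                                  // ERMS fast-string store
  } else {
    rep_stos();                                   // qword-wise rep stos
  }
  bind(DONE);
}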
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2, instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,

@ -203,7 +203,8 @@ public class AMD64 extends Architecture {
AVX512ER, AVX512ER,
AVX512CD, AVX512CD,
AVX512BW, AVX512BW,
AVX512VL AVX512VL,
SHA
} }
private final EnumSet<CPUFeature> features; private final EnumSet<CPUFeature> features;

@ -122,6 +122,9 @@ public class AMD64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFacto
if ((config.vmVersionFeatures & config.amd64AVX512VL) != 0) { if ((config.vmVersionFeatures & config.amd64AVX512VL) != 0) {
features.add(AMD64.CPUFeature.AVX512VL); features.add(AMD64.CPUFeature.AVX512VL);
} }
if ((config.vmVersionFeatures & config.amd64SHA) != 0) {
features.add(AMD64.CPUFeature.SHA);
}
return features; return features;
} }

@ -41,7 +41,6 @@ import jdk.vm.ci.meta.DeoptimizationAction;
import jdk.vm.ci.meta.DeoptimizationReason; import jdk.vm.ci.meta.DeoptimizationReason;
import jdk.vm.ci.meta.JavaConstant; import jdk.vm.ci.meta.JavaConstant;
import jdk.vm.ci.meta.JavaKind; import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.JavaType;
import jdk.vm.ci.meta.MetaAccessProvider; import jdk.vm.ci.meta.MetaAccessProvider;
import jdk.vm.ci.meta.ResolvedJavaField; import jdk.vm.ci.meta.ResolvedJavaField;
import jdk.vm.ci.meta.ResolvedJavaMethod; import jdk.vm.ci.meta.ResolvedJavaMethod;
@ -111,23 +110,26 @@ public class HotSpotMetaAccessProvider implements MetaAccessProvider, HotSpotPro
} }
public ResolvedJavaField lookupJavaField(Field reflectionField) { public ResolvedJavaField lookupJavaField(Field reflectionField) {
String name = reflectionField.getName();
Class<?> fieldHolder = reflectionField.getDeclaringClass();
Class<?> fieldType = reflectionField.getType();
// java.lang.reflect.Field's modifiers should be enough here since VM internal modifier bits
// are not used (yet).
final int modifiers = reflectionField.getModifiers();
final long offset = Modifier.isStatic(modifiers) ? UNSAFE.staticFieldOffset(reflectionField) : UNSAFE.objectFieldOffset(reflectionField);
HotSpotResolvedObjectType holder = fromObjectClass(fieldHolder);
JavaType type = runtime.fromClass(fieldType);
if (offset != -1) {
HotSpotResolvedObjectType resolved = holder;
return resolved.createField(name, type, offset, modifiers);
} else {
throw new JVMCIError("unresolved field %s", reflectionField);
}
Class<?> fieldHolder = reflectionField.getDeclaringClass();
HotSpotResolvedObjectType holder = fromObjectClass(fieldHolder);
if (Modifier.isStatic(reflectionField.getModifiers())) {
final long offset = UNSAFE.staticFieldOffset(reflectionField);
for (ResolvedJavaField field : holder.getStaticFields()) {
if (offset == ((HotSpotResolvedJavaField) field).offset()) {
return field;
}
}
} else {
final long offset = UNSAFE.objectFieldOffset(reflectionField);
for (ResolvedJavaField field : holder.getInstanceFields(false)) {
if (offset == ((HotSpotResolvedJavaField) field).offset()) {
return field;
}
}
}
throw new JVMCIError("unresolved field %s", reflectionField);
} }
private static int intMaskRight(int n) { private static int intMaskRight(int n) {

@ -945,6 +945,7 @@ public class HotSpotVMConfig {
@HotSpotVMConstant(name = "VM_Version::CPU_AVX512CD", archs = {"amd64"}) @Stable public long amd64AVX512CD; @HotSpotVMConstant(name = "VM_Version::CPU_AVX512CD", archs = {"amd64"}) @Stable public long amd64AVX512CD;
@HotSpotVMConstant(name = "VM_Version::CPU_AVX512BW", archs = {"amd64"}) @Stable public long amd64AVX512BW; @HotSpotVMConstant(name = "VM_Version::CPU_AVX512BW", archs = {"amd64"}) @Stable public long amd64AVX512BW;
@HotSpotVMConstant(name = "VM_Version::CPU_AVX512VL", archs = {"amd64"}) @Stable public long amd64AVX512VL; @HotSpotVMConstant(name = "VM_Version::CPU_AVX512VL", archs = {"amd64"}) @Stable public long amd64AVX512VL;
@HotSpotVMConstant(name = "VM_Version::CPU_SHA", archs = {"amd64"}) @Stable public long amd64SHA;
// SPARC specific values // SPARC specific values
@HotSpotVMConstant(name = "VM_Version::vis3_instructions_m", archs = {"sparc"}) @Stable public int sparcVis3Instructions; @HotSpotVMConstant(name = "VM_Version::vis3_instructions_m", archs = {"sparc"}) @Stable public int sparcVis3Instructions;

@ -144,6 +144,7 @@ pthread_t os::Linux::_main_thread;
int os::Linux::_page_size = -1; int os::Linux::_page_size = -1;
const int os::Linux::_vm_default_page_size = (8 * K); const int os::Linux::_vm_default_page_size = (8 * K);
bool os::Linux::_supports_fast_thread_cpu_time = false; bool os::Linux::_supports_fast_thread_cpu_time = false;
uint32_t os::Linux::_os_version = 0;
const char * os::Linux::_glibc_version = NULL; const char * os::Linux::_glibc_version = NULL;
const char * os::Linux::_libpthread_version = NULL; const char * os::Linux::_libpthread_version = NULL;
pthread_condattr_t os::Linux::_condattr[1]; pthread_condattr_t os::Linux::_condattr[1];
@ -4356,6 +4357,48 @@ jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {
return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec; return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
} }
void os::Linux::initialize_os_info() {
assert(_os_version == 0, "OS info already initialized");
struct utsname _uname;
uint32_t major;
uint32_t minor;
uint32_t fix;
int rc;
// Kernel version is unknown if
// verification below fails.
_os_version = 0x01000000;
rc = uname(&_uname);
if (rc != -1) {
rc = sscanf(_uname.release,"%d.%d.%d", &major, &minor, &fix);
if (rc == 3) {
if (major < 256 && minor < 256 && fix < 256) {
// Kernel version format is as expected,
// set it overriding unknown state.
_os_version = (major << 16) |
(minor << 8 ) |
(fix << 0 ) ;
}
}
}
}
uint32_t os::Linux::os_version() {
assert(_os_version != 0, "not initialized");
return _os_version & 0x00FFFFFF;
}
bool os::Linux::os_version_is_known() {
assert(_os_version != 0, "not initialized");
return _os_version & 0x01000000 ? false : true;
}
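Since _os_version packs major/minor/fix into 0x00AABBCC, an ordered comparison against a threshold kernel version becomes a single integer compare. A minimal usage sketch, assuming a hypothetical caller (kernel_at_least_4_5_0 and pack_kernel_version are illustrative names, not part of the commit):

#include <stdint.h>

// Pack a kernel version the same way initialize_os_info() does:
// 0x00AABBCC with AA = major, BB = minor, CC = fix, each below 256.
static inline uint32_t pack_kernel_version(uint32_t major, uint32_t minor, uint32_t fix) {
  return (major << 16) | (minor << 8) | (fix << 0);
}

// Hypothetical feature gate: require kernel 4.5.0 or newer, and stay
// conservative when uname() parsing failed (version unknown).
bool kernel_at_least_4_5_0() {
  if (!os::Linux::os_version_is_known()) {
    return false;
  }
  return os::Linux::os_version() >= pack_kernel_version(4, 5, 0);
}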
///// /////
// glibc on Linux platform uses non-documented flag // glibc on Linux platform uses non-documented flag
// to indicate, that some special sort of signal // to indicate, that some special sort of signal
@ -4578,6 +4621,8 @@ void os::init(void) {
Linux::initialize_system_info(); Linux::initialize_system_info();
Linux::initialize_os_info();
// main_thread points to the aboriginal thread // main_thread points to the aboriginal thread
Linux::_main_thread = pthread_self(); Linux::_main_thread = pthread_self();

@ -56,6 +56,15 @@ class Linux {
static GrowableArray<int>* _cpu_to_node; static GrowableArray<int>* _cpu_to_node;
// 0x00000000 = uninitialized,
// 0x01000000 = kernel version unknown,
// otherwise a 32-bit number:
// 0x00AABBCC
// AA, Major Version
// BB, Minor Version
// CC, Fix Version
static uint32_t _os_version;
protected: protected:
static julong _physical_memory; static julong _physical_memory;
@ -198,6 +207,10 @@ class Linux {
static jlong fast_thread_cpu_time(clockid_t clockid); static jlong fast_thread_cpu_time(clockid_t clockid);
static void initialize_os_info();
static bool os_version_is_known();
static uint32_t os_version();
// pthread_cond clock support // pthread_cond clock support
private: private:
static pthread_condattr_t _condattr[1]; static pthread_condattr_t _condattr[1];

@ -471,7 +471,7 @@ void Canonicalizer::do_Intrinsic (Intrinsic* x) {
InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant(); InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant();
if (c != NULL && !c->value()->is_null_object()) { if (c != NULL && !c->value()->is_null_object()) {
// ciInstance::java_mirror_type() returns non-NULL only for Java mirrors // ciInstance::java_mirror_type() returns non-NULL only for Java mirrors
ciType* t = c->value()->as_instance()->java_mirror_type(); ciType* t = c->value()->java_mirror_type();
if (t->is_klass()) { if (t->is_klass()) {
// substitute cls.isInstance(obj) of a constant Class into // substitute cls.isInstance(obj) of a constant Class into
// an InstanceOf instruction // an InstanceOf instruction
@ -487,6 +487,17 @@ void Canonicalizer::do_Intrinsic (Intrinsic* x) {
} }
break; break;
} }
case vmIntrinsics::_isPrimitive : {
assert(x->number_of_arguments() == 1, "wrong type");
// Class.isPrimitive is known on constant classes:
InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant();
if (c != NULL && !c->value()->is_null_object()) {
ciType* t = c->value()->java_mirror_type();
set_constant(t->is_primitive_type());
}
break;
}
} }
} }

@ -148,6 +148,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
case vmIntrinsics::_longBitsToDouble: case vmIntrinsics::_longBitsToDouble:
case vmIntrinsics::_getClass: case vmIntrinsics::_getClass:
case vmIntrinsics::_isInstance: case vmIntrinsics::_isInstance:
case vmIntrinsics::_isPrimitive:
case vmIntrinsics::_currentThread: case vmIntrinsics::_currentThread:
case vmIntrinsics::_dabs: case vmIntrinsics::_dabs:
case vmIntrinsics::_dsqrt: case vmIntrinsics::_dsqrt:

@ -1296,6 +1296,25 @@ void LIRGenerator::do_getClass(Intrinsic* x) {
__ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result); __ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
} }
// java.lang.Class::isPrimitive()
void LIRGenerator::do_isPrimitive(Intrinsic* x) {
assert(x->number_of_arguments() == 1, "wrong type");
LIRItem rcvr(x->argument_at(0), this);
rcvr.load_item();
LIR_Opr temp = new_register(T_METADATA);
LIR_Opr result = rlock_result(x);
CodeEmitInfo* info = NULL;
if (x->needs_null_check()) {
info = state_for(x);
}
__ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info);
__ cmp(lir_cond_notEqual, temp, LIR_OprFact::intConst(0));
__ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN);
}
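The load/cmove sequence above leans on a HotSpot invariant: only the java.lang.Class mirror of a primitive type (int.class, void.class, and so on) carries a NULL klass field. A one-line sketch of the equivalent runtime check, for orientation only:

// What do_isPrimitive() compiles down to: load the mirror's injected
// klass field and compare it against NULL.
bool mirror_is_primitive(oop java_mirror) {
  return java_lang_Class::as_Klass(java_mirror) == NULL;
}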
// Example: Thread.currentThread() // Example: Thread.currentThread()
void LIRGenerator::do_currentThread(Intrinsic* x) { void LIRGenerator::do_currentThread(Intrinsic* x) {
@ -3098,6 +3117,7 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
case vmIntrinsics::_Object_init: do_RegisterFinalizer(x); break; case vmIntrinsics::_Object_init: do_RegisterFinalizer(x); break;
case vmIntrinsics::_isInstance: do_isInstance(x); break; case vmIntrinsics::_isInstance: do_isInstance(x); break;
case vmIntrinsics::_isPrimitive: do_isPrimitive(x); break;
case vmIntrinsics::_getClass: do_getClass(x); break; case vmIntrinsics::_getClass: do_getClass(x); break;
case vmIntrinsics::_currentThread: do_currentThread(x); break; case vmIntrinsics::_currentThread: do_currentThread(x); break;

@ -246,6 +246,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
void do_RegisterFinalizer(Intrinsic* x); void do_RegisterFinalizer(Intrinsic* x);
void do_isInstance(Intrinsic* x); void do_isInstance(Intrinsic* x);
void do_isPrimitive(Intrinsic* x);
void do_getClass(Intrinsic* x); void do_getClass(Intrinsic* x);
void do_currentThread(Intrinsic* x); void do_currentThread(Intrinsic* x);
void do_MathIntrinsic(Intrinsic* x); void do_MathIntrinsic(Intrinsic* x);

@ -1035,14 +1035,15 @@
do_name( updateByteBuffer_A_name, "updateByteBuffer") \ do_name( updateByteBuffer_A_name, "updateByteBuffer") \
\ \
/* support for Unsafe */ \ /* support for Unsafe */ \
do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \
do_class(jdk_internal_misc_Unsafe, "jdk/internal/misc/Unsafe") \ do_class(jdk_internal_misc_Unsafe, "jdk/internal/misc/Unsafe") \
\ \
do_intrinsic(_allocateInstance, jdk_internal_misc_Unsafe, allocateInstance_name, allocateInstance_signature, F_RN) \ do_intrinsic(_allocateInstance, jdk_internal_misc_Unsafe, allocateInstance_name, allocateInstance_signature, F_RN) \
do_name( allocateInstance_name, "allocateInstance") \ do_name( allocateInstance_name, "allocateInstance") \
do_signature(allocateInstance_signature, "(Ljava/lang/Class;)Ljava/lang/Object;") \ do_signature(allocateInstance_signature, "(Ljava/lang/Class;)Ljava/lang/Object;") \
do_intrinsic(_allocateUninitializedArray, jdk_internal_misc_Unsafe, allocateUninitializedArray_name, newArray_signature, F_R) \
do_name( allocateUninitializedArray_name, "allocateUninitializedArray0") \
do_intrinsic(_copyMemory, jdk_internal_misc_Unsafe, copyMemory_name, copyMemory_signature, F_RN) \ do_intrinsic(_copyMemory, jdk_internal_misc_Unsafe, copyMemory_name, copyMemory_signature, F_RN) \
do_name( copyMemory_name, "copyMemory") \ do_name( copyMemory_name, "copyMemory0") \
do_signature(copyMemory_signature, "(Ljava/lang/Object;JLjava/lang/Object;JJ)V") \ do_signature(copyMemory_signature, "(Ljava/lang/Object;JLjava/lang/Object;JJ)V") \
do_intrinsic(_loadFence, jdk_internal_misc_Unsafe, loadFence_name, loadFence_signature, F_RN) \ do_intrinsic(_loadFence, jdk_internal_misc_Unsafe, loadFence_name, loadFence_signature, F_RN) \
do_name( loadFence_name, "loadFence") \ do_name( loadFence_name, "loadFence") \

@ -639,11 +639,12 @@
declare_constant(VM_Version::CPU_AVX512DQ) \ declare_constant(VM_Version::CPU_AVX512DQ) \
declare_constant(VM_Version::CPU_AVX512PF) \ declare_constant(VM_Version::CPU_AVX512PF) \
declare_constant(VM_Version::CPU_AVX512ER) \ declare_constant(VM_Version::CPU_AVX512ER) \
declare_constant(VM_Version::CPU_AVX512CD) \ declare_constant(VM_Version::CPU_AVX512CD)
declare_constant(VM_Version::CPU_AVX512BW)
#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ #define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) declare_preprocessor_constant("VM_Version::CPU_AVX512BW", CPU_AVX512BW) \
declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) \
declare_preprocessor_constant("VM_Version::CPU_SHA", CPU_SHA)
#endif // TARGET_ARCH_x86 #endif // TARGET_ARCH_x86
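The reshuffle above exists because declare_constant() goes through a 32-bit path: once feature bits reach position 31 and beyond they can no longer be expressed that way, so CPU_AVX512BW joins AVX512VL and the new SHA bit as 64-bit preprocessor constants. Illustrative bit positions only (the real values live in vm_version_x86.hpp):

#include <stdint.h>

// Assumed layout for illustration; not the committed values.
const uint64_t CPU_AVX512CD = UINT64_C(1) << 29;  // still fits a 32-bit constant
const uint64_t CPU_AVX512BW = UINT64_C(1) << 31;  // overflows a signed 32-bit int
const uint64_t CPU_AVX512VL = UINT64_C(1) << 32;  // needs 64 bits outright
const uint64_t CPU_SHA      = UINT64_C(1) << 33;  // new in this change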

@ -1338,73 +1338,6 @@ vmSymbols::SID Method::klass_id_for_intrinsics(const Klass* holder) {
return vmSymbols::find_sid(klass_name); return vmSymbols::find_sid(klass_name);
} }
static bool is_unsafe_alias(vmSymbols::SID name_id) {
// All 70 intrinsic candidate methods from sun.misc.Unsafe in 1.8.
// Some have the same method name but different signature, e.g.
// getByte(long), getByte(Object,long)
switch (name_id) {
case vmSymbols::VM_SYMBOL_ENUM_NAME(allocateInstance_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(copyMemory_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(loadFence_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(storeFence_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(fullFence_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getObject_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getBoolean_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getByte_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getShort_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getChar_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getInt_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getLong_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getFloat_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getDouble_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putObject_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putBoolean_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putByte_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putShort_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putChar_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putInt_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putLong_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putFloat_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putDouble_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getObjectVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getBooleanVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getByteVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getShortVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getCharVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getIntVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getLongVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getFloatVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getDoubleVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putObjectVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putBooleanVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putByteVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putShortVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putCharVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putIntVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putLongVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putFloatVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putDoubleVolatile_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAddress_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putAddress_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(compareAndSwapObject_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(compareAndSwapLong_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(compareAndSwapInt_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putOrderedObject_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putOrderedLong_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(putOrderedInt_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndAddInt_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndAddLong_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndSetInt_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndSetLong_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndSetObject_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(park_name):
case vmSymbols::VM_SYMBOL_ENUM_NAME(unpark_name):
return true;
}
return false;
}
void Method::init_intrinsic_id() { void Method::init_intrinsic_id() {
assert(_intrinsic_id == vmIntrinsics::_none, "do this just once"); assert(_intrinsic_id == vmIntrinsics::_none, "do this just once");
const uintptr_t max_id_uint = right_n_bits((int)(sizeof(_intrinsic_id) * BitsPerByte)); const uintptr_t max_id_uint = right_n_bits((int)(sizeof(_intrinsic_id) * BitsPerByte));
@ -1457,14 +1390,6 @@ void Method::init_intrinsic_id() {
if (is_static() != MethodHandles::is_signature_polymorphic_static(id)) if (is_static() != MethodHandles::is_signature_polymorphic_static(id))
id = vmIntrinsics::_none; id = vmIntrinsics::_none;
break; break;
case vmSymbols::VM_SYMBOL_ENUM_NAME(sun_misc_Unsafe):
// Map sun.misc.Unsafe to jdk.internal.misc.Unsafe
if (!is_unsafe_alias(name_id)) break;
// pretend it is the corresponding method in the internal Unsafe class:
klass_id = vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_misc_Unsafe);
id = vmIntrinsics::find_id(klass_id, name_id, sig_id, flags);
break;
} }
if (id != vmIntrinsics::_none) { if (id != vmIntrinsics::_none) {

@ -498,6 +498,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_currentTimeMillis: case vmIntrinsics::_currentTimeMillis:
case vmIntrinsics::_nanoTime: case vmIntrinsics::_nanoTime:
case vmIntrinsics::_allocateInstance: case vmIntrinsics::_allocateInstance:
case vmIntrinsics::_allocateUninitializedArray:
case vmIntrinsics::_newArray: case vmIntrinsics::_newArray:
case vmIntrinsics::_getLength: case vmIntrinsics::_getLength:
case vmIntrinsics::_copyOf: case vmIntrinsics::_copyOf:

@ -1118,7 +1118,11 @@ class Compile : public Phase {
bool in_scratch_emit_size() const { return _in_scratch_emit_size; } bool in_scratch_emit_size() const { return _in_scratch_emit_size; }
enum ScratchBufferBlob { enum ScratchBufferBlob {
#if defined(PPC64)
MAX_inst_size = 2048,
#else
MAX_inst_size = 1024, MAX_inst_size = 1024,
#endif
MAX_locs_size = 128, // number of relocInfo elements MAX_locs_size = 128, // number of relocInfo elements
MAX_const_size = 128, MAX_const_size = 128,
MAX_stubs_size = 128 MAX_stubs_size = 128

@ -48,6 +48,7 @@
#include "opto/runtime.hpp" #include "opto/runtime.hpp"
#include "opto/subnode.hpp" #include "opto/subnode.hpp"
#include "prims/nativeLookup.hpp" #include "prims/nativeLookup.hpp"
#include "prims/unsafe.hpp"
#include "runtime/sharedRuntime.hpp" #include "runtime/sharedRuntime.hpp"
#ifdef TRACE_HAVE_INTRINSICS #ifdef TRACE_HAVE_INTRINSICS
#include "trace/traceMacros.hpp" #include "trace/traceMacros.hpp"
@ -248,6 +249,7 @@ class LibraryCallKit : public GraphKit {
bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, AccessKind kind, bool is_unaligned); bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
static bool klass_needs_init_guard(Node* kls); static bool klass_needs_init_guard(Node* kls);
bool inline_unsafe_allocate(); bool inline_unsafe_allocate();
bool inline_unsafe_newArray(bool uninitialized);
bool inline_unsafe_copyMemory(); bool inline_unsafe_copyMemory();
bool inline_native_currentThread(); bool inline_native_currentThread();
@ -255,8 +257,6 @@ class LibraryCallKit : public GraphKit {
bool inline_native_isInterrupted(); bool inline_native_isInterrupted();
bool inline_native_Class_query(vmIntrinsics::ID id); bool inline_native_Class_query(vmIntrinsics::ID id);
bool inline_native_subtype_check(); bool inline_native_subtype_check();
bool inline_native_newArray();
bool inline_native_getLength(); bool inline_native_getLength();
bool inline_array_copyOf(bool is_copyOfRange); bool inline_array_copyOf(bool is_copyOfRange);
bool inline_array_equals(StrIntrinsicNode::ArgEnc ae); bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
@ -711,7 +711,6 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_nanoTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime"); case vmIntrinsics::_nanoTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime");
case vmIntrinsics::_allocateInstance: return inline_unsafe_allocate(); case vmIntrinsics::_allocateInstance: return inline_unsafe_allocate();
case vmIntrinsics::_copyMemory: return inline_unsafe_copyMemory(); case vmIntrinsics::_copyMemory: return inline_unsafe_copyMemory();
case vmIntrinsics::_newArray: return inline_native_newArray();
case vmIntrinsics::_getLength: return inline_native_getLength(); case vmIntrinsics::_getLength: return inline_native_getLength();
case vmIntrinsics::_copyOf: return inline_array_copyOf(false); case vmIntrinsics::_copyOf: return inline_array_copyOf(false);
case vmIntrinsics::_copyOfRange: return inline_array_copyOf(true); case vmIntrinsics::_copyOfRange: return inline_array_copyOf(true);
@ -720,6 +719,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_Objects_checkIndex: return inline_objects_checkIndex(); case vmIntrinsics::_Objects_checkIndex: return inline_objects_checkIndex();
case vmIntrinsics::_clone: return inline_native_clone(intrinsic()->is_virtual()); case vmIntrinsics::_clone: return inline_native_clone(intrinsic()->is_virtual());
case vmIntrinsics::_allocateUninitializedArray: return inline_unsafe_newArray(true);
case vmIntrinsics::_newArray: return inline_unsafe_newArray(false);
case vmIntrinsics::_isAssignableFrom: return inline_native_subtype_check(); case vmIntrinsics::_isAssignableFrom: return inline_native_subtype_check();
case vmIntrinsics::_isInstance: case vmIntrinsics::_isInstance:
@ -2303,9 +2305,6 @@ void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset,
} }
// Interpret Unsafe.fieldOffset cookies correctly:
extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr) { const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr) {
// Attempt to infer a sharper value type from the offset and base type. // Attempt to infer a sharper value type from the offset and base type.
ciKlass* sharpened_klass = NULL; ciKlass* sharpened_klass = NULL;
@ -3782,9 +3781,17 @@ Node* LibraryCallKit::generate_array_guard_common(Node* kls, RegionNode* region,
//-----------------------inline_native_newArray-------------------------- //-----------------------inline_native_newArray--------------------------
// private static native Object java.lang.reflect.newArray(Class<?> componentType, int length); // private static native Object java.lang.reflect.newArray(Class<?> componentType, int length);
bool LibraryCallKit::inline_native_newArray() {
Node* mirror = argument(0);
Node* count_val = argument(1);
// private native Object Unsafe.allocateUninitializedArray0(Class<?> cls, int size);
bool LibraryCallKit::inline_unsafe_newArray(bool uninitialized) {
Node* mirror;
Node* count_val;
if (uninitialized) {
mirror = argument(1);
count_val = argument(2);
} else {
mirror = argument(0);
count_val = argument(1);
}
mirror = null_check(mirror); mirror = null_check(mirror);
// If mirror or obj is dead, only null-path is taken. // If mirror or obj is dead, only null-path is taken.
@ -3829,6 +3836,12 @@ bool LibraryCallKit::inline_native_newArray() {
result_val->init_req(_normal_path, obj); result_val->init_req(_normal_path, obj);
result_io ->init_req(_normal_path, i_o()); result_io ->init_req(_normal_path, i_o());
result_mem->init_req(_normal_path, reset_memory()); result_mem->init_req(_normal_path, reset_memory());
if (uninitialized) {
// Mark the allocation so that zeroing is skipped
AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(obj, &_gvn);
alloc->maybe_set_complete(&_gvn);
}
} }
// Return the combined state. // Return the combined state.
@ -4417,7 +4430,7 @@ bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) {
} }
//----------------------inline_unsafe_copyMemory------------------------- //----------------------inline_unsafe_copyMemory-------------------------
// public native void Unsafe.copyMemory(Object srcBase, long srcOffset, Object destBase, long destOffset, long bytes); // public native void Unsafe.copyMemory0(Object srcBase, long srcOffset, Object destBase, long destOffset, long bytes);
bool LibraryCallKit::inline_unsafe_copyMemory() { bool LibraryCallKit::inline_unsafe_copyMemory() {
if (callee()->is_static()) return false; // caller must have the capability! if (callee()->is_static()) return false; // caller must have the capability!
null_check_receiver(); // null-check receiver null_check_receiver(); // null-check receiver

@ -399,10 +399,6 @@ public:
// Optional scaling for the parameter to the ClearArray/CopyArray node. // Optional scaling for the parameter to the ClearArray/CopyArray node.
static const bool init_array_count_is_in_bytes; static const bool init_array_count_is_in_bytes;
// Threshold small size (in bytes) for a ClearArray/CopyArray node.
// Anything this size or smaller may get converted to discrete scalar stores.
static const int init_array_short_size;
// Some hardware needs 2 CMOV's for longs. // Some hardware needs 2 CMOV's for longs.
static const int long_cmove_cost(); static const int long_cmove_cost();

@ -2741,6 +2741,9 @@ Node* ClearArrayNode::Identity(PhaseGVN* phase) {
//------------------------------Idealize--------------------------------------- //------------------------------Idealize---------------------------------------
// Clearing a short array is faster with stores // Clearing a short array is faster with stores
Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){ Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
// Already know this is a large node, do not try to ideal it
if (_is_large) return NULL;
const int unit = BytesPerLong; const int unit = BytesPerLong;
const TypeX* t = phase->type(in(2))->isa_intptr_t(); const TypeX* t = phase->type(in(2))->isa_intptr_t();
if (!t) return NULL; if (!t) return NULL;
@ -2753,8 +2756,11 @@ Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
// (see jck test stmt114.stmt11402.val). // (see jck test stmt114.stmt11402.val).
if (size <= 0 || size % unit != 0) return NULL; if (size <= 0 || size % unit != 0) return NULL;
intptr_t count = size / unit; intptr_t count = size / unit;
// Length too long; use fast hardware clear // Length too long; communicate this to matchers and assemblers.
if (size > Matcher::init_array_short_size) return NULL; // Assemblers are responsible for producing fast hardware clears for it.
if (size > InitArrayShortSize) {
return new ClearArrayNode(in(0), in(1), in(2), in(3), true);
}
Node *mem = in(1); Node *mem = in(1);
if( phase->type(mem)==Type::TOP ) return NULL; if( phase->type(mem)==Type::TOP ) return NULL;
Node *adr = in(3); Node *adr = in(3);
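Net effect: the scalar-store threshold is now the InitArrayShortSize flag instead of the per-platform Matcher constant, and an oversized clear is rebuilt once with is_large set so Ideal() never revisits it. An illustrative predicate for when a constant-size clear gets scalarized (a sketch, not code from the commit):

// Mirrors the checks in ClearArrayNode::Ideal above.
bool clears_with_scalar_stores(intptr_t size_in_bytes) {
  const int unit = BytesPerLong;                 // 8-byte store granularity
  if (size_in_bytes <= 0 || size_in_bytes % unit != 0) {
    return false;                                // odd sizes are left alone
  }
  // Above the threshold the node is only re-tagged is_large and handed to
  // the matcher's rep_stos / rep_stos_large patterns.
  return size_in_bytes <= InitArrayShortSize;
}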
@ -2852,7 +2858,7 @@ Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
// Bulk clear double-words // Bulk clear double-words
Node* zsize = phase->transform(new SubXNode(zend, zbase) ); Node* zsize = phase->transform(new SubXNode(zend, zbase) );
Node* adr = phase->transform(new AddPNode(dest, dest, start_offset) ); Node* adr = phase->transform(new AddPNode(dest, dest, start_offset) );
mem = new ClearArrayNode(ctl, mem, zsize, adr); mem = new ClearArrayNode(ctl, mem, zsize, adr, false);
return phase->transform(mem); return phase->transform(mem);
} }
@ -3901,7 +3907,7 @@ Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
zeroes_done, zeroes_needed, zeroes_done, zeroes_needed,
phase); phase);
zeroes_done = zeroes_needed; zeroes_done = zeroes_needed;
if (zsize > Matcher::init_array_short_size && ++big_init_gaps > 2) if (zsize > InitArrayShortSize && ++big_init_gaps > 2)
do_zeroing = false; // leave the hole, next time do_zeroing = false; // leave the hole, next time
} }
} }

@ -1013,9 +1013,11 @@ public:
//------------------------------ClearArray------------------------------------- //------------------------------ClearArray-------------------------------------
class ClearArrayNode: public Node { class ClearArrayNode: public Node {
private:
bool _is_large;
public: public:
ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base ) ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base, bool is_large)
: Node(ctrl,arymem,word_cnt,base) { : Node(ctrl,arymem,word_cnt,base), _is_large(is_large) {
init_class_id(Class_ClearArray); init_class_id(Class_ClearArray);
} }
virtual int Opcode() const; virtual int Opcode() const;
@ -1026,6 +1028,7 @@ public:
virtual Node* Identity(PhaseGVN* phase); virtual Node* Identity(PhaseGVN* phase);
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual uint match_edge(uint idx) const; virtual uint match_edge(uint idx) const;
bool is_large() const { return _is_large; }
// Clear the given area of an object or array. // Clear the given area of an object or array.
// The start offset must always be aligned mod BytesPerInt. // The start offset must always be aligned mod BytesPerInt.

@ -35,6 +35,7 @@
#include "oops/symbol.hpp" #include "oops/symbol.hpp"
#include "prims/jvm_misc.hpp" #include "prims/jvm_misc.hpp"
#include "prims/nativeLookup.hpp" #include "prims/nativeLookup.hpp"
#include "prims/unsafe.hpp"
#include "runtime/arguments.hpp" #include "runtime/arguments.hpp"
#include "runtime/handles.inline.hpp" #include "runtime/handles.inline.hpp"
#include "runtime/javaCalls.hpp" #include "runtime/javaCalls.hpp"
@ -107,8 +108,6 @@ char* NativeLookup::long_jni_name(const methodHandle& method) {
} }
extern "C" { extern "C" {
void JNICALL JVM_RegisterJDKInternalMiscUnsafeMethods(JNIEnv *env, jclass unsafecls);
void JNICALL JVM_RegisterSunMiscUnsafeMethods(JNIEnv *env, jclass unsafecls);
void JNICALL JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass unsafecls); void JNICALL JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass unsafecls);
void JNICALL JVM_RegisterPerfMethods(JNIEnv *env, jclass perfclass); void JNICALL JVM_RegisterPerfMethods(JNIEnv *env, jclass perfclass);
void JNICALL JVM_RegisterWhiteBoxMethods(JNIEnv *env, jclass wbclass); void JNICALL JVM_RegisterWhiteBoxMethods(JNIEnv *env, jclass wbclass);
@ -123,7 +122,6 @@ extern "C" {
static JNINativeMethod lookup_special_native_methods[] = { static JNINativeMethod lookup_special_native_methods[] = {
{ CC"Java_jdk_internal_misc_Unsafe_registerNatives", NULL, FN_PTR(JVM_RegisterJDKInternalMiscUnsafeMethods) }, { CC"Java_jdk_internal_misc_Unsafe_registerNatives", NULL, FN_PTR(JVM_RegisterJDKInternalMiscUnsafeMethods) },
{ CC"Java_sun_misc_Unsafe_registerNatives", NULL, FN_PTR(JVM_RegisterSunMiscUnsafeMethods) },
{ CC"Java_java_lang_invoke_MethodHandleNatives_registerNatives", NULL, FN_PTR(JVM_RegisterMethodHandleMethods) }, { CC"Java_java_lang_invoke_MethodHandleNatives_registerNatives", NULL, FN_PTR(JVM_RegisterMethodHandleMethods) },
{ CC"Java_jdk_internal_perf_Perf_registerNatives", NULL, FN_PTR(JVM_RegisterPerfMethods) }, { CC"Java_jdk_internal_perf_Perf_registerNatives", NULL, FN_PTR(JVM_RegisterPerfMethods) },
{ CC"Java_sun_hotspot_WhiteBox_registerNatives", NULL, FN_PTR(JVM_RegisterWhiteBoxMethods) }, { CC"Java_sun_hotspot_WhiteBox_registerNatives", NULL, FN_PTR(JVM_RegisterWhiteBoxMethods) },

File diff suppressed because it is too large.

@ -0,0 +1,39 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_PRIMS_UNSAFE_HPP
#define SHARE_VM_PRIMS_UNSAFE_HPP
#include "jni.h"
extern "C" {
void JNICALL JVM_RegisterJDKInternalMiscUnsafeMethods(JNIEnv *env, jclass unsafecls);
}
jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
jlong Unsafe_field_offset_from_byte_offset(jlong byte_offset);
#endif // SHARE_VM_PRIMS_UNSAFE_HPP

@@ -42,17 +42,30 @@ void AdvancedThresholdPolicy::print_specific(EventType type, methodHandle mh, me
 }
 void AdvancedThresholdPolicy::initialize() {
+  int count = CICompilerCount;
+#ifdef _LP64
   // Turn on ergonomic compiler count selection
   if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) {
     FLAG_SET_DEFAULT(CICompilerCountPerCPU, true);
   }
-  int count = CICompilerCount;
   if (CICompilerCountPerCPU) {
     // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n
     int log_cpu = log2_intptr(os::active_processor_count());
     int loglog_cpu = log2_intptr(MAX2(log_cpu, 1));
     count = MAX2(log_cpu * loglog_cpu, 1) * 3 / 2;
   }
+#else
+  // On 32-bit systems, the number of compiler threads is limited to 3.
+  // On these systems, the virtual address space available to the JVM
+  // is usually limited to 2-4 GB (the exact value depends on the platform).
+  // As the compilers (especially C2) can consume a large amount of
+  // memory, scaling the number of compiler threads with the number of
+  // available cores can result in the exhaustion of the address space
+  // available to the VM and thus cause the VM to crash.
+  if (FLAG_IS_DEFAULT(CICompilerCount)) {
+    count = 3;
+  }
+#endif
   set_c1_count(MAX2(count / 3, 1));
   set_c2_count(MAX2(count - c1_count(), 1));
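
Editor's note: for readers tracing the ergonomics above, here is a minimal standalone sketch of the same arithmetic. It is an illustration, not HotSpot code; it assumes log2_intptr behaves like floor(log2 n) for n >= 1 and MAX2 like a plain max, and all class and method names below are hypothetical.

public class CompilerCountSketch {
    // floor(log2(n)) for n >= 1, mirroring log2_intptr
    static int log2(int n) {
        return 31 - Integer.numberOfLeadingZeros(Math.max(n, 1));
    }

    public static void main(String[] args) {
        int cpus = Runtime.getRuntime().availableProcessors();
        int logCpu = log2(cpus);
        int loglogCpu = log2(Math.max(logCpu, 1));
        // Tiered policy: log n * log log n, scaled by 3/2, at least 1.
        int count = Math.max(logCpu * loglogCpu, 1) * 3 / 2;
        // The total is then split roughly 1:2 between C1 and C2 threads.
        int c1 = Math.max(count / 3, 1);
        int c2 = Math.max(count - c1, 1);
        System.out.println("cpus=" + cpus + " count=" + count + " c1=" + c1 + " c2=" + c2);
    }
}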

View File

@@ -2474,9 +2474,11 @@ bool Arguments::check_vm_args_consistency() {
     status = false;
   }
+#ifdef _LP64
   if (!FLAG_IS_DEFAULT(CICompilerCount) && !FLAG_IS_DEFAULT(CICompilerCountPerCPU) && CICompilerCountPerCPU) {
     warning("The VM option CICompilerCountPerCPU overrides CICompilerCount.");
   }
+#endif
 #ifndef SUPPORT_RESERVED_STACK_AREA
   if (StackReservedPages != 0) {

View File

@@ -354,6 +354,14 @@ Flag::Error TypeProfileLevelConstraintFunc(uintx value, bool verbose) {
   return Flag::SUCCESS;
 }
+Flag::Error InitArrayShortSizeConstraintFunc(intx value, bool verbose) {
+  if (value % BytesPerLong != 0) {
+    return Flag::VIOLATES_CONSTRAINT;
+  } else {
+    return Flag::SUCCESS;
+  }
+}
 #ifdef COMPILER2
 Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) {
   if (InteriorEntryAlignment > CodeEntryAlignment) {

View File

@@ -62,6 +62,8 @@ Flag::Error ArraycopySrcPrefetchDistanceConstraintFunc(uintx value, bool verbose
 Flag::Error TypeProfileLevelConstraintFunc(uintx value, bool verbose);
+Flag::Error InitArrayShortSizeConstraintFunc(intx value, bool verbose);
 #ifdef COMPILER2
 Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose);

View File

@@ -725,7 +725,7 @@ public:
                                                                             \
   product(bool, UseSHA, false,                                              \
           "Control whether SHA instructions can be used "                   \
-          "on SPARC and on ARM")                                            \
+          "on SPARC, on ARM and on x86")                                    \
                                                                             \
   product(bool, UseGHASHIntrinsics, false,                                  \
           "Use intrinsics for GHASH versions of crypto")                    \
@@ -3079,16 +3079,16 @@ public:
   develop(intx, MethodHistogramCutoff, 100,                                 \
           "The cutoff value for method invocation histogram (+CountCalls)") \
                                                                             \
-  develop(intx, ProfilerNumberOfInterpretedMethods, 25,                     \
+  diagnostic(intx, ProfilerNumberOfInterpretedMethods, 25,                  \
           "Number of interpreted methods to show in profile")               \
                                                                             \
-  develop(intx, ProfilerNumberOfCompiledMethods, 25,                        \
+  diagnostic(intx, ProfilerNumberOfCompiledMethods, 25,                     \
           "Number of compiled methods to show in profile")                  \
                                                                             \
-  develop(intx, ProfilerNumberOfStubMethods, 25,                            \
+  diagnostic(intx, ProfilerNumberOfStubMethods, 25,                         \
           "Number of stub methods to show in profile")                      \
                                                                             \
-  develop(intx, ProfilerNumberOfRuntimeStubNodes, 25,                       \
+  diagnostic(intx, ProfilerNumberOfRuntimeStubNodes, 25,                    \
           "Number of runtime stub nodes to show in profile")                \
                                                                             \
   product(intx, ProfileIntervalsTicks, 100,                                 \
@@ -4149,6 +4149,13 @@ public:
           "in the loaded class C. "                                         \
           "Check (3) is available only in debug builds.")                   \
                                                                             \
+  develop_pd(intx, InitArrayShortSize,                                      \
+          "Threshold small size (in bytes) for clearing arrays. "           \
+          "Anything this size or smaller may get converted to discrete "    \
+          "scalar stores.")                                                 \
+          range(0, max_intx)                                                \
+          constraint(InitArrayShortSizeConstraintFunc, AfterErgo)           \
+                                                                            \
   diagnostic(bool, CompilerDirectivesIgnoreCompileCommands, false,          \
           "Disable backwards compatibility for compile commands.")          \
                                                                             \
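
Editor's note: a hedged illustration of the new flag's contract, tying the globals.hpp entry to the constraint function added above. The class and constant names below (InitArrayShortSizeSketch, BYTES_PER_LONG) are stand-ins, not HotSpot identifiers: InitArrayShortSize must be a non-negative multiple of BytesPerLong (8 bytes), and array clears at or below the threshold may be emitted as discrete scalar stores instead of a clearing loop.

public class InitArrayShortSizeSketch {
    static final long BYTES_PER_LONG = 8; // stand-in for HotSpot's BytesPerLong

    // Mirrors InitArrayShortSizeConstraintFunc: reject non-multiples of 8.
    static boolean violatesConstraint(long value) {
        return value % BYTES_PER_LONG != 0;
    }

    public static void main(String[] args) {
        System.out.println(violatesConstraint(64)); // false: a multiple of 8, valid
        System.out.println(violatesConstraint(20)); // true: not a multiple of 8
    }
}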

View File

@@ -521,9 +521,9 @@ class RuntimeHistogramElement : public HistogramElement {
   JNI_ENTRY_NO_PRESERVE(result_type, header) \
   WeakPreserveExceptionMark __wem(thread);
 #define JNI_ENTRY_NO_PRESERVE(result_type, header) \
 extern "C" { \
   result_type JNICALL header { \
     JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
     assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
     ThreadInVMfromNative __tiv(thread); \
@@ -535,7 +535,7 @@ extern "C" { \
 // a GC, is called outside the NoHandleMark (set via VM_QUICK_ENTRY_BASE).
 #define JNI_QUICK_ENTRY(result_type, header) \
 extern "C" { \
   result_type JNICALL header { \
     JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
     assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
     ThreadInVMfromNative __tiv(thread); \
@@ -545,7 +545,7 @@ extern "C" { \
 #define JNI_LEAF(result_type, header) \
 extern "C" { \
   result_type JNICALL header { \
     JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
     assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
     VM_LEAF_BASE(result_type, header)

View File

@@ -138,9 +138,15 @@ void SimpleThresholdPolicy::initialize() {
     FLAG_SET_DEFAULT(CICompilerCount, 3);
   }
   int count = CICompilerCount;
+#ifdef _LP64
+  // On 64-bit systems, scale the number of compiler threads with
+  // the number of cores available on the system. Scaling is not
+  // performed on 32-bit systems because it can lead to exhaustion
+  // of the virtual memory address space available to the JVM.
   if (CICompilerCountPerCPU) {
     count = MAX2(log2_intptr(os::active_processor_count()), 1) * 3 / 2;
   }
+#endif
   set_c1_count(MAX2(count / 3, 1));
   set_c2_count(MAX2(count - c1_count(), 1));
   FLAG_SET_ERGO(intx, CICompilerCount, c1_count() + c2_count());

View File

@@ -29,6 +29,7 @@
 #include "gc/shared/cardTableModRefBS.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/method.hpp"
+#include "prims/unsafe.hpp"
 #include "runtime/os.hpp"
 #include "runtime/synchronizer.hpp"
 #include "runtime/thread.hpp"
@@ -326,7 +327,6 @@ Value* SharkBuilder::fabs() {
 }
 Value* SharkBuilder::unsafe_field_offset_to_byte_offset() {
-  extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
   return make_function((address) Unsafe_field_offset_to_byte_offset, "l", "l");
 }

View File

@@ -32,74 +32,99 @@ import jdk.test.lib.TimeLimitedRunner;
 import jdk.test.lib.Utils;
 import pool.PoolHelper;
+import java.util.ArrayList;
 import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;
 public abstract class StressAddJcmdBase {
     private static final int DIRECTIVES_AMOUNT = Integer.getInteger(
             "compiler.compilercontrol.jcmd.StressAddJcmdBase.directivesAmount",
-            1000);
-    private static final int DIRECTIVE_FILES = Integer.getInteger(
-            "compiler.compilercontrol.jcmd.StressAddJcmdBase.directiveFiles",
-            5);
+            200);
+    private static final int TIMEOUT = Integer.getInteger(
+            "compiler.compilercontrol.jcmd.StressAddJcmdBase.timeout",
+            30);
     private static final List<MethodDescriptor> DESCRIPTORS = new PoolHelper()
             .getAllMethods().stream()
             .map(pair -> AbstractTestBase
                     .getValidMethodDescriptor(pair.first))
             .collect(Collectors.toList());
+    private static final String DIRECTIVE_FILE = "directives.json";
+    private static final List<String> VM_OPTIONS = new ArrayList<>();
+    private static final Random RANDOM = Utils.getRandomInstance();
+    static {
+        VM_OPTIONS.add("-Xmixed");
+        VM_OPTIONS.add("-XX:+UnlockDiagnosticVMOptions");
+        VM_OPTIONS.add("-XX:+LogCompilation");
+        VM_OPTIONS.add("-XX:CompilerDirectivesLimit=1001");
+    }
     /**
      * Performs test
      */
     public void test() {
-        List<String> commands = prepareCommands();
-        Executor executor = new TimeLimitedExecutor(commands);
+        HugeDirectiveUtil.createHugeFile(DESCRIPTORS, DIRECTIVE_FILE,
+                DIRECTIVES_AMOUNT);
+        Executor executor = new TimeLimitedExecutor();
         List<OutputAnalyzer> outputAnalyzers = executor.execute();
         outputAnalyzers.get(0).shouldHaveExitValue(0);
    }
     /**
-     * Makes connection to the test VM
+     * Makes connection to the test VM and performs a diagnostic command
      *
      * @param pid a pid of the VM under test
-     * @param commands a list of jcmd commands to be executed
      * @return true if the test should continue invocation of this method
      */
-    protected abstract boolean makeConnection(int pid, List<String> commands);
+    protected abstract boolean makeConnection(int pid);
     /**
      * Finish test executions
      */
     protected void finish() { }
-    private List<String> prepareCommands() {
-        String[] files = new String[DIRECTIVE_FILES];
-        for (int i = 0; i < DIRECTIVE_FILES; i++) {
-            files[i] = "directives" + i + ".json";
-            HugeDirectiveUtil.createHugeFile(DESCRIPTORS, files[i],
-                    DIRECTIVES_AMOUNT);
+    protected String nextCommand() {
+        int i = RANDOM.nextInt(JcmdCommand.values().length);
+        JcmdCommand jcmdCommand = JcmdCommand.values()[i];
+        switch (jcmdCommand) {
+            case ADD:
+                return jcmdCommand.command + " " + DIRECTIVE_FILE;
+            case PRINT:
+            case CLEAR:
+            case REMOVE:
+                return jcmdCommand.command;
+            default:
+                throw new Error("TESTBUG: incorrect command: " + jcmdCommand);
+        }
+    }
+    private enum JcmdCommand {
+        ADD("Compiler.directives_add"),
+        PRINT("Compiler.directives_print"),
+        CLEAR("Compiler.directives_clear"),
+        REMOVE("Compiler.directives_remove");
+        public final String command;
+        JcmdCommand(String command) {
+            this.command = command;
         }
-        return Stream.of(files)
-                .map(file -> "Compiler.directives_add " + file)
-                .collect(Collectors.toList());
     }
     private class TimeLimitedExecutor extends Executor {
-        private final List<String> jcmdCommands;
-        public TimeLimitedExecutor(List<String> jcmdCommands) {
+        public TimeLimitedExecutor() {
             /* There are no need to check the state */
-            super(true, null, null, jcmdCommands);
-            this.jcmdCommands = jcmdCommands;
+            super(true, VM_OPTIONS, null, null);
         }
         @Override
         protected OutputAnalyzer[] executeJCMD(int pid) {
             TimeLimitedRunner runner = new TimeLimitedRunner(
-                    Utils.DEFAULT_TEST_TIMEOUT,
+                    TimeUnit.SECONDS.toMillis(TIMEOUT),
                     Utils.TIMEOUT_FACTOR,
-                    () -> makeConnection(pid, jcmdCommands));
+                    () -> makeConnection(pid));
             try {
                 runner.call();
             } catch (Exception e) {
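
Editor's note: the harness above bounds the stress run with jdk.test.lib's TimeLimitedRunner. As a rough standalone sketch of that shape (an assumption about its semantics, not the test-library implementation; all names below are hypothetical), the runner keeps invoking a callable until it returns false or the (timeout * factor) budget is spent:

import java.util.concurrent.Callable;

public class TimeLimitedLoopSketch {
    // Repeatedly call body until it returns false or the deadline passes.
    public static void run(long timeoutMillis, double factor, Callable<Boolean> body) throws Exception {
        long deadline = System.currentTimeMillis() + (long) (timeoutMillis * factor);
        while (System.currentTimeMillis() < deadline) {
            if (!body.call()) {
                return; // the body asked to stop (e.g. all commands were sent)
            }
        }
    }

    public static void main(String[] args) throws Exception {
        final int[] commandsLeft = {20};
        // Stand-in for makeConnection(pid): submit one jcmd and count down.
        run(30_000, 1.0, () -> --commandsLeft[0] != 0);
        System.out.println("remaining = " + commandsLeft[0]);
    }
}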

View File

@@ -27,21 +27,19 @@
  * @summary Tests jcmd to be able to add a lot of huge directive files with
  *          parallel executed jcmds until timeout has reached
  * @library /testlibrary /test/lib /compiler/testlibrary ../share /
- * @ignore 8148563
  * @build compiler.compilercontrol.jcmd.StressAddMultiThreadedTest
  *        pool.sub.* pool.subpack.* sun.hotspot.WhiteBox
  *        compiler.testlibrary.CompilerUtils
  *        compiler.compilercontrol.share.actions.*
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm/timeout=360 compiler.compilercontrol.jcmd.StressAddMultiThreadedTest
+ * @run driver compiler.compilercontrol.jcmd.StressAddMultiThreadedTest
  */
 package compiler.compilercontrol.jcmd;
 import jdk.test.lib.dcmd.PidJcmdExecutor;
-import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.ExecutorService;
@@ -49,16 +47,15 @@ import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
 public class StressAddMultiThreadedTest extends StressAddJcmdBase {
-    private static final int THREADS;
+    private static final int THREADS = Integer.getInteger(
+            "compiler.compilercontrol.jcmd.StressAddMultiThreadedTest.threads",
+            5);
+    private volatile int commands = Integer.getInteger(
+            "compiler.compilercontrol.jcmd.StressAddMultiThreadedTest.commands",
+            20);
     private final BlockingQueue<Runnable> queue;
     private final ExecutorService executor;
-    static {
-        THREADS = Runtime.getRuntime().availableProcessors()
-                * Integer.getInteger("compiler.compilercontrol.jcmd" +
-                        ".StressAddMultiThreadedTest.threadFactor", 10);
-    }
     public StressAddMultiThreadedTest() {
         queue = new ArrayBlockingQueue<>(THREADS);
         executor = new ThreadPoolExecutor(THREADS, THREADS, 100,
@@ -71,14 +68,10 @@ public class StressAddMultiThreadedTest extends StressAddJcmdBase {
     }
     @Override
-    protected boolean makeConnection(int pid, List<String> commands) {
-        commands.forEach(command -> {
-            if (!executor.isShutdown()) {
-                executor.submit(() -> new PidJcmdExecutor(String.valueOf(pid))
-                        .execute(command));
-            }
-        });
-        return !executor.isShutdown();
+    protected boolean makeConnection(int pid) {
+        executor.submit(() -> new PidJcmdExecutor(String.valueOf(pid))
+                .execute(nextCommand()));
+        return (--commands != 0);
     }
     @Override

View File

@@ -1,55 +0,0 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8137167
* @summary Tests jcmd to be able to add a lot of huge directives
* @library /testlibrary /test/lib /compiler/testlibrary ../share /
* @build compiler.compilercontrol.jcmd.StressAddSequentiallyTest
* pool.sub.* pool.subpack.* sun.hotspot.WhiteBox
* compiler.testlibrary.CompilerUtils
* compiler.compilercontrol.share.actions.*
* @run main ClassFileInstaller sun.hotspot.WhiteBox
* sun.hotspot.WhiteBox$WhiteBoxPermission
* @run main/othervm/timeout=300 compiler.compilercontrol.jcmd.StressAddSequentiallyTest
*/
package compiler.compilercontrol.jcmd;
import jdk.test.lib.dcmd.PidJcmdExecutor;
import java.util.List;
public class StressAddSequentiallyTest extends StressAddJcmdBase {
public static void main(String[] args) {
new StressAddSequentiallyTest().test();
}
@Override
protected boolean makeConnection(int pid, List<String> commands) {
commands.forEach(command -> new PidJcmdExecutor(String.valueOf(pid))
.execute(command));
return true;
}
}

View File

@@ -33,8 +33,8 @@
  *                   -XX:+WhiteBoxAPI
  *                   -XX:DisableIntrinsic=_putCharVolatile,_putInt
  *                   -XX:DisableIntrinsic=_putIntVolatile
- *                   -XX:CompileCommand=option,sun.misc.Unsafe::putChar,ccstrlist,DisableIntrinsic,_getCharVolatile,_getInt
- *                   -XX:CompileCommand=option,sun.misc.Unsafe::putCharVolatile,ccstrlist,DisableIntrinsic,_getIntVolatile
+ *                   -XX:CompileCommand=option,jdk.internal.misc.Unsafe::putChar,ccstrlist,DisableIntrinsic,_getCharVolatile,_getInt
+ *                   -XX:CompileCommand=option,jdk.internal.misc.Unsafe::putCharVolatile,ccstrlist,DisableIntrinsic,_getIntVolatile
  *                   IntrinsicDisabledTest
  */
@@ -60,7 +60,7 @@ public class IntrinsicDisabledTest {
         return Boolean.valueOf(Objects.toString(wb.getVMFlag("TieredCompilation")));
     }
-    /* This test uses several methods from sun.misc.Unsafe. The method
+    /* This test uses several methods from jdk.internal.misc.Unsafe. The method
      * getMethod() returns a different Executable for each different
      * combination of its input parameters. There are eight possible
      * combinations, getMethod can return an Executable representing
@@ -74,7 +74,7 @@ public class IntrinsicDisabledTest {
         String methodTypeName = isChar ? "Char" : "Int";
         try {
-            Class aClass = Class.forName("sun.misc.Unsafe");
+            Class aClass = Class.forName("jdk.internal.misc.Unsafe");
             if (isPut) {
                 aMethod = aClass.getDeclaredMethod("put" + methodTypeName + (isVolatile ? "Volatile" : ""),
                         Object.class,

View File

@@ -0,0 +1,166 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/*
* @test
* @bug 8150669
* @summary C1 intrinsic for Class.isPrimitive
* @modules java.base/jdk.internal.misc
* @run main/othervm -ea -Diters=200 -Xint TestClassIsPrimitive
* @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=1 TestClassIsPrimitive
* @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=4 TestClassIsPrimitive
*/
import java.lang.reflect.Field;
import java.lang.reflect.Array;
import java.util.concurrent.Callable;
public class TestClassIsPrimitive {
static final int ITERS = Integer.getInteger("iters", 1);
public static void main(String... args) throws Exception {
testOK(true, InlineConstants::testBoolean);
testOK(true, InlineConstants::testByte);
testOK(true, InlineConstants::testShort);
testOK(true, InlineConstants::testChar);
testOK(true, InlineConstants::testInt);
testOK(true, InlineConstants::testFloat);
testOK(true, InlineConstants::testLong);
testOK(true, InlineConstants::testDouble);
testOK(false, InlineConstants::testObject);
testOK(false, InlineConstants::testArray);
testOK(true, StaticConstants::testBoolean);
testOK(true, StaticConstants::testByte);
testOK(true, StaticConstants::testShort);
testOK(true, StaticConstants::testChar);
testOK(true, StaticConstants::testInt);
testOK(true, StaticConstants::testFloat);
testOK(true, StaticConstants::testLong);
testOK(true, StaticConstants::testDouble);
testOK(false, StaticConstants::testObject);
testOK(false, StaticConstants::testArray);
testNPE( StaticConstants::testNull);
testOK(true, NoConstants::testBoolean);
testOK(true, NoConstants::testByte);
testOK(true, NoConstants::testShort);
testOK(true, NoConstants::testChar);
testOK(true, NoConstants::testInt);
testOK(true, NoConstants::testFloat);
testOK(true, NoConstants::testLong);
testOK(true, NoConstants::testDouble);
testOK(false, NoConstants::testObject);
testOK(false, NoConstants::testArray);
testNPE( NoConstants::testNull);
}
public static void testOK(boolean expected, Callable<Object> test) throws Exception {
for (int c = 0; c < ITERS; c++) {
Object res = test.call();
if (!res.equals(expected)) {
throw new IllegalStateException("Wrong result: expected = " + expected + ", but got " + res);
}
}
}
static volatile Object sink;
public static void testNPE(Callable<Object> test) throws Exception {
for (int c = 0; c < ITERS; c++) {
try {
sink = test.call();
throw new IllegalStateException("Expected NPE");
} catch (NullPointerException iae) {
// expected
}
}
}
static volatile Class<?> classBoolean = boolean.class;
static volatile Class<?> classByte = byte.class;
static volatile Class<?> classShort = short.class;
static volatile Class<?> classChar = char.class;
static volatile Class<?> classInt = int.class;
static volatile Class<?> classFloat = float.class;
static volatile Class<?> classLong = long.class;
static volatile Class<?> classDouble = double.class;
static volatile Class<?> classObject = Object.class;
static volatile Class<?> classArray = Object[].class;
static volatile Class<?> classNull = null;
static final Class<?> staticClassBoolean = boolean.class;
static final Class<?> staticClassByte = byte.class;
static final Class<?> staticClassShort = short.class;
static final Class<?> staticClassChar = char.class;
static final Class<?> staticClassInt = int.class;
static final Class<?> staticClassFloat = float.class;
static final Class<?> staticClassLong = long.class;
static final Class<?> staticClassDouble = double.class;
static final Class<?> staticClassObject = Object.class;
static final Class<?> staticClassArray = Object[].class;
static final Class<?> staticClassNull = null;
static class InlineConstants {
static boolean testBoolean() { return boolean.class.isPrimitive(); }
static boolean testByte() { return byte.class.isPrimitive(); }
static boolean testShort() { return short.class.isPrimitive(); }
static boolean testChar() { return char.class.isPrimitive(); }
static boolean testInt() { return int.class.isPrimitive(); }
static boolean testFloat() { return float.class.isPrimitive(); }
static boolean testLong() { return long.class.isPrimitive(); }
static boolean testDouble() { return double.class.isPrimitive(); }
static boolean testObject() { return Object.class.isPrimitive(); }
static boolean testArray() { return Object[].class.isPrimitive(); }
}
static class StaticConstants {
static boolean testBoolean() { return staticClassBoolean.isPrimitive(); }
static boolean testByte() { return staticClassByte.isPrimitive(); }
static boolean testShort() { return staticClassShort.isPrimitive(); }
static boolean testChar() { return staticClassChar.isPrimitive(); }
static boolean testInt() { return staticClassInt.isPrimitive(); }
static boolean testFloat() { return staticClassFloat.isPrimitive(); }
static boolean testLong() { return staticClassLong.isPrimitive(); }
static boolean testDouble() { return staticClassDouble.isPrimitive(); }
static boolean testObject() { return staticClassObject.isPrimitive(); }
static boolean testArray() { return staticClassArray.isPrimitive(); }
static boolean testNull() { return staticClassNull.isPrimitive(); }
}
static class NoConstants {
static boolean testBoolean() { return classBoolean.isPrimitive(); }
static boolean testByte() { return classByte.isPrimitive(); }
static boolean testShort() { return classShort.isPrimitive(); }
static boolean testChar() { return classChar.isPrimitive(); }
static boolean testInt() { return classInt.isPrimitive(); }
static boolean testFloat() { return classFloat.isPrimitive(); }
static boolean testLong() { return classLong.isPrimitive(); }
static boolean testDouble() { return classDouble.isPrimitive(); }
static boolean testObject() { return classObject.isPrimitive(); }
static boolean testArray() { return classArray.isPrimitive(); }
static boolean testNull() { return classNull.isPrimitive(); }
}
}

View File

@@ -0,0 +1,213 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/*
* @test
* @bug 8150465
* @summary Unsafe methods to produce uninitialized arrays
* @modules java.base/jdk.internal.misc
* @run main/othervm -ea -Diters=200 -Xint AllocateUninitializedArray
* @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=1 AllocateUninitializedArray
* @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=4 AllocateUninitializedArray
*/
import java.lang.reflect.Field;
import java.lang.reflect.Array;
import java.util.concurrent.Callable;
public class AllocateUninitializedArray {
static final int ITERS = Integer.getInteger("iters", 1);
static final jdk.internal.misc.Unsafe UNSAFE;
static {
try {
Field f = jdk.internal.misc.Unsafe.class.getDeclaredField("theUnsafe");
f.setAccessible(true);
UNSAFE = (jdk.internal.misc.Unsafe) f.get(null);
} catch (Exception e) {
throw new RuntimeException("Unable to get Unsafe instance.", e);
}
}
public static void main(String... args) throws Exception {
testIAE(AllConstants::testObject);
testIAE(LengthIsConstant::testObject);
testIAE(ClassIsConstant::testObject);
testIAE(NothingIsConstant::testObject);
testIAE(AllConstants::testArray);
testIAE(LengthIsConstant::testArray);
testIAE(ClassIsConstant::testArray);
testIAE(NothingIsConstant::testArray);
testIAE(AllConstants::testNull);
testIAE(LengthIsConstant::testNull);
testIAE(ClassIsConstant::testNull);
testIAE(NothingIsConstant::testNull);
testOK(boolean[].class, 10, AllConstants::testBoolean);
testOK(byte[].class, 10, AllConstants::testByte);
testOK(short[].class, 10, AllConstants::testShort);
testOK(char[].class, 10, AllConstants::testChar);
testOK(int[].class, 10, AllConstants::testInt);
testOK(float[].class, 10, AllConstants::testFloat);
testOK(long[].class, 10, AllConstants::testLong);
testOK(double[].class, 10, AllConstants::testDouble);
testOK(boolean[].class, 10, LengthIsConstant::testBoolean);
testOK(byte[].class, 10, LengthIsConstant::testByte);
testOK(short[].class, 10, LengthIsConstant::testShort);
testOK(char[].class, 10, LengthIsConstant::testChar);
testOK(int[].class, 10, LengthIsConstant::testInt);
testOK(float[].class, 10, LengthIsConstant::testFloat);
testOK(long[].class, 10, LengthIsConstant::testLong);
testOK(double[].class, 10, LengthIsConstant::testDouble);
testOK(boolean[].class, 10, ClassIsConstant::testBoolean);
testOK(byte[].class, 10, ClassIsConstant::testByte);
testOK(short[].class, 10, ClassIsConstant::testShort);
testOK(char[].class, 10, ClassIsConstant::testChar);
testOK(int[].class, 10, ClassIsConstant::testInt);
testOK(float[].class, 10, ClassIsConstant::testFloat);
testOK(long[].class, 10, ClassIsConstant::testLong);
testOK(double[].class, 10, ClassIsConstant::testDouble);
testOK(boolean[].class, 10, NothingIsConstant::testBoolean);
testOK(byte[].class, 10, NothingIsConstant::testByte);
testOK(short[].class, 10, NothingIsConstant::testShort);
testOK(char[].class, 10, NothingIsConstant::testChar);
testOK(int[].class, 10, NothingIsConstant::testInt);
testOK(float[].class, 10, NothingIsConstant::testFloat);
testOK(long[].class, 10, NothingIsConstant::testLong);
testOK(double[].class, 10, NothingIsConstant::testDouble);
}
public static void testOK(Class<?> expectClass, int expectLen, Callable<Object> test) throws Exception {
for (int c = 0; c < ITERS; c++) {
Object res = test.call();
Class<?> actualClass = res.getClass();
if (!actualClass.equals(expectClass)) {
throw new IllegalStateException("Wrong class: expected = " + expectClass + ", but got " + actualClass);
}
int actualLen = Array.getLength(res);
if (actualLen != expectLen) {
throw new IllegalStateException("Wrong length: expected = " + expectLen + ", but got " + actualLen);
}
}
}
static volatile Object sink;
public static void testIAE(Callable<Object> test) throws Exception {
for (int c = 0; c < ITERS; c++) {
try {
sink = test.call();
throw new IllegalStateException("Expected IAE");
} catch (IllegalArgumentException iae) {
// expected
}
}
}
static volatile int sampleLenNeg = -1;
static volatile int sampleLenZero = 0;
static volatile int sampleLen = 10;
static volatile Class<?> classBoolean = boolean.class;
static volatile Class<?> classByte = byte.class;
static volatile Class<?> classShort = short.class;
static volatile Class<?> classChar = char.class;
static volatile Class<?> classInt = int.class;
static volatile Class<?> classFloat = float.class;
static volatile Class<?> classLong = long.class;
static volatile Class<?> classDouble = double.class;
static volatile Class<?> classObject = Object.class;
static volatile Class<?> classArray = Object[].class;
static volatile Class<?> classNull = null;
static class AllConstants {
static Object testBoolean() { return UNSAFE.allocateUninitializedArray(boolean.class, 10); }
static Object testByte() { return UNSAFE.allocateUninitializedArray(byte.class, 10); }
static Object testShort() { return UNSAFE.allocateUninitializedArray(short.class, 10); }
static Object testChar() { return UNSAFE.allocateUninitializedArray(char.class, 10); }
static Object testInt() { return UNSAFE.allocateUninitializedArray(int.class, 10); }
static Object testFloat() { return UNSAFE.allocateUninitializedArray(float.class, 10); }
static Object testLong() { return UNSAFE.allocateUninitializedArray(long.class, 10); }
static Object testDouble() { return UNSAFE.allocateUninitializedArray(double.class, 10); }
static Object testObject() { return UNSAFE.allocateUninitializedArray(Object.class, 10); }
static Object testArray() { return UNSAFE.allocateUninitializedArray(Object[].class, 10); }
static Object testNull() { return UNSAFE.allocateUninitializedArray(null, 10); }
static Object testZero() { return UNSAFE.allocateUninitializedArray(int.class, 0); }
static Object testNeg() { return UNSAFE.allocateUninitializedArray(int.class, -1); }
}
static class ClassIsConstant {
static Object testBoolean() { return UNSAFE.allocateUninitializedArray(boolean.class, sampleLen); }
static Object testByte() { return UNSAFE.allocateUninitializedArray(byte.class, sampleLen); }
static Object testShort() { return UNSAFE.allocateUninitializedArray(short.class, sampleLen); }
static Object testChar() { return UNSAFE.allocateUninitializedArray(char.class, sampleLen); }
static Object testInt() { return UNSAFE.allocateUninitializedArray(int.class, sampleLen); }
static Object testFloat() { return UNSAFE.allocateUninitializedArray(float.class, sampleLen); }
static Object testLong() { return UNSAFE.allocateUninitializedArray(long.class, sampleLen); }
static Object testDouble() { return UNSAFE.allocateUninitializedArray(double.class, sampleLen); }
static Object testObject() { return UNSAFE.allocateUninitializedArray(Object.class, sampleLen); }
static Object testArray() { return UNSAFE.allocateUninitializedArray(Object[].class, sampleLen); }
static Object testNull() { return UNSAFE.allocateUninitializedArray(null, sampleLen); }
static Object testZero() { return UNSAFE.allocateUninitializedArray(int.class, sampleLenZero); }
static Object testNeg() { return UNSAFE.allocateUninitializedArray(int.class, sampleLenNeg); }
}
static class LengthIsConstant {
static Object testBoolean() { return UNSAFE.allocateUninitializedArray(classBoolean, 10); }
static Object testByte() { return UNSAFE.allocateUninitializedArray(classByte, 10); }
static Object testShort() { return UNSAFE.allocateUninitializedArray(classShort, 10); }
static Object testChar() { return UNSAFE.allocateUninitializedArray(classChar, 10); }
static Object testInt() { return UNSAFE.allocateUninitializedArray(classInt, 10); }
static Object testFloat() { return UNSAFE.allocateUninitializedArray(classFloat, 10); }
static Object testLong() { return UNSAFE.allocateUninitializedArray(classLong, 10); }
static Object testDouble() { return UNSAFE.allocateUninitializedArray(classDouble, 10); }
static Object testObject() { return UNSAFE.allocateUninitializedArray(classObject, 10); }
static Object testArray() { return UNSAFE.allocateUninitializedArray(classArray, 10); }
static Object testNull() { return UNSAFE.allocateUninitializedArray(classNull, 10); }
static Object testZero() { return UNSAFE.allocateUninitializedArray(classInt, 0); }
static Object testNeg() { return UNSAFE.allocateUninitializedArray(classInt, -1); }
}
static class NothingIsConstant {
static Object testBoolean() { return UNSAFE.allocateUninitializedArray(classBoolean, sampleLen); }
static Object testByte() { return UNSAFE.allocateUninitializedArray(classByte, sampleLen); }
static Object testShort() { return UNSAFE.allocateUninitializedArray(classShort, sampleLen); }
static Object testChar() { return UNSAFE.allocateUninitializedArray(classChar, sampleLen); }
static Object testInt() { return UNSAFE.allocateUninitializedArray(classInt, sampleLen); }
static Object testFloat() { return UNSAFE.allocateUninitializedArray(classFloat, sampleLen); }
static Object testLong() { return UNSAFE.allocateUninitializedArray(classLong, sampleLen); }
static Object testDouble() { return UNSAFE.allocateUninitializedArray(classDouble, sampleLen); }
static Object testObject() { return UNSAFE.allocateUninitializedArray(classObject, sampleLen); }
static Object testArray() { return UNSAFE.allocateUninitializedArray(classArray, sampleLen); }
static Object testNull() { return UNSAFE.allocateUninitializedArray(classNull, sampleLen); }
static Object testZero() { return UNSAFE.allocateUninitializedArray(classInt, sampleLenZero); }
static Object testNeg() { return UNSAFE.allocateUninitializedArray(classInt, sampleLenNeg); }
}
}
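
Editor's note: a minimal usage sketch of the behavior this test asserts, under the assumption that the build exposes the new jdk.internal.misc.Unsafe.allocateUninitializedArray (on a modular JDK, compile and run with --add-exports java.base/jdk.internal.misc=ALL-UNNAMED). The class name UninitArraySketch is hypothetical.

import java.lang.reflect.Field;

public class UninitArraySketch {
    public static void main(String[] args) throws Exception {
        // Same reflective bootstrap the test uses to obtain the Unsafe instance.
        Field f = jdk.internal.misc.Unsafe.class.getDeclaredField("theUnsafe");
        f.setAccessible(true);
        jdk.internal.misc.Unsafe u = (jdk.internal.misc.Unsafe) f.get(null);
        // Element values are unspecified until written; only primitive
        // component types are accepted, so int[] is fine...
        int[] a = (int[]) u.allocateUninitializedArray(int.class, 10);
        System.out.println("len = " + a.length);
        // ...while reference types (or null) throw IllegalArgumentException.
        try {
            u.allocateUninitializedArray(Object.class, 10);
        } catch (IllegalArgumentException expected) {
            System.out.println("Object.class rejected, as the test asserts");
        }
    }
}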