Igor Veresov 2015-11-09 22:43:30 +00:00
commit 51884084c1
25 changed files with 4872 additions and 2643 deletions


@ -1079,10 +1079,10 @@ source %{
// and for a volatile write we need
//
// stlr<x>
//
//
// Alternatively, we can implement them by pairing a normal
// load/store with a memory barrier. For a volatile read we need
//
//
// ldr<x>
// dmb ishld
//
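For orientation, here is a minimal C++ sketch of the two mappings this comment describes, using std::atomic as a stand-in for a Java volatile field (illustrative only; the names are assumptions, not part of the patch):

#include <atomic>

std::atomic<int> g_field;   // stand-in for a Java volatile field

// Volatile read: maps to ldar<x>, or alternatively a plain ldr<x> followed by dmb ishld.
int read_field() {
  return g_field.load(std::memory_order_seq_cst);
}

// Volatile write: maps to stlr<x>, or alternatively a plain str<x> paired with dmb barriers.
void write_field(int value) {
  g_field.store(value, std::memory_order_seq_cst);
}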
@ -1240,7 +1240,7 @@ source %{
// Alternatively, we can elide generation of the dmb instructions
// and plant the alternative CompareAndSwap macro-instruction
// sequence (which uses ldaxr<x>).
//
//
// Of course, the above only applies when we see these signature
// configurations. We still want to plant dmb instructions in any
// other cases where we may see a MemBarAcquire, MemBarRelease or
@ -1367,7 +1367,7 @@ source %{
opcode = parent->Opcode();
return opcode == Op_MemBarRelease;
}
// 2) card mark detection helper
// helper predicate which can be used to detect a volatile membar
@ -1383,7 +1383,7 @@ source %{
// true
//
// iii) the node's Mem projection feeds a StoreCM node.
bool is_card_mark_membar(const MemBarNode *barrier)
{
if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
@ -1402,7 +1402,7 @@ source %{
return true;
}
}
return false;
}
@ -1430,7 +1430,7 @@ source %{
// where
// || and \\ represent Ctl and Mem feeds via Proj nodes
// | \ and / indicate further routing of the Ctl and Mem feeds
//
//
// this is the graph we see for non-object stores. however, for a
// volatile Object store (StoreN/P) we may see other nodes below the
// leading membar because of the need for a GC pre- or post-write
@ -1592,7 +1592,7 @@ source %{
// ordering but neither will a releasing store (stlr). The latter
// guarantees that the object put is visible but does not guarantee
// that writes by other threads have also been observed.
//
//
// So, returning to the task of translating the object put and the
// leading/trailing membar nodes: what do the non-normal node graphs
// look like for these 2 special cases? and how can we determine the
@ -1731,7 +1731,7 @@ source %{
// | | | |
// C | M | M | M |
// \ | | /
// . . .
// . . .
// (post write subtree elided)
// . . .
// C \ M /
@ -1812,12 +1812,12 @@ source %{
// | | | / /
// | Region . . . Phi[M] _____/
// | / | /
// | | /
// | | /
// | . . . . . . | /
// | / | /
// Region | | Phi[M]
// | | | / Bot
// \ MergeMem
// \ MergeMem
// \ /
// MemBarVolatile
//
@ -1858,7 +1858,7 @@ source %{
// to a trailing barrier via a MergeMem. That feed is either direct
// (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
// memory flow (for G1).
//
//
// The predicates controlling generation of instructions for store
// and barrier nodes employ a few simple helper functions (described
// below) which identify the presence or absence of all these
@ -2112,8 +2112,8 @@ source %{
x = x->in(MemNode::Memory);
} else {
// the merge should get its Bottom mem feed from the leading membar
x = mm->in(Compile::AliasIdxBot);
}
x = mm->in(Compile::AliasIdxBot);
}
// ensure this is a non control projection
if (!x->is_Proj() || x->is_CFG()) {
@ -2190,12 +2190,12 @@ source %{
// . . .
// |
// MemBarVolatile (card mark)
// | |
// | |
// | StoreCM
// | |
// | . . .
// Bot | /
// MergeMem
// Bot | /
// MergeMem
// |
// |
// MemBarVolatile {trailing}
@ -2203,10 +2203,10 @@ source %{
// 2)
// MemBarRelease/CPUOrder (leading)
// |
// |
// |
// |\ . . .
// | \ |
// | \ MemBarVolatile (card mark)
// | \ |
// | \ MemBarVolatile (card mark)
// | \ | |
// \ \ | StoreCM . . .
// \ \ |
@ -2231,7 +2231,7 @@ source %{
// | \ \ | StoreCM . . .
// | \ \ |
// \ \ Phi
// \ \ /
// \ \ /
// \ Phi
// \ /
// Phi . . .
@ -2506,7 +2506,7 @@ bool unnecessary_acquire(const Node *barrier)
return (x->is_Load() && x->as_Load()->is_acquire());
}
// now check for an unsafe volatile get
// need to check for
@ -2644,7 +2644,7 @@ bool needs_acquiring_load(const Node *n)
}
membar = child_membar(membar);
if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
return false;
}
@ -2703,7 +2703,7 @@ bool unnecessary_volatile(const Node *n)
// first we check if this is part of a card mark. if so then we have
// to generate a StoreLoad barrier
if (is_card_mark_membar(mbvol)) {
return false;
}
@ -2769,7 +2769,7 @@ bool needs_releasing_store(const Node *n)
if (!is_card_mark_membar(mbvol)) {
return true;
}
// we found a card mark -- just make sure we have a trailing barrier
return (card_mark_to_trailing(mbvol) != NULL);
@ -2808,7 +2808,7 @@ bool needs_acquiring_load_exclusive(const Node *n)
assert(barrier->Opcode() == Op_MemBarCPUOrder,
"CAS not fed by cpuorder membar!");
MemBarNode *b = parent_membar(barrier);
assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
"CAS not fed by cpuorder+release membar pair!");
@ -3463,6 +3463,17 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
// TODO
// identify extra cases that we might want to provide match rules for
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
bool ret_value = match_rule_supported(opcode);
// Add rules here.
return ret_value; // Per default match rules are supported.
}
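As a hedged illustration of the kind of vlen-guarded rule the TODO above anticipates, the sketch below mirrors the Op_CMoveD/vlen check that the vectornode.cpp hunk at the end of this commit moves behind this hook; the surrounding switch structure is an assumption for the sketch, not part of the patch:

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_CMoveD:
        // hypothetical guard: only a 4-element (256-bit) double CMove is supported
        ret_value = (vlen == 4);
        break;
      default:
        break;
    }
  }
  return ret_value;
}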
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@ -4663,7 +4674,7 @@ encode %{
call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
}
if (call == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
ciEnv::current()->record_failure("CodeCache is full");
return;
}
@ -4671,7 +4682,7 @@ encode %{
// Emit stub for static call
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
if (stub == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
ciEnv::current()->record_failure("CodeCache is full");
return;
}
}
@ -4681,7 +4692,7 @@ encode %{
MacroAssembler _masm(&cbuf);
address call = __ ic_call((address)$meth$$method);
if (call == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
ciEnv::current()->record_failure("CodeCache is full");
return;
}
%}
@ -4706,7 +4717,7 @@ encode %{
if (cb) {
address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
if (call == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
ciEnv::current()->record_failure("CodeCache is full");
return;
}
} else {


@ -73,6 +73,7 @@ define_pd_global(bool, UseCISCSpill, true);
define_pd_global(bool, OptoScheduling, false);
define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoRegScheduling, false);
define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
define_pd_global(intx, ReservedCodeCacheSize, 48*M);
define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);


@ -61,6 +61,7 @@ define_pd_global(bool, OptoPeephole, false);
define_pd_global(bool, UseCISCSpill, false);
define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoRegScheduling, false);
define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
// GL:
// Detected a problem with unscaled compressed oops and
// narrow_oop_use_complex_address() == false.


@ -2064,6 +2064,17 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
// TODO
// identify extra cases that we might want to provide match rules for
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
bool ret_value = match_rule_supported(opcode);
// Add rules here.
return ret_value; // Per default match rules are supported.
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@ -3416,7 +3427,7 @@ encode %{
// The stub for call to interpreter.
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
if (stub == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
ciEnv::current()->record_failure("CodeCache is full");
return;
}
}
@ -3465,7 +3476,7 @@ encode %{
// The stub for call to interpreter.
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
if (stub == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
ciEnv::current()->record_failure("CodeCache is full");
return;
}
@ -6912,7 +6923,7 @@ instruct decodeN_Disjoint_isel_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
n_compare->_opnds[0] = op_crx;
n_compare->_opnds[1] = op_src;
n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
n2->add_req(n_region, n_src, n1);
n2->_opnds[0] = op_dst;
@ -10589,7 +10600,7 @@ instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
// Needs matchrule, see cmpDUnordered.
match(Set crx (CmpF src1 src2));
match(Set crx (CmpF src1 src2));
// no match-rule, false predicate
predicate(false);
@ -10698,13 +10709,13 @@ instruct cmpF3_reg_reg_ExEx(iRegIdst dst, regF src1, regF src2) %{
%}
instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
// Needs matchrule so that ideal opcode is Cmp. This causes gcm to place the
// node right before the conditional move using it.
// Needs matchrule so that ideal opcode is Cmp. This causes gcm to place the
// node right before the conditional move using it.
// In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
// compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
// crashed in register allocation where the flags Reg between cmpDUnordered and a
// conditional move was supposed to be spilled.
match(Set crx (CmpD src1 src2));
match(Set crx (CmpD src1 src2));
// False predicate, shall not be matched.
predicate(false);


@ -65,6 +65,7 @@ define_pd_global(bool, UseCISCSpill, false);
define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoScheduling, true);
define_pd_global(bool, OptoRegScheduling, false);
define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
#ifdef _LP64
// We need to make sure that all generated code is within


@ -1860,6 +1860,17 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
// TODO
// identify extra cases that we might want to provide match rules for
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
bool ret_value = match_rule_supported(opcode);
// Add rules here.
return ret_value; // Per default match rules are supported.
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@ -1905,7 +1916,7 @@ const bool Matcher::misaligned_vectors_ok() {
}
// Current (2013) SPARC platforms need to read original key
// to construct decryption expanded key
// to construct decryption expanded key
const bool Matcher::pass_original_key_for_aes() {
return true;
}
@ -2612,7 +2623,7 @@ encode %{
if (stub == NULL && !(TraceJumps && Compile::current()->in_scratch_emit_size())) {
ciEnv::current()->record_failure("CodeCache is full");
return;
}
}
}
%}
@ -3132,10 +3143,10 @@ ins_attrib ins_size(32); // Required size attribute (in bits)
// AVOID_NONE - instruction can be placed anywhere
// AVOID_BEFORE - instruction cannot be placed after an
// instruction with MachNode::AVOID_AFTER
// AVOID_AFTER - the next instruction cannot be the one
// AVOID_AFTER - the next instruction cannot be the one
// with MachNode::AVOID_BEFORE
// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at
// the same time
// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at
// the same time
ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE);
ins_attrib ins_short_branch(0); // Required flag: is this instruction a

File diff suppressed because it is too large


@ -438,6 +438,8 @@ class ArrayAddress VALUE_OBJ_CLASS_SPEC {
};
class InstructionAttr;
// 64-bit reflects the fxsave size, which is 512 bytes, plus the new xsave area on EVEX, which is another 2176 bytes
// See fxsave and xsave(EVEX enabled) documentation for layout
const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);
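A standalone sketch of the arithmetic behind the 2688 figure above, assuming the 8-byte word size of an LP64 build:

#include <cstddef>

constexpr std::size_t kFxSaveBytes    = 512;   // legacy fxsave image
constexpr std::size_t kEvexXsaveBytes = 2176;  // additional xsave state with EVEX enabled
constexpr std::size_t kWordSizeLP64   = 8;     // LP64 assumption

static_assert(kFxSaveBytes + kEvexXsaveBytes == 2688, "matches the constant above");
static_assert(2688 / kWordSizeLP64 == 336, "FPUStateSizeInWords on LP64");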
@ -568,7 +570,8 @@ class Assembler : public AbstractAssembler {
EVEX_8bit = 0,
EVEX_16bit = 1,
EVEX_32bit = 2,
EVEX_64bit = 3
EVEX_64bit = 3,
EVEX_NObit = 4
};
enum WhichOperand {
@ -598,16 +601,12 @@ class Assembler : public AbstractAssembler {
private:
int _evex_encoding;
int _input_size_in_bits;
int _avx_vector_len;
int _tuple_type;
bool _is_evex_instruction;
bool _legacy_mode_bw;
bool _legacy_mode_dq;
bool _legacy_mode_vl;
bool _legacy_mode_vlbw;
bool _instruction_uses_vl;
class InstructionAttr *_attributes;
// 64bit prefixes
int prefix_and_encode(int reg_enc, bool byteinst = false);
@ -637,181 +636,30 @@ private:
int rex_prefix_and_encode(int dst_enc, int src_enc,
VexSimdPrefix pre, VexOpcode opc, bool rex_w);
void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
int nds_enc, VexSimdPrefix pre, VexOpcode opc,
int vector_len);
void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
int nds_enc, VexSimdPrefix pre, VexOpcode opc,
bool is_extended_context, bool is_merge_context,
int vector_len, bool no_mask_reg );
void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
int nds_enc, VexSimdPrefix pre, VexOpcode opc);
void vex_prefix(Address adr, int nds_enc, int xreg_enc,
VexSimdPrefix pre, VexOpcode opc,
bool vex_w, int vector_len,
bool legacy_mode = false, bool no_mask_reg = false);
void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
VexSimdPrefix pre, int vector_len = AVX_128bit,
bool no_mask_reg = false, bool legacy_mode = false) {
int dst_enc = dst->encoding();
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
}
void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
VexSimdPrefix pre, int vector_len = AVX_128bit,
bool no_mask_reg = false) {
int dst_enc = dst->encoding();
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
}
void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
bool vex_w = false;
int vector_len = AVX_128bit;
vex_prefix(src, nds->encoding(), dst->encoding(),
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
vector_len, no_mask_reg);
}
void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
bool vex_w = false;
int vector_len = AVX_128bit;
vex_prefix(src, nds->encoding(), dst->encoding(),
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
vector_len, true, no_mask_reg);
}
void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
bool vex_w = true;
int vector_len = AVX_128bit;
vex_prefix(src, nds->encoding(), dst->encoding(),
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
vector_len, no_mask_reg);
}
void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
bool vex_w = true;
int vector_len = AVX_128bit;
vex_prefix(src, nds->encoding(), dst->encoding(),
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
vector_len, true, no_mask_reg);
}
InstructionAttr *attributes);
int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
VexSimdPrefix pre, VexOpcode opc,
bool vex_w, int vector_len,
bool legacy_mode, bool no_mask_reg);
InstructionAttr *attributes);
int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
bool vex_w = false;
int vector_len = AVX_128bit;
return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
false, no_mask_reg);
}
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes);
int vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
bool vex_w = false;
int vector_len = AVX_128bit;
return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
true, no_mask_reg);
}
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes);
int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
bool vex_w = true;
int vector_len = AVX_128bit;
return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
false, no_mask_reg);
}
int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes);
int vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
bool vex_w = true;
int vector_len = AVX_128bit;
return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
true, no_mask_reg);
}
int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
VexSimdPrefix pre, int vector_len = AVX_128bit,
VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
bool no_mask_reg = false) {
int src_enc = src->encoding();
int dst_enc = dst->encoding();
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
}
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
}
void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
simd_prefix(src, dst, pre, no_mask_reg);
}
void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
VexSimdPrefix pre, bool no_mask_reg = false) {
bool rex_w = true;
simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
}
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
VexSimdPrefix pre, bool no_mask_reg,
VexOpcode opc = VEX_OPCODE_0F,
bool rex_w = false, int vector_len = AVX_128bit,
bool legacy_mode = false);
int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
VexSimdPrefix pre, bool no_mask_reg,
VexOpcode opc = VEX_OPCODE_0F,
bool rex_w = false, int vector_len = AVX_128bit);
int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
VexSimdPrefix pre, bool no_mask_reg,
VexOpcode opc = VEX_OPCODE_0F,
bool rex_w = false, int vector_len = AVX_128bit);
// Move/convert 32-bit integer value.
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
VexSimdPrefix pre, bool no_mask_reg) {
// It is OK to cast from Register to XMMRegister to pass argument here
// since only encoding is used in simd_prefix_and_encode() and number of
// Gen and Xmm registers are the same.
return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
}
int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
}
int simd_prefix_and_encode(Register dst, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
bool no_mask_reg = false) {
return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
}
// Move/convert 64-bit integer value.
int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
VexSimdPrefix pre, bool no_mask_reg = false) {
bool rex_w = true;
return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
}
int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
}
int simd_prefix_and_encode_q(Register dst, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
bool no_mask_reg = false) {
bool rex_w = true;
return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
}
int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes);
// Helper functions for groups of instructions
void emit_arith_b(int op1, int op2, Register dst, int imm8);
@ -821,27 +669,6 @@ private:
void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
void emit_arith(int op1, int op2, Register dst, Register src);
void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
Address src, VexSimdPrefix pre, int vector_len,
bool no_mask_reg = false, bool legacy_mode = false);
void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
Address src, VexSimdPrefix pre, int vector_len,
bool no_mask_reg = false);
void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
XMMRegister src, VexSimdPrefix pre, int vector_len,
bool no_mask_reg = false, bool legacy_mode = false);
void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
XMMRegister src, VexSimdPrefix pre, int vector_len,
bool no_mask_reg = false);
bool emit_compressed_disp_byte(int &disp);
void emit_operand(Register reg,
@ -986,18 +813,16 @@ private:
// belong in macro assembler but there is no need for both varieties to exist
void init_attributes(void) {
_evex_encoding = 0;
_input_size_in_bits = 0;
_avx_vector_len = AVX_NoVec;
_tuple_type = EVEX_ETUP;
_is_evex_instruction = false;
_legacy_mode_bw = (VM_Version::supports_avx512bw() == false);
_legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
_legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
_legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
_instruction_uses_vl = false;
_attributes = NULL;
}
void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
void clear_attributes(void) { _attributes = NULL; }
void lea(Register dst, Address src);
void mov(Register dst, Register src);
@ -2106,12 +1931,12 @@ private:
void vextracti128h(Address dst, XMMRegister src);
// Copy low 256bit into high 256bit of ZMM registers.
void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vextracti64x4h(XMMRegister dst, XMMRegister src);
void vextractf64x4h(XMMRegister dst, XMMRegister src);
void vextractf64x4h(Address dst, XMMRegister src);
void vinsertf64x4h(XMMRegister dst, Address src);
void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
void vextracti64x4h(XMMRegister dst, XMMRegister src, int value);
void vextractf64x4h(XMMRegister dst, XMMRegister src, int value);
void vextractf64x4h(Address dst, XMMRegister src, int value);
void vinsertf64x4h(XMMRegister dst, Address src, int value);
// Copy targeted 128bit segments of the ZMM registers
void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
@ -2173,4 +1998,95 @@ private:
};
// The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
// are applied.
class InstructionAttr {
public:
InstructionAttr(
int vector_len,
bool rex_vex_w,
bool legacy_mode,
bool no_reg_mask,
bool uses_vl)
:
_avx_vector_len(vector_len),
_rex_vex_w(rex_vex_w),
_legacy_mode(legacy_mode),
_no_reg_mask(no_reg_mask),
_uses_vl(uses_vl),
_tuple_type(Assembler::EVEX_ETUP),
_input_size_in_bits(Assembler::EVEX_NObit),
_is_evex_instruction(false),
_evex_encoding(0),
_is_clear_context(false),
_is_extended_context(false),
_current_assembler(NULL) {
if (UseAVX < 3) _legacy_mode = true;
}
~InstructionAttr() {
if (_current_assembler != NULL) {
_current_assembler->clear_attributes();
}
_current_assembler = NULL;
}
private:
int _avx_vector_len;
bool _rex_vex_w;
bool _legacy_mode;
bool _no_reg_mask;
bool _uses_vl;
int _tuple_type;
int _input_size_in_bits;
bool _is_evex_instruction;
int _evex_encoding;
bool _is_clear_context;
bool _is_extended_context;
Assembler *_current_assembler;
public:
// query functions for field accessors
int get_vector_len(void) const { return _avx_vector_len; }
bool is_rex_vex_w(void) const { return _rex_vex_w; }
bool is_legacy_mode(void) const { return _legacy_mode; }
bool is_no_reg_mask(void) const { return _no_reg_mask; }
bool uses_vl(void) const { return _uses_vl; }
int get_tuple_type(void) const { return _tuple_type; }
int get_input_size(void) const { return _input_size_in_bits; }
int is_evex_instruction(void) const { return _is_evex_instruction; }
int get_evex_encoding(void) const { return _evex_encoding; }
bool is_clear_context(void) const { return _is_clear_context; }
bool is_extended_context(void) const { return _is_extended_context; }
// Set the vector len manually
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
// Set the instruction to be encoded in AVX mode
void set_is_legacy_mode(void) { _legacy_mode = true; }
// Set the current instruction to be encoded as an EVEX instruction
void set_is_evex_instruction(void) { _is_evex_instruction = true; }
// Internal encoding data used in compressed immediate offset programming
void set_evex_encoding(int value) { _evex_encoding = value; }
// Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
void set_is_clear_context(void) { _is_clear_context = true; }
// Map back to current assembler so that we can manage object level association
void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
// Address modifiers used for compressed displacement calculation
void set_address_attributes(int tuple_type, int input_size_in_bits) {
if (VM_Version::supports_evex()) {
_tuple_type = tuple_type;
_input_size_in_bits = input_size_in_bits;
}
}
};
#endif // CPU_X86_VM_ASSEMBLER_X86_HPP
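To make the intended use of this attributes object concrete, here is a minimal hedged sketch of how an emitter might drive it; the argument values are illustrative, and the EVEX_FV tuple-type constant is an assumption not shown in this diff:

// Illustrative only: configure attributes for a hypothetical 512-bit EVEX instruction.
void emit_example_op(Assembler* masm) {
  InstructionAttr attributes(Assembler::AVX_512bit, /* rex_vex_w */ false,
                             /* legacy_mode */ false, /* no_reg_mask */ false,
                             /* uses_vl */ true);
  // Address modifiers feed the compressed-displacement calculation for memory operands.
  attributes.set_address_attributes(/* tuple_type */ Assembler::EVEX_FV,
                                    /* input_size_in_bits */ Assembler::EVEX_32bit);
  // The new-style prefix helpers then take the attributes object instead of the old
  // flag arguments (vector_len, legacy_mode, no_mask_reg, ...), e.g.:
  //   masm->vex_prefix(adr, nds_enc, xreg_enc, pre, opc, &attributes);
}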


@ -3714,7 +3714,7 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
__ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
}
if (UseAVX > 1) {
if (UseAVX > 0) {
__ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(),
ExternalAddress((address)float_signflip_pool));
} else {
@ -3725,7 +3725,7 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
__ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
}
if (UseAVX > 1) {
if (UseAVX > 0) {
__ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(),
ExternalAddress((address)double_signflip_pool));
} else {


@ -84,6 +84,7 @@ define_pd_global(bool, UseCISCSpill, true);
define_pd_global(bool, OptoScheduling, false);
define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoRegScheduling, true);
define_pd_global(bool, SuperWordLoopUnrollAnalysis, true);
define_pd_global(intx, ReservedCodeCacheSize, 48*M);
define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);


@ -58,6 +58,4 @@ void Compile::pd_compiler2_init() {
OptoReg::invalidate(i);
}
}
SuperWordLoopUnrollAnalysis = true;
}

File diff suppressed because it is too large


@ -962,10 +962,15 @@ public:
void divss(XMMRegister dst, AddressLiteral src);
// Move Unaligned Double Quadword
void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); }
void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); }
void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); }
void movdqu(Address dst, XMMRegister src);
void movdqu(XMMRegister dst, Address src);
void movdqu(XMMRegister dst, XMMRegister src);
void movdqu(XMMRegister dst, AddressLiteral src);
// AVX Unaligned forms
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
void vmovdqu(XMMRegister dst, AddressLiteral src);
// Move Aligned Double Quadword
void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
@ -1024,12 +1029,12 @@ public:
void ucomisd(XMMRegister dst, AddressLiteral src);
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
void xorpd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
void xorpd(XMMRegister dst, AddressLiteral src);
// Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
void xorps(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
void xorps(XMMRegister dst, AddressLiteral src);
@ -1047,6 +1052,39 @@ public:
void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);


@ -192,31 +192,22 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
} else if(UseSSE >= 2) {
// Save whole 128bit (16 bytes) XMM registers
if (VM_Version::supports_avx512novl()) {
for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf32x4h(Address(rsp, off*wordSize), as_XMMRegister(n), 0);
off += delta;
}
} else {
for (int n = 0; n < num_xmm_regs; n++) {
__ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
off += delta;
}
for (int n = 0; n < num_xmm_regs; n++) {
__ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
off += delta;
}
}
if (vect_words > 0) {
assert(vect_words*wordSize == 128, "");
__ subptr(rsp, 128); // Save upper half of YMM registers
off = 0;
for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
__ vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
}
if (UseAVX > 2) {
__ subptr(rsp, 256); // Save upper half of ZMM registers
off = 0;
for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n));
__ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
}
}
}
@ -285,31 +276,23 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
off += delta;
}
} else if (UseSSE >= 2) {
if (VM_Version::supports_avx512novl()) {
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf32x4h(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes), 0);
off += delta;
}
} else {
for (int n = 0; n < num_xmm_regs; n++) {
__ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
off += delta;
}
for (int n = 0; n < num_xmm_regs; n++) {
__ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
off += delta;
}
}
if (restore_vectors) {
assert(additional_frame_bytes == 128, "");
if (UseAVX > 2) {
off = 0;
// Restore upper half of ZMM registers.
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32));
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
}
__ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registers
}
// Restore upper half of YMM registers.
assert(additional_frame_bytes == 128, "");
off = 0;
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
__ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
}
__ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registers
}


@ -72,45 +72,28 @@ class SimpleRuntimeFrame {
class RegisterSaver {
// Capture info about frame layout. Layout offsets are in jint
// units because compiler frame slots are jints.
#define HALF_ZMM_BANK_WORDS 128
#define XSAVE_AREA_BEGIN 160
#define XSAVE_AREA_YMM_BEGIN 576
#define XSAVE_AREA_ZMM_BEGIN 1152
#define XSAVE_AREA_UPPERBANK 1664
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
#define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
enum layout {
fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
xmm_off = fpu_state_off + 160/BytesPerInt, // offset in fxsave save area
xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area
DEF_XMM_OFFS(0),
DEF_XMM_OFFS(1),
DEF_XMM_OFFS(2),
DEF_XMM_OFFS(3),
DEF_XMM_OFFS(4),
DEF_XMM_OFFS(5),
DEF_XMM_OFFS(6),
DEF_XMM_OFFS(7),
DEF_XMM_OFFS(8),
DEF_XMM_OFFS(9),
DEF_XMM_OFFS(10),
DEF_XMM_OFFS(11),
DEF_XMM_OFFS(12),
DEF_XMM_OFFS(13),
DEF_XMM_OFFS(14),
DEF_XMM_OFFS(15),
zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt),
// 2..15 are implied in range usage
ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
DEF_YMM_OFFS(0),
DEF_YMM_OFFS(1),
// 2..15 are implied in range usage
zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
DEF_ZMM_OFFS(16),
DEF_ZMM_OFFS(17),
DEF_ZMM_OFFS(18),
DEF_ZMM_OFFS(19),
DEF_ZMM_OFFS(20),
DEF_ZMM_OFFS(21),
DEF_ZMM_OFFS(22),
DEF_ZMM_OFFS(23),
DEF_ZMM_OFFS(24),
DEF_ZMM_OFFS(25),
DEF_ZMM_OFFS(26),
DEF_ZMM_OFFS(27),
DEF_ZMM_OFFS(28),
DEF_ZMM_OFFS(29),
DEF_ZMM_OFFS(30),
DEF_ZMM_OFFS(31),
// 18..31 are implied in range usage
fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
fpu_stateH_end,
r15_off, r15H_off,
@ -160,8 +143,6 @@ class RegisterSaver {
};
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
int vect_words = 0;
int ymmhi_offset = -1;
int off = 0;
int num_xmm_regs = XMMRegisterImpl::number_of_registers;
if (UseAVX < 3) {
@ -171,24 +152,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
if (save_vectors) {
assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
// Save upper half of YMM registers
vect_words = 16 * num_xmm_regs / wordSize;
if (UseAVX < 3) {
ymmhi_offset = additional_frame_words;
additional_frame_words += vect_words;
}
}
#else
assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
#endif
// Always make the frame size 16-byte aligned
int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
reg_save_size*BytesPerInt, num_xmm_regs);
// Always make the frame size 16-byte aligned; both vector and non-vector stacks are always allocated
int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs);
// OopMap frame size is in compiler stack slots (jint's) not bytes or words
int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
// The caller will allocate additional_frame_words
int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
// CodeBlob frame size is in words.
int frame_size_in_words = frame_size_in_bytes / wordSize;
*total_frame_words = frame_size_in_words;
@ -203,12 +175,34 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
__ push_CPU_state(); // Push a multiple of 16 bytes
// push cpu state handles this on EVEX enabled targets
if ((vect_words > 0) && (UseAVX < 3)) {
assert(vect_words*wordSize >= 256, "");
// Save upper half of YMM registers(0..num_xmm_regs)
__ subptr(rsp, num_xmm_regs*16);
for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
if (save_vectors) {
// Save upper half of YMM registers(0..15)
int base_addr = XSAVE_AREA_YMM_BEGIN;
for (int n = 0; n < 16; n++) {
__ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
}
if (VM_Version::supports_evex()) {
// Save upper half of ZMM registers(0..15)
base_addr = XSAVE_AREA_ZMM_BEGIN;
for (int n = 0; n < 16; n++) {
__ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
}
// Save full ZMM registers(16..num_xmm_regs)
base_addr = XSAVE_AREA_UPPERBANK;
int off = 0;
int vector_len = Assembler::AVX_512bit;
for (int n = 16; n < num_xmm_regs; n++) {
__ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
}
}
} else {
if (VM_Version::supports_evex()) {
// Save upper bank of ZMM registers(16..31) for double/float usage
int base_addr = XSAVE_AREA_UPPERBANK;
int off = 0;
for (int n = 16; n < num_xmm_regs; n++) {
__ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
}
}
}
if (frame::arg_reg_save_area_bytes != 0) {
@ -224,8 +218,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
OopMapSet *oop_maps = new OopMapSet();
OopMap* map = new OopMap(frame_size_in_slots, 0);
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x))
map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
@ -257,31 +250,21 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
off = zmm16_off;
delta = zmm17_off - off;
for (int n = 16; n < num_xmm_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
XMMRegister zmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
off += delta;
}
}
#if defined(COMPILER2) || INCLUDE_JVMCI
if (save_vectors) {
assert(ymmhi_offset != -1, "save area must exist");
map->set_callee_saved(YMMHI_STACK_OFFSET( 0), xmm0->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next(4));
map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next(4));
off = ymm0_off;
int delta = ymm1_off - off;
for (int n = 0; n < 16; n++) {
XMMRegister ymm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
off += delta;
}
}
#endif // COMPILER2 || INCLUDE_JVMCI
@ -316,8 +299,8 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
off = zmm16H_off;
delta = zmm17H_off - off;
for (int n = 16; n < num_xmm_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
XMMRegister zmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
off += delta;
}
}
@ -335,21 +318,48 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
// Pop arg register save area
__ addptr(rsp, frame::arg_reg_save_area_bytes);
}
#if defined(COMPILER2) || INCLUDE_JVMCI
// On EVEX enabled targets everything is handled in pop fpu state
if ((restore_vectors) && (UseAVX < 3)) {
assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
int off = 0;
// Restore upper half of YMM registers (0..num_xmm_regs)
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
}
__ addptr(rsp, num_xmm_regs*16);
if (restore_vectors) {
assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
}
#else
assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
assert(!save_vectors, "vectors are generated only by C2");
#endif
// On EVEX enabled targets everything is handled in pop fpu state
if (restore_vectors) {
// Restore upper half of YMM registers (0..15)
int base_addr = XSAVE_AREA_YMM_BEGIN;
for (int n = 0; n < 16; n++) {
__ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16));
}
if (VM_Version::supports_evex()) {
// Restore upper half of ZMM registers (0..15)
base_addr = XSAVE_AREA_ZMM_BEGIN;
for (int n = 0; n < 16; n++) {
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
}
// Restore full ZMM registers(16..num_xmm_regs)
base_addr = XSAVE_AREA_UPPERBANK;
int vector_len = Assembler::AVX_512bit;
int off = 0;
for (int n = 16; n < num_xmm_regs; n++) {
__ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
}
}
} else {
if (VM_Version::supports_evex()) {
// Restore upper bank of ZMM registers(16..31) for double/float usage
int base_addr = XSAVE_AREA_UPPERBANK;
int off = 0;
for (int n = 16; n < num_xmm_regs; n++) {
__ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
}
}
}
// Recover CPU state
__ pop_CPU_state();
// Get the rbp described implicitly by the calling convention (no oopMap)


@ -273,7 +273,7 @@ class StubGenerator: public StubCodeGenerator {
if (UseAVX > 2) {
last_reg = 31;
}
if (VM_Version::supports_avx512novl()) {
if (VM_Version::supports_evex()) {
for (int i = xmm_save_first; i <= last_reg; i++) {
__ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0);
}
@ -391,7 +391,7 @@ class StubGenerator: public StubCodeGenerator {
// restore regs belonging to calling function
#ifdef _WIN64
// emit the restores for xmm regs
if (VM_Version::supports_avx512novl()) {
if (VM_Version::supports_evex()) {
for (int i = xmm_save_first; i <= last_reg; i++) {
__ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0);
}


@ -891,7 +891,7 @@ void VM_Version::get_processor_features() {
UseNewLongLShift = true;
}
if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
if( supports_sse4a() ) {
if (supports_sse4a()) {
UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
} else {
UseXmmLoadAndClearUpper = false;


@ -552,6 +552,19 @@ protected:
break;
}
}
// zmm_save will be set on an EVEX enabled machine even if we choose AVX code gen
if (retVal == false) {
// Verify that the OS saves/restores all bits of EVEX registers
// during signal processing.
int nreg = 2 LP64_ONLY(+2);
retVal = true;
for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
retVal = false;
break;
}
}
}
}
return retVal;
}
@ -706,6 +719,9 @@ public:
static bool supports_avx512vl() { return (_cpuFeatures & CPU_AVX512VL) != 0; }
static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
// Intel features
static bool is_intel_family_core() { return is_intel() &&
extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
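A brief hedged sketch of how code-generation setup might branch on the new predicates above (the function and its comments are illustrative, not taken from the patch):

// Illustrative: choose an encoding strategy from the new VM_Version predicates.
static void choose_vector_encoding() {
  if (VM_Version::supports_avx512vl()) {
    // EVEX with vector-length extensions: EVEX encodings usable at 128/256/512 bits.
  } else if (VM_Version::supports_avx512novl()) {
    // EVEX without VL: keep EVEX for 512-bit operations, fall back to VEX below that.
  } else if (VM_Version::supports_avxonly()) {
    // AVX/AVX2 without EVEX: stay on 256-bit VEX encodings.
  } else {
    // SSE-only fallback.
  }
}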

File diff suppressed because it is too large


@ -291,9 +291,7 @@ static int pre_call_resets_size() {
size += 6; // fldcw
}
if (C->max_vector_size() > 16) {
if(UseAVX <= 2) {
size += 3; // vzeroupper
}
size += 3; // vzeroupper
}
return size;
}
@ -1915,7 +1913,7 @@ encode %{
if (stub == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
return;
}
}
}
%}


@ -536,11 +536,7 @@ source %{
#define __ _masm.
static int clear_avx_size() {
if(UseAVX > 2) {
return 0; // vzeroupper is ignored
} else {
return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // vzeroupper
}
return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // vzeroupper
}
// !!!!! Special hack to get all types of calls to specify the byte offset
@ -871,7 +867,7 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
if (framesize > 0) {
st->print("\n\t");
st->print("addq rbp, #%d", framesize);
}
}
}
}


@ -186,9 +186,9 @@
"Maximum number of unrolls for main loop") \
range(0, max_jint) \
\
product(bool, SuperWordLoopUnrollAnalysis, false, \
"Map number of unrolls for main loop via " \
"Superword Level Parallelism analysis") \
product_pd(bool, SuperWordLoopUnrollAnalysis, \
"Map number of unrolls for main loop via " \
"Superword Level Parallelism analysis") \
\
notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false, \
"Trace what Superword Level Parallelism analysis applies") \


@ -269,6 +269,10 @@ public:
// should generate this one.
static const bool match_rule_supported(int opcode);
// identify extra cases that we might want to provide match rules for
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
static const bool match_rule_supported_vector(int opcode, int vlen);
// Some uarchs have different sized float register resources
static const int float_pressure(int default_pressure_threshold);


@ -2247,7 +2247,10 @@ void SuperWord::output() {
NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte));
// For atomic unrolled loops which are vector mapped, instigate more unrolling.
cl->set_notpassed_slp();
C->set_major_progress();
// if vector resources are limited, do not allow additional unrolling
if (FLOATPRESSURE > 8) {
C->set_major_progress();
}
cl->mark_do_unroll_only();
}
}


@ -188,7 +188,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
(vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) {
int vopc = VectorNode::opcode(opc, bt);
return vopc > 0 && Matcher::match_rule_supported(vopc) && (vopc != Op_CMoveD || vlen == 4);
return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen);
}
return false;
}