Merge
This commit is contained in:
commit
51884084c1
@ -1079,10 +1079,10 @@ source %{
|
||||
// and for a volatile write we need
|
||||
//
|
||||
// stlr<x>
|
||||
//
|
||||
//
|
||||
// Alternatively, we can implement them by pairing a normal
|
||||
// load/store with a memory barrier. For a volatile read we need
|
||||
//
|
||||
//
|
||||
// ldr<x>
|
||||
// dmb ishld
|
||||
//
|
||||
@ -1240,7 +1240,7 @@ source %{
|
||||
// Alternatively, we can elide generation of the dmb instructions
|
||||
// and plant the alternative CompareAndSwap macro-instruction
|
||||
// sequence (which uses ldaxr<x>).
|
||||
//
|
||||
//
|
||||
// Of course, the above only applies when we see these signature
|
||||
// configurations. We still want to plant dmb instructions in any
|
||||
// other cases where we may see a MemBarAcquire, MemBarRelease or
|
||||
@ -1367,7 +1367,7 @@ source %{
|
||||
opcode = parent->Opcode();
|
||||
return opcode == Op_MemBarRelease;
|
||||
}
|
||||
|
||||
|
||||
// 2) card mark detection helper
|
||||
|
||||
// helper predicate which can be used to detect a volatile membar
|
||||
@ -1383,7 +1383,7 @@ source %{
|
||||
// true
|
||||
//
|
||||
// iii) the node's Mem projection feeds a StoreCM node.
|
||||
|
||||
|
||||
bool is_card_mark_membar(const MemBarNode *barrier)
|
||||
{
|
||||
if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
|
||||
@ -1402,7 +1402,7 @@ source %{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1430,7 +1430,7 @@ source %{
|
||||
// where
|
||||
// || and \\ represent Ctl and Mem feeds via Proj nodes
|
||||
// | \ and / indicate further routing of the Ctl and Mem feeds
|
||||
//
|
||||
//
|
||||
// this is the graph we see for non-object stores. however, for a
|
||||
// volatile Object store (StoreN/P) we may see other nodes below the
|
||||
// leading membar because of the need for a GC pre- or post-write
|
||||
@ -1592,7 +1592,7 @@ source %{
|
||||
// ordering but neither will a releasing store (stlr). The latter
|
||||
// guarantees that the object put is visible but does not guarantee
|
||||
// that writes by other threads have also been observed.
|
||||
//
|
||||
//
|
||||
// So, returning to the task of translating the object put and the
|
||||
// leading/trailing membar nodes: what do the non-normal node graph
|
||||
// look like for these 2 special cases? and how can we determine the
|
||||
@ -1731,7 +1731,7 @@ source %{
|
||||
// | | | |
|
||||
// C | M | M | M |
|
||||
// \ | | /
|
||||
// . . .
|
||||
// . . .
|
||||
// (post write subtree elided)
|
||||
// . . .
|
||||
// C \ M /
|
||||
@ -1812,12 +1812,12 @@ source %{
|
||||
// | | | / /
|
||||
// | Region . . . Phi[M] _____/
|
||||
// | / | /
|
||||
// | | /
|
||||
// | | /
|
||||
// | . . . . . . | /
|
||||
// | / | /
|
||||
// Region | | Phi[M]
|
||||
// | | | / Bot
|
||||
// \ MergeMem
|
||||
// \ MergeMem
|
||||
// \ /
|
||||
// MemBarVolatile
|
||||
//
|
||||
@ -1858,7 +1858,7 @@ source %{
|
||||
// to a trailing barrier via a MergeMem. That feed is either direct
|
||||
// (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
|
||||
// memory flow (for G1).
|
||||
//
|
||||
//
|
||||
// The predicates controlling generation of instructions for store
|
||||
// and barrier nodes employ a few simple helper functions (described
|
||||
// below) which identify the presence or absence of all these
|
||||
@ -2112,8 +2112,8 @@ source %{
|
||||
x = x->in(MemNode::Memory);
|
||||
} else {
|
||||
// the merge should get its Bottom mem feed from the leading membar
|
||||
x = mm->in(Compile::AliasIdxBot);
|
||||
}
|
||||
x = mm->in(Compile::AliasIdxBot);
|
||||
}
|
||||
|
||||
// ensure this is a non control projection
|
||||
if (!x->is_Proj() || x->is_CFG()) {
|
||||
@ -2190,12 +2190,12 @@ source %{
|
||||
// . . .
|
||||
// |
|
||||
// MemBarVolatile (card mark)
|
||||
// | |
|
||||
// | |
|
||||
// | StoreCM
|
||||
// | |
|
||||
// | . . .
|
||||
// Bot | /
|
||||
// MergeMem
|
||||
// Bot | /
|
||||
// MergeMem
|
||||
// |
|
||||
// |
|
||||
// MemBarVolatile {trailing}
|
||||
@ -2203,10 +2203,10 @@ source %{
|
||||
// 2)
|
||||
// MemBarRelease/CPUOrder (leading)
|
||||
// |
|
||||
// |
|
||||
// |
|
||||
// |\ . . .
|
||||
// | \ |
|
||||
// | \ MemBarVolatile (card mark)
|
||||
// | \ |
|
||||
// | \ MemBarVolatile (card mark)
|
||||
// | \ | |
|
||||
// \ \ | StoreCM . . .
|
||||
// \ \ |
|
||||
@ -2231,7 +2231,7 @@ source %{
|
||||
// | \ \ | StoreCM . . .
|
||||
// | \ \ |
|
||||
// \ \ Phi
|
||||
// \ \ /
|
||||
// \ \ /
|
||||
// \ Phi
|
||||
// \ /
|
||||
// Phi . . .
|
||||
@ -2506,7 +2506,7 @@ bool unnecessary_acquire(const Node *barrier)
|
||||
|
||||
return (x->is_Load() && x->as_Load()->is_acquire());
|
||||
}
|
||||
|
||||
|
||||
// now check for an unsafe volatile get
|
||||
|
||||
// need to check for
|
||||
@ -2644,7 +2644,7 @@ bool needs_acquiring_load(const Node *n)
|
||||
}
|
||||
|
||||
membar = child_membar(membar);
|
||||
|
||||
|
||||
// Bail out unless the child barrier exists and is a MemBarCPUOrder.
// The comparison must be written with != : the previous form
// "!membar->Opcode() == Op_MemBarCPUOrder" negated the opcode first
// (unary ! binds tighter than ==), so the opcode test never rejected
// a barrier of the wrong kind.
if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
  return false;
}
|
||||
@ -2703,7 +2703,7 @@ bool unnecessary_volatile(const Node *n)
|
||||
|
||||
// first we check if this is part of a card mark. if so then we have
|
||||
// to generate a StoreLoad barrier
|
||||
|
||||
|
||||
if (is_card_mark_membar(mbvol)) {
|
||||
return false;
|
||||
}
|
||||
@ -2769,7 +2769,7 @@ bool needs_releasing_store(const Node *n)
|
||||
if (!is_card_mark_membar(mbvol)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// we found a card mark -- just make sure we have a trailing barrier
|
||||
|
||||
return (card_mark_to_trailing(mbvol) != NULL);
|
||||
@ -2808,7 +2808,7 @@ bool needs_acquiring_load_exclusive(const Node *n)
|
||||
|
||||
assert(barrier->Opcode() == Op_MemBarCPUOrder,
|
||||
"CAS not fed by cpuorder membar!");
|
||||
|
||||
|
||||
MemBarNode *b = parent_membar(barrier);
|
||||
assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
|
||||
"CAS not fed by cpuorder+release membar pair!");
|
||||
@ -3463,6 +3463,17 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return true; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
|
||||
// TODO
|
||||
// identify extra cases that we might want to provide match rules for
|
||||
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
||||
bool ret_value = match_rule_supported(opcode);
|
||||
// Add rules here.
|
||||
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
// Returns the float pressure threshold to use; the supplied default is
// handed back unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
|
||||
@ -4663,7 +4674,7 @@ encode %{
|
||||
call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
|
||||
}
|
||||
if (call == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -4671,7 +4682,7 @@ encode %{
|
||||
// Emit stub for static call
|
||||
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
|
||||
if (stub == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -4681,7 +4692,7 @@ encode %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
address call = __ ic_call((address)$meth$$method);
|
||||
if (call == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
return;
|
||||
}
|
||||
%}
|
||||
@ -4706,7 +4717,7 @@ encode %{
|
||||
if (cb) {
|
||||
address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
|
||||
if (call == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
|
@ -73,6 +73,7 @@ define_pd_global(bool, UseCISCSpill, true);
|
||||
define_pd_global(bool, OptoScheduling, false);
|
||||
define_pd_global(bool, OptoBundling, false);
|
||||
define_pd_global(bool, OptoRegScheduling, false);
|
||||
define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
|
||||
|
||||
define_pd_global(intx, ReservedCodeCacheSize, 48*M);
|
||||
define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);
|
||||
|
@ -61,6 +61,7 @@ define_pd_global(bool, OptoPeephole, false);
|
||||
define_pd_global(bool, UseCISCSpill, false);
|
||||
define_pd_global(bool, OptoBundling, false);
|
||||
define_pd_global(bool, OptoRegScheduling, false);
|
||||
define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
|
||||
// GL:
|
||||
// Detected a problem with unscaled compressed oops and
|
||||
// narrow_oop_use_complex_address() == false.
|
||||
|
@ -2064,6 +2064,17 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return true; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
|
||||
// TODO
|
||||
// identify extra cases that we might want to provide match rules for
|
||||
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
||||
bool ret_value = match_rule_supported(opcode);
|
||||
// Add rules here.
|
||||
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
// Returns the float pressure threshold to use; the supplied default is
// handed back unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
|
||||
@ -3416,7 +3427,7 @@ encode %{
|
||||
// The stub for call to interpreter.
|
||||
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
|
||||
if (stub == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -3465,7 +3476,7 @@ encode %{
|
||||
// The stub for call to interpreter.
|
||||
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
|
||||
if (stub == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -6912,7 +6923,7 @@ instruct decodeN_Disjoint_isel_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
|
||||
n_compare->_opnds[0] = op_crx;
|
||||
n_compare->_opnds[1] = op_src;
|
||||
n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
|
||||
|
||||
|
||||
decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
|
||||
n2->add_req(n_region, n_src, n1);
|
||||
n2->_opnds[0] = op_dst;
|
||||
@ -10589,7 +10600,7 @@ instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
|
||||
|
||||
instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
|
||||
// Needs matchrule, see cmpDUnordered.
|
||||
match(Set crx (CmpF src1 src2));
|
||||
match(Set crx (CmpF src1 src2));
|
||||
// no match-rule, false predicate
|
||||
predicate(false);
|
||||
|
||||
@ -10698,13 +10709,13 @@ instruct cmpF3_reg_reg_ExEx(iRegIdst dst, regF src1, regF src2) %{
|
||||
%}
|
||||
|
||||
instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
|
||||
// Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the
|
||||
// node right before the conditional move using it.
|
||||
// Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the
|
||||
// node right before the conditional move using it.
|
||||
// In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
|
||||
// compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
|
||||
// crashed in register allocation where the flags Reg between cmpDUnordered and a
|
||||
// conditional move was supposed to be spilled.
|
||||
match(Set crx (CmpD src1 src2));
|
||||
match(Set crx (CmpD src1 src2));
|
||||
// False predicate, shall not be matched.
|
||||
predicate(false);
|
||||
|
||||
|
@ -65,6 +65,7 @@ define_pd_global(bool, UseCISCSpill, false);
|
||||
define_pd_global(bool, OptoBundling, false);
|
||||
define_pd_global(bool, OptoScheduling, true);
|
||||
define_pd_global(bool, OptoRegScheduling, false);
|
||||
define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
|
||||
|
||||
#ifdef _LP64
|
||||
// We need to make sure that all generated code is within
|
||||
|
@ -1860,6 +1860,17 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return true; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
|
||||
// TODO
|
||||
// identify extra cases that we might want to provide match rules for
|
||||
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
||||
bool ret_value = match_rule_supported(opcode);
|
||||
// Add rules here.
|
||||
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
// Returns the float pressure threshold to use; the supplied default is
// handed back unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
|
||||
@ -1905,7 +1916,7 @@ const bool Matcher::misaligned_vectors_ok() {
|
||||
}
|
||||
|
||||
// Current (2013) SPARC platforms need to read original key
|
||||
// to construct decryption expanded key
|
||||
// to construct decryption expanded key
|
||||
const bool Matcher::pass_original_key_for_aes() {
  // Unconditionally ask for the original AES key to be passed along.
  return true;
}
|
||||
@ -2612,7 +2623,7 @@ encode %{
|
||||
if (stub == NULL && !(TraceJumps && Compile::current()->in_scratch_emit_size())) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
%}
|
||||
|
||||
@ -3132,10 +3143,10 @@ ins_attrib ins_size(32); // Required size attribute (in bits)
|
||||
// AVOID_NONE - instruction can be placed anywhere
|
||||
// AVOID_BEFORE - instruction cannot be placed after an
|
||||
// instruction with MachNode::AVOID_AFTER
|
||||
// AVOID_AFTER - the next instruction cannot be the one
|
||||
// AVOID_AFTER - the next instruction cannot be the one
|
||||
// with MachNode::AVOID_BEFORE
|
||||
// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at
|
||||
// the same time
|
||||
// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at
|
||||
// the same time
|
||||
ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE);
|
||||
|
||||
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -438,6 +438,8 @@ class ArrayAddress VALUE_OBJ_CLASS_SPEC {
|
||||
|
||||
};
|
||||
|
||||
class InstructionAttr;
|
||||
|
||||
// 64-bit reflects the fxsave size, which is 512 bytes, plus the new xsave area on EVEX, which is another 2176 bytes
|
||||
// See fxsave and xsave(EVEX enabled) documentation for layout
|
||||
const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);
|
||||
@ -568,7 +570,8 @@ class Assembler : public AbstractAssembler {
|
||||
EVEX_8bit = 0,
|
||||
EVEX_16bit = 1,
|
||||
EVEX_32bit = 2,
|
||||
EVEX_64bit = 3
|
||||
EVEX_64bit = 3,
|
||||
EVEX_NObit = 4
|
||||
};
|
||||
|
||||
enum WhichOperand {
|
||||
@ -598,16 +601,12 @@ class Assembler : public AbstractAssembler {
|
||||
|
||||
private:
|
||||
|
||||
int _evex_encoding;
|
||||
int _input_size_in_bits;
|
||||
int _avx_vector_len;
|
||||
int _tuple_type;
|
||||
bool _is_evex_instruction;
|
||||
bool _legacy_mode_bw;
|
||||
bool _legacy_mode_dq;
|
||||
bool _legacy_mode_vl;
|
||||
bool _legacy_mode_vlbw;
|
||||
bool _instruction_uses_vl;
|
||||
|
||||
class InstructionAttr *_attributes;
|
||||
|
||||
// 64bit prefixes
|
||||
int prefix_and_encode(int reg_enc, bool byteinst = false);
|
||||
@ -637,181 +636,30 @@ private:
|
||||
int rex_prefix_and_encode(int dst_enc, int src_enc,
|
||||
VexSimdPrefix pre, VexOpcode opc, bool rex_w);
|
||||
|
||||
void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
|
||||
int nds_enc, VexSimdPrefix pre, VexOpcode opc,
|
||||
int vector_len);
|
||||
void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
|
||||
|
||||
void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
|
||||
int nds_enc, VexSimdPrefix pre, VexOpcode opc,
|
||||
bool is_extended_context, bool is_merge_context,
|
||||
int vector_len, bool no_mask_reg );
|
||||
void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
|
||||
int nds_enc, VexSimdPrefix pre, VexOpcode opc);
|
||||
|
||||
void vex_prefix(Address adr, int nds_enc, int xreg_enc,
|
||||
VexSimdPrefix pre, VexOpcode opc,
|
||||
bool vex_w, int vector_len,
|
||||
bool legacy_mode = false, bool no_mask_reg = false);
|
||||
|
||||
void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
|
||||
VexSimdPrefix pre, int vector_len = AVX_128bit,
|
||||
bool no_mask_reg = false, bool legacy_mode = false) {
|
||||
int dst_enc = dst->encoding();
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
|
||||
}
|
||||
|
||||
void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
|
||||
VexSimdPrefix pre, int vector_len = AVX_128bit,
|
||||
bool no_mask_reg = false) {
|
||||
int dst_enc = dst->encoding();
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
|
||||
}
|
||||
|
||||
void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
|
||||
bool vex_w = false;
|
||||
int vector_len = AVX_128bit;
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(),
|
||||
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
|
||||
vector_len, no_mask_reg);
|
||||
}
|
||||
|
||||
void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
|
||||
bool vex_w = false;
|
||||
int vector_len = AVX_128bit;
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(),
|
||||
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
|
||||
vector_len, true, no_mask_reg);
|
||||
}
|
||||
|
||||
void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
|
||||
bool vex_w = true;
|
||||
int vector_len = AVX_128bit;
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(),
|
||||
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
|
||||
vector_len, no_mask_reg);
|
||||
}
|
||||
|
||||
void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
|
||||
bool vex_w = true;
|
||||
int vector_len = AVX_128bit;
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(),
|
||||
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
|
||||
vector_len, true, no_mask_reg);
|
||||
}
|
||||
InstructionAttr *attributes);
|
||||
|
||||
int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
|
||||
VexSimdPrefix pre, VexOpcode opc,
|
||||
bool vex_w, int vector_len,
|
||||
bool legacy_mode, bool no_mask_reg);
|
||||
InstructionAttr *attributes);
|
||||
|
||||
int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
|
||||
bool vex_w = false;
|
||||
int vector_len = AVX_128bit;
|
||||
return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
|
||||
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
|
||||
false, no_mask_reg);
|
||||
}
|
||||
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
|
||||
VexOpcode opc, InstructionAttr *attributes);
|
||||
|
||||
int vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
|
||||
bool vex_w = false;
|
||||
int vector_len = AVX_128bit;
|
||||
return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
|
||||
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
|
||||
true, no_mask_reg);
|
||||
}
|
||||
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
|
||||
VexOpcode opc, InstructionAttr *attributes);
|
||||
|
||||
int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
|
||||
bool vex_w = true;
|
||||
int vector_len = AVX_128bit;
|
||||
return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
|
||||
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
|
||||
false, no_mask_reg);
|
||||
}
|
||||
int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
|
||||
VexOpcode opc, InstructionAttr *attributes);
|
||||
|
||||
int vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
|
||||
bool vex_w = true;
|
||||
int vector_len = AVX_128bit;
|
||||
return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
|
||||
VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
|
||||
true, no_mask_reg);
|
||||
}
|
||||
|
||||
int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||
VexSimdPrefix pre, int vector_len = AVX_128bit,
|
||||
VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
|
||||
bool no_mask_reg = false) {
|
||||
int src_enc = src->encoding();
|
||||
int dst_enc = dst->encoding();
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
|
||||
}
|
||||
|
||||
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
|
||||
VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
|
||||
bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
|
||||
|
||||
void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
|
||||
bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
|
||||
simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
|
||||
}
|
||||
|
||||
void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
|
||||
simd_prefix(src, dst, pre, no_mask_reg);
|
||||
}
|
||||
void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
|
||||
VexSimdPrefix pre, bool no_mask_reg = false) {
|
||||
bool rex_w = true;
|
||||
simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
|
||||
}
|
||||
|
||||
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||
VexSimdPrefix pre, bool no_mask_reg,
|
||||
VexOpcode opc = VEX_OPCODE_0F,
|
||||
bool rex_w = false, int vector_len = AVX_128bit,
|
||||
bool legacy_mode = false);
|
||||
|
||||
int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
|
||||
VexSimdPrefix pre, bool no_mask_reg,
|
||||
VexOpcode opc = VEX_OPCODE_0F,
|
||||
bool rex_w = false, int vector_len = AVX_128bit);
|
||||
|
||||
int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
|
||||
VexSimdPrefix pre, bool no_mask_reg,
|
||||
VexOpcode opc = VEX_OPCODE_0F,
|
||||
bool rex_w = false, int vector_len = AVX_128bit);
|
||||
|
||||
// Move/convert 32-bit integer value.
|
||||
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
|
||||
VexSimdPrefix pre, bool no_mask_reg) {
|
||||
// It is OK to cast from Register to XMMRegister to pass argument here
|
||||
// since only encoding is used in simd_prefix_and_encode() and number of
|
||||
// Gen and Xmm registers are the same.
|
||||
return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
|
||||
}
|
||||
int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
|
||||
return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
|
||||
}
|
||||
int simd_prefix_and_encode(Register dst, XMMRegister src,
|
||||
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
|
||||
bool no_mask_reg = false) {
|
||||
return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
|
||||
}
|
||||
|
||||
// Move/convert 64-bit integer value.
|
||||
int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
|
||||
VexSimdPrefix pre, bool no_mask_reg = false) {
|
||||
bool rex_w = true;
|
||||
return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
|
||||
}
|
||||
int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
|
||||
return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
|
||||
}
|
||||
int simd_prefix_and_encode_q(Register dst, XMMRegister src,
|
||||
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
|
||||
bool no_mask_reg = false) {
|
||||
bool rex_w = true;
|
||||
return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
|
||||
}
|
||||
int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
|
||||
VexOpcode opc, InstructionAttr *attributes);
|
||||
|
||||
// Helper functions for groups of instructions
|
||||
void emit_arith_b(int op1, int op2, Register dst, int imm8);
|
||||
@ -821,27 +669,6 @@ private:
|
||||
void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
|
||||
void emit_arith(int op1, int op2, Register dst, Register src);
|
||||
|
||||
void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
|
||||
void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
|
||||
void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
|
||||
void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
|
||||
void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
|
||||
void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
|
||||
void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
|
||||
void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
|
||||
void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
|
||||
Address src, VexSimdPrefix pre, int vector_len,
|
||||
bool no_mask_reg = false, bool legacy_mode = false);
|
||||
void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
|
||||
Address src, VexSimdPrefix pre, int vector_len,
|
||||
bool no_mask_reg = false);
|
||||
void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
|
||||
XMMRegister src, VexSimdPrefix pre, int vector_len,
|
||||
bool no_mask_reg = false, bool legacy_mode = false);
|
||||
void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
|
||||
XMMRegister src, VexSimdPrefix pre, int vector_len,
|
||||
bool no_mask_reg = false);
|
||||
|
||||
bool emit_compressed_disp_byte(int &disp);
|
||||
|
||||
void emit_operand(Register reg,
|
||||
@ -986,18 +813,16 @@ private:
|
||||
// belong in macro assembler but there is no need for both varieties to exist
|
||||
|
||||
// Resets the assembler's per-instruction encoding state to its defaults and
// derives the legacy-mode flags from the AVX-512 feature queries.
void init_attributes(void) {
  // Encoding state starts out cleared.
  _evex_encoding = 0;
  _input_size_in_bits = 0;
  _avx_vector_len = AVX_NoVec;
  _tuple_type = EVEX_ETUP;
  _is_evex_instruction = false;

  // Each legacy-mode flag is set when the matching feature query fails.
  _legacy_mode_bw = !VM_Version::supports_avx512bw();
  _legacy_mode_dq = !VM_Version::supports_avx512dq();
  _legacy_mode_vl = !VM_Version::supports_avx512vl();
  _legacy_mode_vlbw = !VM_Version::supports_avx512vlbw();
  _instruction_uses_vl = false;

  // No InstructionAttr object is attached yet.
  _attributes = NULL;
}
|
||||
|
||||
// Records the given InstructionAttr as this assembler's current attribute object.
void set_attributes(InstructionAttr *attributes) {
  _attributes = attributes;
}
|
||||
// Drops the current attribute object by resetting the pointer to NULL.
void clear_attributes(void) {
  _attributes = NULL;
}
|
||||
|
||||
void lea(Register dst, Address src);
|
||||
|
||||
void mov(Register dst, Register src);
|
||||
@ -2106,12 +1931,12 @@ private:
|
||||
void vextracti128h(Address dst, XMMRegister src);
|
||||
|
||||
// Copy low 256bit into high 256bit of ZMM registers.
|
||||
void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vextracti64x4h(XMMRegister dst, XMMRegister src);
|
||||
void vextractf64x4h(XMMRegister dst, XMMRegister src);
|
||||
void vextractf64x4h(Address dst, XMMRegister src);
|
||||
void vinsertf64x4h(XMMRegister dst, Address src);
|
||||
void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
|
||||
void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
|
||||
void vextracti64x4h(XMMRegister dst, XMMRegister src, int value);
|
||||
void vextractf64x4h(XMMRegister dst, XMMRegister src, int value);
|
||||
void vextractf64x4h(Address dst, XMMRegister src, int value);
|
||||
void vinsertf64x4h(XMMRegister dst, Address src, int value);
|
||||
|
||||
// Copy targeted 128bit segments of the ZMM registers
|
||||
void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
|
||||
@ -2173,4 +1998,95 @@ private:
|
||||
|
||||
};
|
||||
|
||||
// The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
|
||||
// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
|
||||
// are applied.
|
||||
class InstructionAttr {
|
||||
public:
|
||||
InstructionAttr(
|
||||
int vector_len,
|
||||
bool rex_vex_w,
|
||||
bool legacy_mode,
|
||||
bool no_reg_mask,
|
||||
bool uses_vl)
|
||||
:
|
||||
_avx_vector_len(vector_len),
|
||||
_rex_vex_w(rex_vex_w),
|
||||
_legacy_mode(legacy_mode),
|
||||
_no_reg_mask(no_reg_mask),
|
||||
_uses_vl(uses_vl),
|
||||
_tuple_type(Assembler::EVEX_ETUP),
|
||||
_input_size_in_bits(Assembler::EVEX_NObit),
|
||||
_is_evex_instruction(false),
|
||||
_evex_encoding(0),
|
||||
_is_clear_context(false),
|
||||
_is_extended_context(false),
|
||||
_current_assembler(NULL) {
|
||||
if (UseAVX < 3) _legacy_mode = true;
|
||||
}
|
||||
|
||||
~InstructionAttr() {
|
||||
if (_current_assembler != NULL) {
|
||||
_current_assembler->clear_attributes();
|
||||
}
|
||||
_current_assembler = NULL;
|
||||
}
|
||||
|
||||
private:
|
||||
int _avx_vector_len;
|
||||
bool _rex_vex_w;
|
||||
bool _legacy_mode;
|
||||
bool _no_reg_mask;
|
||||
bool _uses_vl;
|
||||
int _tuple_type;
|
||||
int _input_size_in_bits;
|
||||
bool _is_evex_instruction;
|
||||
int _evex_encoding;
|
||||
bool _is_clear_context;
|
||||
bool _is_extended_context;
|
||||
|
||||
Assembler *_current_assembler;
|
||||
|
||||
public:
|
||||
// query functions for field accessors
|
||||
int get_vector_len(void) const { return _avx_vector_len; }
|
||||
bool is_rex_vex_w(void) const { return _rex_vex_w; }
|
||||
bool is_legacy_mode(void) const { return _legacy_mode; }
|
||||
bool is_no_reg_mask(void) const { return _no_reg_mask; }
|
||||
bool uses_vl(void) const { return _uses_vl; }
|
||||
int get_tuple_type(void) const { return _tuple_type; }
|
||||
int get_input_size(void) const { return _input_size_in_bits; }
|
||||
int is_evex_instruction(void) const { return _is_evex_instruction; }
|
||||
int get_evex_encoding(void) const { return _evex_encoding; }
|
||||
bool is_clear_context(void) const { return _is_clear_context; }
|
||||
bool is_extended_context(void) const { return _is_extended_context; }
|
||||
|
||||
// Set the vector len manually
|
||||
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
|
||||
|
||||
// Set the instruction to be encoded in AVX mode
|
||||
void set_is_legacy_mode(void) { _legacy_mode = true; }
|
||||
|
||||
// Set the current instuction to be encoded as an EVEX instuction
|
||||
void set_is_evex_instruction(void) { _is_evex_instruction = true; }
|
||||
|
||||
// Internal encoding data used in compressed immediate offset programming
|
||||
void set_evex_encoding(int value) { _evex_encoding = value; }
|
||||
|
||||
// Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
|
||||
void set_is_clear_context(void) { _is_clear_context = true; }
|
||||
|
||||
// Map back to current assembler so that we can manage object level association
|
||||
void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
|
||||
|
||||
// Address modifiers used for compressed displacement calculation
|
||||
void set_address_attributes(int tuple_type, int input_size_in_bits) {
|
||||
if (VM_Version::supports_evex()) {
|
||||
_tuple_type = tuple_type;
|
||||
_input_size_in_bits = input_size_in_bits;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif // CPU_X86_VM_ASSEMBLER_X86_HPP
|
||||
|
@ -3714,7 +3714,7 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
|
||||
if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
|
||||
__ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
|
||||
}
|
||||
if (UseAVX > 1) {
|
||||
if (UseAVX > 0) {
|
||||
__ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(),
|
||||
ExternalAddress((address)float_signflip_pool));
|
||||
} else {
|
||||
@ -3725,7 +3725,7 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
|
||||
if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
|
||||
__ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
|
||||
}
|
||||
if (UseAVX > 1) {
|
||||
if (UseAVX > 0) {
|
||||
__ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(),
|
||||
ExternalAddress((address)double_signflip_pool));
|
||||
} else {
|
||||
|
@ -84,6 +84,7 @@ define_pd_global(bool, UseCISCSpill, true);
|
||||
define_pd_global(bool, OptoScheduling, false);
|
||||
define_pd_global(bool, OptoBundling, false);
|
||||
define_pd_global(bool, OptoRegScheduling, true);
|
||||
define_pd_global(bool, SuperWordLoopUnrollAnalysis, true);
|
||||
|
||||
define_pd_global(intx, ReservedCodeCacheSize, 48*M);
|
||||
define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);
|
||||
|
@ -58,6 +58,4 @@ void Compile::pd_compiler2_init() {
|
||||
OptoReg::invalidate(i);
|
||||
}
|
||||
}
|
||||
|
||||
SuperWordLoopUnrollAnalysis = true;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -962,10 +962,15 @@ public:
|
||||
void divss(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
// Move Unaligned Double Quadword
|
||||
void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); }
|
||||
void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); }
|
||||
void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); }
|
||||
void movdqu(Address dst, XMMRegister src);
|
||||
void movdqu(XMMRegister dst, Address src);
|
||||
void movdqu(XMMRegister dst, XMMRegister src);
|
||||
void movdqu(XMMRegister dst, AddressLiteral src);
|
||||
// AVX Unaligned forms
|
||||
void vmovdqu(Address dst, XMMRegister src);
|
||||
void vmovdqu(XMMRegister dst, Address src);
|
||||
void vmovdqu(XMMRegister dst, XMMRegister src);
|
||||
void vmovdqu(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
// Move Aligned Double Quadword
|
||||
void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
|
||||
@ -1024,12 +1029,12 @@ public:
|
||||
void ucomisd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
|
||||
void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
|
||||
void xorpd(XMMRegister dst, XMMRegister src);
|
||||
void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
|
||||
void xorpd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
// Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
|
||||
void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
|
||||
void xorps(XMMRegister dst, XMMRegister src);
|
||||
void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
|
||||
void xorps(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
@ -1047,6 +1052,39 @@ public:
|
||||
void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
|
||||
void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
|
||||
|
||||
void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
|
||||
void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
|
||||
|
||||
void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
|
||||
void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
|
||||
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
|
||||
void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
|
||||
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
|
||||
void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
|
||||
void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
|
||||
|
||||
void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
|
||||
void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
|
||||
|
||||
void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
|
||||
void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
|
||||
|
||||
void punpcklbw(XMMRegister dst, XMMRegister src);
|
||||
void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
|
||||
|
||||
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
|
||||
void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
|
||||
|
||||
void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
|
||||
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
|
||||
void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
|
||||
|
@ -192,31 +192,22 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
}
|
||||
} else if(UseSSE >= 2) {
|
||||
// Save whole 128bit (16 bytes) XMM registers
|
||||
if (VM_Version::supports_avx512novl()) {
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vextractf32x4h(Address(rsp, off*wordSize), as_XMMRegister(n), 0);
|
||||
off += delta;
|
||||
}
|
||||
} else {
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
|
||||
off += delta;
|
||||
}
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
|
||||
off += delta;
|
||||
}
|
||||
}
|
||||
|
||||
if (vect_words > 0) {
|
||||
assert(vect_words*wordSize == 128, "");
|
||||
__ subptr(rsp, 128); // Save upper half of YMM registes
|
||||
off = 0;
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
|
||||
__ vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
|
||||
}
|
||||
if (UseAVX > 2) {
|
||||
__ subptr(rsp, 256); // Save upper half of ZMM registes
|
||||
off = 0;
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n));
|
||||
__ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -285,31 +276,23 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
|
||||
off += delta;
|
||||
}
|
||||
} else if (UseSSE >= 2) {
|
||||
if (VM_Version::supports_avx512novl()) {
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vinsertf32x4h(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes), 0);
|
||||
off += delta;
|
||||
}
|
||||
} else {
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
|
||||
off += delta;
|
||||
}
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
|
||||
off += delta;
|
||||
}
|
||||
}
|
||||
if (restore_vectors) {
|
||||
assert(additional_frame_bytes == 128, "");
|
||||
if (UseAVX > 2) {
|
||||
off = 0;
|
||||
// Restore upper half of ZMM registers.
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32));
|
||||
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
|
||||
}
|
||||
__ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registes
|
||||
}
|
||||
// Restore upper half of YMM registers.
|
||||
assert(additional_frame_bytes == 128, "");
|
||||
off = 0;
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
|
||||
__ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
|
||||
}
|
||||
__ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registes
|
||||
}
|
||||
|
@ -72,45 +72,28 @@ class SimpleRuntimeFrame {
|
||||
class RegisterSaver {
|
||||
// Capture info about frame layout. Layout offsets are in jint
|
||||
// units because compiler frame slots are jints.
|
||||
#define HALF_ZMM_BANK_WORDS 128
|
||||
#define XSAVE_AREA_BEGIN 160
|
||||
#define XSAVE_AREA_YMM_BEGIN 576
|
||||
#define XSAVE_AREA_ZMM_BEGIN 1152
|
||||
#define XSAVE_AREA_UPPERBANK 1664
|
||||
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
|
||||
#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
|
||||
#define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
|
||||
enum layout {
|
||||
fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
|
||||
xmm_off = fpu_state_off + 160/BytesPerInt, // offset in fxsave save area
|
||||
xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area
|
||||
DEF_XMM_OFFS(0),
|
||||
DEF_XMM_OFFS(1),
|
||||
DEF_XMM_OFFS(2),
|
||||
DEF_XMM_OFFS(3),
|
||||
DEF_XMM_OFFS(4),
|
||||
DEF_XMM_OFFS(5),
|
||||
DEF_XMM_OFFS(6),
|
||||
DEF_XMM_OFFS(7),
|
||||
DEF_XMM_OFFS(8),
|
||||
DEF_XMM_OFFS(9),
|
||||
DEF_XMM_OFFS(10),
|
||||
DEF_XMM_OFFS(11),
|
||||
DEF_XMM_OFFS(12),
|
||||
DEF_XMM_OFFS(13),
|
||||
DEF_XMM_OFFS(14),
|
||||
DEF_XMM_OFFS(15),
|
||||
zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt),
|
||||
// 2..15 are implied in range usage
|
||||
ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
|
||||
DEF_YMM_OFFS(0),
|
||||
DEF_YMM_OFFS(1),
|
||||
// 2..15 are implied in range usage
|
||||
zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
|
||||
zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
|
||||
DEF_ZMM_OFFS(16),
|
||||
DEF_ZMM_OFFS(17),
|
||||
DEF_ZMM_OFFS(18),
|
||||
DEF_ZMM_OFFS(19),
|
||||
DEF_ZMM_OFFS(20),
|
||||
DEF_ZMM_OFFS(21),
|
||||
DEF_ZMM_OFFS(22),
|
||||
DEF_ZMM_OFFS(23),
|
||||
DEF_ZMM_OFFS(24),
|
||||
DEF_ZMM_OFFS(25),
|
||||
DEF_ZMM_OFFS(26),
|
||||
DEF_ZMM_OFFS(27),
|
||||
DEF_ZMM_OFFS(28),
|
||||
DEF_ZMM_OFFS(29),
|
||||
DEF_ZMM_OFFS(30),
|
||||
DEF_ZMM_OFFS(31),
|
||||
// 18..31 are implied in range usage
|
||||
fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
|
||||
fpu_stateH_end,
|
||||
r15_off, r15H_off,
|
||||
@ -160,8 +143,6 @@ class RegisterSaver {
|
||||
};
|
||||
|
||||
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
|
||||
int vect_words = 0;
|
||||
int ymmhi_offset = -1;
|
||||
int off = 0;
|
||||
int num_xmm_regs = XMMRegisterImpl::number_of_registers;
|
||||
if (UseAVX < 3) {
|
||||
@ -171,24 +152,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
if (save_vectors) {
|
||||
assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
|
||||
assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
|
||||
// Save upper half of YMM registers
|
||||
vect_words = 16 * num_xmm_regs / wordSize;
|
||||
if (UseAVX < 3) {
|
||||
ymmhi_offset = additional_frame_words;
|
||||
additional_frame_words += vect_words;
|
||||
}
|
||||
}
|
||||
#else
|
||||
assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
|
||||
#endif
|
||||
|
||||
// Always make the frame size 16-byte aligned
|
||||
int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
|
||||
reg_save_size*BytesPerInt, num_xmm_regs);
|
||||
// Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
|
||||
int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs);
|
||||
// OopMap frame size is in compiler stack slots (jint's) not bytes or words
|
||||
int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
|
||||
// The caller will allocate additional_frame_words
|
||||
int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
|
||||
// CodeBlob frame size is in words.
|
||||
int frame_size_in_words = frame_size_in_bytes / wordSize;
|
||||
*total_frame_words = frame_size_in_words;
|
||||
@ -203,12 +175,34 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
__ push_CPU_state(); // Push a multiple of 16 bytes
|
||||
|
||||
// push cpu state handles this on EVEX enabled targets
|
||||
if ((vect_words > 0) && (UseAVX < 3)) {
|
||||
assert(vect_words*wordSize >= 256, "");
|
||||
// Save upper half of YMM registers(0..num_xmm_regs)
|
||||
__ subptr(rsp, num_xmm_regs*16);
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
|
||||
if (save_vectors) {
|
||||
// Save upper half of YMM registers(0..15)
|
||||
int base_addr = XSAVE_AREA_YMM_BEGIN;
|
||||
for (int n = 0; n < 16; n++) {
|
||||
__ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
|
||||
}
|
||||
if (VM_Version::supports_evex()) {
|
||||
// Save upper half of ZMM registers(0..15)
|
||||
base_addr = XSAVE_AREA_ZMM_BEGIN;
|
||||
for (int n = 0; n < 16; n++) {
|
||||
__ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
|
||||
}
|
||||
// Save full ZMM registers(16..num_xmm_regs)
|
||||
base_addr = XSAVE_AREA_UPPERBANK;
|
||||
int off = 0;
|
||||
int vector_len = Assembler::AVX_512bit;
|
||||
for (int n = 16; n < num_xmm_regs; n++) {
|
||||
__ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (VM_Version::supports_evex()) {
|
||||
// Save upper bank of ZMM registers(16..31) for double/float usage
|
||||
int base_addr = XSAVE_AREA_UPPERBANK;
|
||||
int off = 0;
|
||||
for (int n = 16; n < num_xmm_regs; n++) {
|
||||
__ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (frame::arg_reg_save_area_bytes != 0) {
|
||||
@ -224,8 +218,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
OopMapSet *oop_maps = new OopMapSet();
|
||||
OopMap* map = new OopMap(frame_size_in_slots, 0);
|
||||
|
||||
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
|
||||
#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)
|
||||
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x))
|
||||
|
||||
map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
|
||||
@ -257,31 +250,21 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
off = zmm16_off;
|
||||
delta = zmm17_off - off;
|
||||
for (int n = 16; n < num_xmm_regs; n++) {
|
||||
XMMRegister xmm_name = as_XMMRegister(n);
|
||||
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
|
||||
XMMRegister zmm_name = as_XMMRegister(n);
|
||||
map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
|
||||
off += delta;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(COMPILER2) || INCLUDE_JVMCI
|
||||
if (save_vectors) {
|
||||
assert(ymmhi_offset != -1, "save area must exist");
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET( 0), xmm0->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next(4));
|
||||
map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next(4));
|
||||
off = ymm0_off;
|
||||
int delta = ymm1_off - off;
|
||||
for (int n = 0; n < 16; n++) {
|
||||
XMMRegister ymm_name = as_XMMRegister(n);
|
||||
map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
|
||||
off += delta;
|
||||
}
|
||||
}
|
||||
#endif // COMPILER2 || INCLUDE_JVMCI
|
||||
|
||||
@ -316,8 +299,8 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
off = zmm16H_off;
|
||||
delta = zmm17H_off - off;
|
||||
for (int n = 16; n < num_xmm_regs; n++) {
|
||||
XMMRegister xmm_name = as_XMMRegister(n);
|
||||
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
|
||||
XMMRegister zmm_name = as_XMMRegister(n);
|
||||
map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
|
||||
off += delta;
|
||||
}
|
||||
}
|
||||
@ -335,21 +318,48 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
|
||||
// Pop arg register save area
|
||||
__ addptr(rsp, frame::arg_reg_save_area_bytes);
|
||||
}
|
||||
|
||||
#if defined(COMPILER2) || INCLUDE_JVMCI
|
||||
// On EVEX enabled targets everything is handled in pop fpu state
|
||||
if ((restore_vectors) && (UseAVX < 3)) {
|
||||
assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
|
||||
assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
|
||||
int off = 0;
|
||||
// Restore upper half of YMM registers (0..num_xmm_regs)
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
|
||||
}
|
||||
__ addptr(rsp, num_xmm_regs*16);
|
||||
if (restore_vectors) {
|
||||
assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
|
||||
assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
|
||||
}
|
||||
#else
|
||||
assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
|
||||
assert(!save_vectors, "vectors are generated only by C2");
|
||||
#endif
|
||||
|
||||
// On EVEX enabled targets everything is handled in pop fpu state
|
||||
if (restore_vectors) {
|
||||
// Restore upper half of YMM registers (0..15)
|
||||
int base_addr = XSAVE_AREA_YMM_BEGIN;
|
||||
for (int n = 0; n < 16; n++) {
|
||||
__ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16));
|
||||
}
|
||||
if (VM_Version::supports_evex()) {
|
||||
// Restore upper half of ZMM registers (0..15)
|
||||
base_addr = XSAVE_AREA_ZMM_BEGIN;
|
||||
for (int n = 0; n < 16; n++) {
|
||||
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
|
||||
}
|
||||
// Restore full ZMM registers(16..num_xmm_regs)
|
||||
base_addr = XSAVE_AREA_UPPERBANK;
|
||||
int vector_len = Assembler::AVX_512bit;
|
||||
int off = 0;
|
||||
for (int n = 16; n < num_xmm_regs; n++) {
|
||||
__ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (VM_Version::supports_evex()) {
|
||||
// Restore upper bank of ZMM registers(16..31) for double/float usage
|
||||
int base_addr = XSAVE_AREA_UPPERBANK;
|
||||
int off = 0;
|
||||
for (int n = 16; n < num_xmm_regs; n++) {
|
||||
__ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recover CPU state
|
||||
__ pop_CPU_state();
|
||||
// Get the rbp described implicitly by the calling convention (no oopMap)
|
||||
|
@ -273,7 +273,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
if (UseAVX > 2) {
|
||||
last_reg = 31;
|
||||
}
|
||||
if (VM_Version::supports_avx512novl()) {
|
||||
if (VM_Version::supports_evex()) {
|
||||
for (int i = xmm_save_first; i <= last_reg; i++) {
|
||||
__ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0);
|
||||
}
|
||||
@ -391,7 +391,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// restore regs belonging to calling function
|
||||
#ifdef _WIN64
|
||||
// emit the restores for xmm regs
|
||||
if (VM_Version::supports_avx512novl()) {
|
||||
if (VM_Version::supports_evex()) {
|
||||
for (int i = xmm_save_first; i <= last_reg; i++) {
|
||||
__ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0);
|
||||
}
|
||||
|
@ -891,7 +891,7 @@ void VM_Version::get_processor_features() {
|
||||
UseNewLongLShift = true;
|
||||
}
|
||||
if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
|
||||
if( supports_sse4a() ) {
|
||||
if (supports_sse4a()) {
|
||||
UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
|
||||
} else {
|
||||
UseXmmLoadAndClearUpper = false;
|
||||
|
@ -552,6 +552,19 @@ protected:
|
||||
break;
|
||||
}
|
||||
}
|
||||
// zmm_save will be set on an EVEX enabled machine even if we choose AVX code gen
|
||||
if (retVal == false) {
|
||||
// Verify that OS saves/restores all bits of EVEX registers
|
||||
// during signal processing.
|
||||
int nreg = 2 LP64_ONLY(+2);
|
||||
retVal = true;
|
||||
for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
|
||||
if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
|
||||
retVal = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
@ -706,6 +719,9 @@ public:
|
||||
static bool supports_avx512vl() { return (_cpuFeatures & CPU_AVX512VL) != 0; }
|
||||
static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
|
||||
static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
|
||||
static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
|
||||
static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
|
||||
static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
|
||||
// Intel features
|
||||
static bool is_intel_family_core() { return is_intel() &&
|
||||
extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -291,9 +291,7 @@ static int pre_call_resets_size() {
|
||||
size += 6; // fldcw
|
||||
}
|
||||
if (C->max_vector_size() > 16) {
|
||||
if(UseAVX <= 2) {
|
||||
size += 3; // vzeroupper
|
||||
}
|
||||
size += 3; // vzeroupper
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@ -1915,7 +1913,7 @@ encode %{
|
||||
if (stub == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
%}
|
||||
|
||||
|
@ -536,11 +536,7 @@ source %{
|
||||
#define __ _masm.
|
||||
|
||||
static int clear_avx_size() {
|
||||
if(UseAVX > 2) {
|
||||
return 0; // vzeroupper is ignored
|
||||
} else {
|
||||
return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // vzeroupper
|
||||
}
|
||||
return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // vzeroupper
|
||||
}
|
||||
|
||||
// !!!!! Special hack to get all types of calls to specify the byte offset
|
||||
@ -871,7 +867,7 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
|
||||
if (framesize > 0) {
|
||||
st->print("\n\t");
|
||||
st->print("addq rbp, #%d", framesize);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -186,9 +186,9 @@
|
||||
"Maximum number of unrolls for main loop") \
|
||||
range(0, max_jint) \
|
||||
\
|
||||
product(bool, SuperWordLoopUnrollAnalysis, false, \
|
||||
"Map number of unrolls for main loop via " \
|
||||
"Superword Level Parallelism analysis") \
|
||||
product_pd(bool, SuperWordLoopUnrollAnalysis, \
|
||||
"Map number of unrolls for main loop via " \
|
||||
"Superword Level Parallelism analysis") \
|
||||
\
|
||||
notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false, \
|
||||
"Trace what Superword Level Parallelism analysis applies") \
|
||||
|
@ -269,6 +269,10 @@ public:
|
||||
// should generate this one.
|
||||
static const bool match_rule_supported(int opcode);
|
||||
|
||||
// identify extra cases that we might want to provide match rules for
|
||||
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
||||
static const bool match_rule_supported_vector(int opcode, int vlen);
|
||||
|
||||
// Some uarchs have different sized float register resources
|
||||
static const int float_pressure(int default_pressure_threshold);
|
||||
|
||||
|
@ -2247,7 +2247,10 @@ void SuperWord::output() {
|
||||
NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte));
|
||||
// For atomic unrolled loops which are vector mapped, instigate more unrolling.
|
||||
cl->set_notpassed_slp();
|
||||
C->set_major_progress();
|
||||
// if vector resources are limited, do not allow additional unrolling
|
||||
if (FLOATPRESSURE > 8) {
|
||||
C->set_major_progress();
|
||||
}
|
||||
cl->mark_do_unroll_only();
|
||||
}
|
||||
}
|
||||
|
@ -188,7 +188,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
|
||||
(vlen > 1) && is_power_of_2(vlen) &&
|
||||
Matcher::vector_size_supported(bt, vlen)) {
|
||||
int vopc = VectorNode::opcode(opc, bt);
|
||||
return vopc > 0 && Matcher::match_rule_supported(vopc) && (vopc != Op_CMoveD || vlen == 4);
|
||||
return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user