Igor Veresov 2015-11-09 22:43:30 +00:00
commit 51884084c1
25 changed files with 4872 additions and 2643 deletions

View File

@ -3463,6 +3463,17 @@ const bool Matcher::match_rule_supported(int opcode) {
   return true;  // Per default match rules are supported.
 }

+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }

View File

@ -73,6 +73,7 @@ define_pd_global(bool, UseCISCSpill, true);
 define_pd_global(bool, OptoScheduling,              false);
 define_pd_global(bool, OptoBundling,                false);
 define_pd_global(bool, OptoRegScheduling,           false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
 define_pd_global(intx, ReservedCodeCacheSize,       48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,     21*M);

View File

@ -61,6 +61,7 @@ define_pd_global(bool, OptoPeephole, false);
 define_pd_global(bool, UseCISCSpill,                false);
 define_pd_global(bool, OptoBundling,                false);
 define_pd_global(bool, OptoRegScheduling,           false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
 // GL:
 // Detected a problem with unscaled compressed oops and
 // narrow_oop_use_complex_address() == false.

View File

@ -2064,6 +2064,17 @@ const bool Matcher::match_rule_supported(int opcode) {
   return true;  // Per default match rules are supported.
 }

+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }

View File

@ -65,6 +65,7 @@ define_pd_global(bool, UseCISCSpill, false);
 define_pd_global(bool, OptoBundling,                false);
 define_pd_global(bool, OptoScheduling,              true);
 define_pd_global(bool, OptoRegScheduling,           false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);

 #ifdef _LP64
 // We need to make sure that all generated code is within

View File

@ -1860,6 +1860,17 @@ const bool Matcher::match_rule_supported(int opcode) {
   return true;  // Per default match rules are supported.
 }

+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }

File diff suppressed because it is too large.

View File

@ -438,6 +438,8 @@ class ArrayAddress VALUE_OBJ_CLASS_SPEC {
 };

+class InstructionAttr;
+
 // 64-bit refect the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes
 // See fxsave and xsave(EVEX enabled) documentation for layout
 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);
@ -568,7 +570,8 @@ class Assembler : public AbstractAssembler {
   EVEX_8bit  = 0,
   EVEX_16bit = 1,
   EVEX_32bit = 2,
-  EVEX_64bit = 3
+  EVEX_64bit = 3,
+  EVEX_NObit = 4
 };

 enum WhichOperand {
@ -598,16 +601,12 @@ class Assembler : public AbstractAssembler {
 private:

-  int _evex_encoding;
-  int _input_size_in_bits;
-  int _avx_vector_len;
-  int _tuple_type;
-  bool _is_evex_instruction;
   bool _legacy_mode_bw;
   bool _legacy_mode_dq;
   bool _legacy_mode_vl;
   bool _legacy_mode_vlbw;
-  bool _instruction_uses_vl;
+  class InstructionAttr *_attributes;

   // 64bit prefixes
   int prefix_and_encode(int reg_enc, bool byteinst = false);
@ -637,181 +636,30 @@ private:
   int  rex_prefix_and_encode(int dst_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc, bool rex_w);

-  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
-                  int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                  int vector_len);
+  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);

-  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
-                   int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                   bool is_extended_context, bool is_merge_context,
-                   int vector_len, bool no_mask_reg );
+  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
+                   int nds_enc, VexSimdPrefix pre, VexOpcode opc);

   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
                   VexSimdPrefix pre, VexOpcode opc,
-                  bool vex_w, int vector_len,
-                  bool legacy_mode = false, bool no_mask_reg = false);
+                  InstructionAttr *attributes);

-  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
-                  VexSimdPrefix pre, int vector_len = AVX_128bit,
-                  bool no_mask_reg = false, bool legacy_mode = false) {
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
-  }
-
-  void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
-                    VexSimdPrefix pre, int vector_len = AVX_128bit,
-                    bool no_mask_reg = false) {
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
-  }
-
-  void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, true, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, true, no_mask_reg);
-  }
-
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc,
-                             bool vex_w, int vector_len,
-                             bool legacy_mode, bool no_mask_reg);
-
-  int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 false, no_mask_reg);
-  }
-
-  int vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 true, no_mask_reg);
-  }
-
-  int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 false, no_mask_reg);
-  }
-
-  int vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 true, no_mask_reg);
-  }
-
-  int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                            VexSimdPrefix pre, int vector_len = AVX_128bit,
-                            VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
-                            bool no_mask_reg = false) {
-    int src_enc = src->encoding();
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
-  }
-
-  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
-                   VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
-                   bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
-
-  void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
-                   bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
-    simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
-  }
-
-  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
-    simd_prefix(src, dst, pre, no_mask_reg);
-  }
-
-  void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
-                     VexSimdPrefix pre, bool no_mask_reg = false) {
-    bool rex_w = true;
-    simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
-  }
-
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit,
-                             bool legacy_mode = false);
-
-  int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit);
-
-  int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit);
-
-  // Move/convert 32-bit integer value.
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
-                             VexSimdPrefix pre, bool no_mask_reg) {
-    // It is OK to cast from Register to XMMRegister to pass argument here
-    // since only encoding is used in simd_prefix_and_encode() and number of
-    // Gen and Xmm registers are the same.
-    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
-  }
-  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
-    return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
-  }
-  int simd_prefix_and_encode(Register dst, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
-                             bool no_mask_reg = false) {
-    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
-  }
-
-  // Move/convert 64-bit integer value.
-  int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
-                               VexSimdPrefix pre, bool no_mask_reg = false) {
-    bool rex_w = true;
-    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
-  }
-  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
-    return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
-  }
-  int simd_prefix_and_encode_q(Register dst, XMMRegister src,
-                               VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
-                               bool no_mask_reg = false) {
-    bool rex_w = true;
-    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
-  }
+                             InstructionAttr *attributes);
+
+  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
+                   VexOpcode opc, InstructionAttr *attributes);
+
+  int  simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
+                              VexOpcode opc, InstructionAttr *attributes);
+
+  int  kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
+                              VexOpcode opc, InstructionAttr *attributes);
+
+  int  kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
+                              VexOpcode opc, InstructionAttr *attributes);

   // Helper functions for groups of instructions
   void emit_arith_b(int op1, int op2, Register dst, int imm8);
@ -821,27 +669,6 @@ private:
   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
   void emit_arith(int op1, int op2, Register dst, Register src);

-  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                      Address src, VexSimdPrefix pre, int vector_len,
-                      bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                        Address src, VexSimdPrefix pre, int vector_len,
-                        bool no_mask_reg = false);
-  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                      XMMRegister src, VexSimdPrefix pre, int vector_len,
-                      bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                        XMMRegister src, VexSimdPrefix pre, int vector_len,
-                        bool no_mask_reg = false);

   bool emit_compressed_disp_byte(int &disp);

   void emit_operand(Register reg,
@ -986,18 +813,16 @@ private:
   // belong in macro assembler but there is no need for both varieties to exist

   void init_attributes(void) {
-    _evex_encoding = 0;
-    _input_size_in_bits = 0;
-    _avx_vector_len = AVX_NoVec;
-    _tuple_type = EVEX_ETUP;
-    _is_evex_instruction = false;
     _legacy_mode_bw = (VM_Version::supports_avx512bw() == false);
     _legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
     _legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
     _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
-    _instruction_uses_vl = false;
+    _attributes = NULL;
   }

+  void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
+  void clear_attributes(void) { _attributes = NULL; }
+
   void lea(Register dst, Address src);
   void mov(Register dst, Register src);
@ -2106,12 +1931,12 @@ private:
   void vextracti128h(Address dst, XMMRegister src);

   // Copy low 256bit into high 256bit of ZMM registers.
-  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vextracti64x4h(XMMRegister dst, XMMRegister src);
-  void vextractf64x4h(XMMRegister dst, XMMRegister src);
-  void vextractf64x4h(Address dst, XMMRegister src);
-  void vinsertf64x4h(XMMRegister dst, Address src);
+  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
+  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
+  void vextracti64x4h(XMMRegister dst, XMMRegister src, int value);
+  void vextractf64x4h(XMMRegister dst, XMMRegister src, int value);
+  void vextractf64x4h(Address dst, XMMRegister src, int value);
+  void vinsertf64x4h(XMMRegister dst, Address src, int value);

   // Copy targeted 128bit segments of the ZMM registers
   void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
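Reading of the new trailing int argument, inferred from the call sites later in this commit (the register save/restore code passes 1 for the upper half); this is an interpretation, not text from the patch:

// Illustrative usage only -- the int argument mirrors the imm8 of
// VINSERTF64X4/VEXTRACTF64X4 and selects which 256-bit half is touched:
//   __ vinsertf64x4h(dst, nds, src, 1);               // src -> upper 256 bits of dst
//   __ vextractf64x4h(Address(rsp, offset), src, 1);  // upper 256 bits of src -> memory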
@ -2173,4 +1998,95 @@ private:
 };

+// The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
+// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
+// are applied.
+class InstructionAttr {
+public:
+  InstructionAttr(
+    int vector_len,
+    bool rex_vex_w,
+    bool legacy_mode,
+    bool no_reg_mask,
+    bool uses_vl)
+    :
+      _avx_vector_len(vector_len),
+      _rex_vex_w(rex_vex_w),
+      _legacy_mode(legacy_mode),
+      _no_reg_mask(no_reg_mask),
+      _uses_vl(uses_vl),
+      _tuple_type(Assembler::EVEX_ETUP),
+      _input_size_in_bits(Assembler::EVEX_NObit),
+      _is_evex_instruction(false),
+      _evex_encoding(0),
+      _is_clear_context(false),
+      _is_extended_context(false),
+      _current_assembler(NULL) {
+    if (UseAVX < 3) _legacy_mode = true;
+  }
+
+  ~InstructionAttr() {
+    if (_current_assembler != NULL) {
+      _current_assembler->clear_attributes();
+    }
+    _current_assembler = NULL;
+  }
+
+private:
+  int  _avx_vector_len;
+  bool _rex_vex_w;
+  bool _legacy_mode;
+  bool _no_reg_mask;
+  bool _uses_vl;
+  int  _tuple_type;
+  int  _input_size_in_bits;
+  bool _is_evex_instruction;
+  int  _evex_encoding;
+  bool _is_clear_context;
+  bool _is_extended_context;
+
+  Assembler *_current_assembler;
+
+public:
+  // query functions for field accessors
+  int  get_vector_len(void) const { return _avx_vector_len; }
+  bool is_rex_vex_w(void) const { return _rex_vex_w; }
+  bool is_legacy_mode(void) const { return _legacy_mode; }
+  bool is_no_reg_mask(void) const { return _no_reg_mask; }
+  bool uses_vl(void) const { return _uses_vl; }
+  int  get_tuple_type(void) const { return _tuple_type; }
+  int  get_input_size(void) const { return _input_size_in_bits; }
+  int  is_evex_instruction(void) const { return _is_evex_instruction; }
+  int  get_evex_encoding(void) const { return _evex_encoding; }
+  bool is_clear_context(void) const { return _is_clear_context; }
+  bool is_extended_context(void) const { return _is_extended_context; }
+
+  // Set the vector len manually
+  void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
+
+  // Set the instruction to be encoded in AVX mode
+  void set_is_legacy_mode(void) { _legacy_mode = true; }
+
+  // Set the current instuction to be encoded as an EVEX instuction
+  void set_is_evex_instruction(void) { _is_evex_instruction = true; }
+
+  // Internal encoding data used in compressed immediate offset programming
+  void set_evex_encoding(int value) { _evex_encoding = value; }
+
+  // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
+  void set_is_clear_context(void) { _is_clear_context = true; }
+
+  // Map back to current asembler so that we can manage object level assocation
+  void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
+
+  // Address modifiers used for compressed displacement calculation
+  void set_address_attributes(int tuple_type, int input_size_in_bits) {
+    if (VM_Version::supports_evex()) {
+      _tuple_type = tuple_type;
+      _input_size_in_bits = input_size_in_bits;
+    }
+  }
+};
+
 #endif // CPU_X86_VM_ASSEMBLER_X86_HPP
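To show how the pieces above are meant to fit together, here is a hedged sketch of an emitter written against the new declarations. The vaddps body below is illustrative and not lifted from this changeset; only the InstructionAttr constructor and vex_prefix_and_encode signatures come from the header diff above.

// Hypothetical emitter sketch: one stack-allocated InstructionAttr per instruction,
// handed to the prefix helper, which reads vector length, VEX.W, legacy/EVEX policy
// and mask-register handling from it instead of from loose Assembler fields.
void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_vex_w */ false, /* legacy_mode */ false,
                             /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                     VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);                               // VADDPS opcode
  emit_int8((unsigned char)(0xC0 | encode));     // ModRM: register-register form
}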

View File

@ -3714,7 +3714,7 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
     if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
       __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
     }
-    if (UseAVX > 1) {
+    if (UseAVX > 0) {
       __ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(),
                    ExternalAddress((address)float_signflip_pool));
     } else {
@ -3725,7 +3725,7 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
     if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
       __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
     }
-    if (UseAVX > 1) {
+    if (UseAVX > 0) {
       __ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(),
                    ExternalAddress((address)double_signflip_pool));
     } else {
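The vnegatess/vnegatesd helpers rely on the usual IEEE-754 sign-flip trick: negating a value is an XOR of the sign bit against the signflip constant pool referenced above. A tiny standalone illustration of the underlying bit manipulation (plain C++, not HotSpot code):

#include <cstdint>
#include <cstring>

// Negating an IEEE-754 float is an XOR of its sign bit; this is what the macro
// assembler does with vxorps/vxorpd against float_signflip_pool/double_signflip_pool.
float negate_via_signflip(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits ^= UINT32_C(0x80000000);   // flip the sign bit only
  std::memcpy(&x, &bits, sizeof bits);
  return x;
}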

View File

@ -84,6 +84,7 @@ define_pd_global(bool, UseCISCSpill, true);
 define_pd_global(bool, OptoScheduling,              false);
 define_pd_global(bool, OptoBundling,                false);
 define_pd_global(bool, OptoRegScheduling,           true);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis, true);
 define_pd_global(intx, ReservedCodeCacheSize,       48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,     21*M);

View File

@ -58,6 +58,4 @@ void Compile::pd_compiler2_init() {
       OptoReg::invalidate(i);
     }
   }
-  SuperWordLoopUnrollAnalysis = true;
 }

File diff suppressed because it is too large.

View File

@ -962,10 +962,15 @@ public:
   void divss(XMMRegister dst, AddressLiteral src);

   // Move Unaligned Double Quadword
-  void movdqu(Address dst, XMMRegister src)    { Assembler::movdqu(dst, src); }
-  void movdqu(XMMRegister dst, Address src)    { Assembler::movdqu(dst, src); }
-  void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); }
+  void movdqu(Address dst, XMMRegister src);
+  void movdqu(XMMRegister dst, Address src);
+  void movdqu(XMMRegister dst, XMMRegister src);
   void movdqu(XMMRegister dst, AddressLiteral src);
+  // AVX Unaligned forms
+  void vmovdqu(Address dst, XMMRegister src);
+  void vmovdqu(XMMRegister dst, Address src);
+  void vmovdqu(XMMRegister dst, XMMRegister src);
+  void vmovdqu(XMMRegister dst, AddressLiteral src);

   // Move Aligned Double Quadword
   void movdqa(XMMRegister dst, Address src)    { Assembler::movdqa(dst, src); }
@ -1024,12 +1029,12 @@ public:
   void ucomisd(XMMRegister dst, AddressLiteral src);

   // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
-  void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
+  void xorpd(XMMRegister dst, XMMRegister src);
   void xorpd(XMMRegister dst, Address src)     { Assembler::xorpd(dst, src); }
   void xorpd(XMMRegister dst, AddressLiteral src);

   // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
-  void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
+  void xorps(XMMRegister dst, XMMRegister src);
   void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
   void xorps(XMMRegister dst, AddressLiteral src);
@ -1047,6 +1052,39 @@ public:
   void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
   void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

+  void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
+  void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
+
+  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
+  void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+
+  void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
+  void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+
+  void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
+  void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+
+  void punpcklbw(XMMRegister dst, XMMRegister src);
+  void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
+
+  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
+  void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
+
   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len)     { Assembler::vandpd(dst, nds, src, vector_len); }
   void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);

View File

@ -192,31 +192,22 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
     }
   } else if(UseSSE >= 2) {
     // Save whole 128bit (16 bytes) XMM regiters
-    if (VM_Version::supports_avx512novl()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vextractf32x4h(Address(rsp, off*wordSize), as_XMMRegister(n), 0);
-        off += delta;
-      }
-    } else {
     for (int n = 0; n < num_xmm_regs; n++) {
       __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
       off += delta;
     }
-    }
   }

   if (vect_words > 0) {
     assert(vect_words*wordSize == 128, "");
     __ subptr(rsp, 128); // Save upper half of YMM registes
-    off = 0;
     for (int n = 0; n < num_xmm_regs; n++) {
-      __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
+      __ vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
     }
     if (UseAVX > 2) {
       __ subptr(rsp, 256); // Save upper half of ZMM registes
-      off = 0;
       for (int n = 0; n < num_xmm_regs; n++) {
-        __ vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n));
+        __ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
       }
     }
   }
@ -285,31 +276,23 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
       off += delta;
     }
   } else if (UseSSE >= 2) {
-    if (VM_Version::supports_avx512novl()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf32x4h(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes), 0);
-        off += delta;
-      }
-    } else {
     for (int n = 0; n < num_xmm_regs; n++) {
       __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
       off += delta;
     }
-    }
   }

   if (restore_vectors) {
+    assert(additional_frame_bytes == 128, "");
     if (UseAVX > 2) {
-      off = 0;
+      // Restore upper half of ZMM registers.
       for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32));
+        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
       }
       __ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registes
     }
     // Restore upper half of YMM registes.
-    assert(additional_frame_bytes == 128, "");
-    off = 0;
     for (int n = 0; n < num_xmm_regs; n++) {
-      __ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
+      __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
     }
     __ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registes
   }

View File

@ -72,45 +72,28 @@ class SimpleRuntimeFrame {
 class RegisterSaver {
   // Capture info about frame layout. Layout offsets are in jint
   // units because compiler frame slots are jints.
-#define HALF_ZMM_BANK_WORDS 128
+#define XSAVE_AREA_BEGIN 160
+#define XSAVE_AREA_YMM_BEGIN 576
+#define XSAVE_AREA_ZMM_BEGIN 1152
+#define XSAVE_AREA_UPPERBANK 1664
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
+#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
   enum layout {
     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
-    xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
+    xmm_off       = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area
     DEF_XMM_OFFS(0),
     DEF_XMM_OFFS(1),
-    DEF_XMM_OFFS(2),
-    DEF_XMM_OFFS(3),
-    DEF_XMM_OFFS(4),
-    DEF_XMM_OFFS(5),
-    DEF_XMM_OFFS(6),
-    DEF_XMM_OFFS(7),
-    DEF_XMM_OFFS(8),
-    DEF_XMM_OFFS(9),
-    DEF_XMM_OFFS(10),
-    DEF_XMM_OFFS(11),
-    DEF_XMM_OFFS(12),
-    DEF_XMM_OFFS(13),
-    DEF_XMM_OFFS(14),
-    DEF_XMM_OFFS(15),
-    zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt),
+    // 2..15 are implied in range usage
+    ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
+    DEF_YMM_OFFS(0),
+    DEF_YMM_OFFS(1),
+    // 2..15 are implied in range usage
+    zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
+    zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
     DEF_ZMM_OFFS(16),
     DEF_ZMM_OFFS(17),
-    DEF_ZMM_OFFS(18),
-    DEF_ZMM_OFFS(19),
-    DEF_ZMM_OFFS(20),
-    DEF_ZMM_OFFS(21),
-    DEF_ZMM_OFFS(22),
-    DEF_ZMM_OFFS(23),
-    DEF_ZMM_OFFS(24),
-    DEF_ZMM_OFFS(25),
-    DEF_ZMM_OFFS(26),
-    DEF_ZMM_OFFS(27),
-    DEF_ZMM_OFFS(28),
-    DEF_ZMM_OFFS(29),
-    DEF_ZMM_OFFS(30),
-    DEF_ZMM_OFFS(31),
+    // 18..31 are implied in range usage
     fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
     fpu_stateH_end,
     r15_off, r15H_off,
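The arithmetic behind the new XSAVE_AREA_* constants, assuming the standard (non-compacted) xsave layout on the targeted AVX-512 parts; the component names below are an interpretation, only the numbers appear in the diff:

// XSAVE_AREA_BEGIN     = 160;   XMM0..XMM15 inside the legacy fxsave image
// XSAVE_AREA_YMM_BEGIN = 576;   YMM_Hi128: upper 128 bits of YMM0..YMM15
// XSAVE_AREA_ZMM_BEGIN = 1152;  ZMM_Hi256: upper 256 bits of ZMM0..ZMM15
// XSAVE_AREA_UPPERBANK = 1664;  Hi16_ZMM:  full ZMM16..ZMM31, 64 bytes each
static_assert(1664 + 16 * 64 == 2688,
              "upper bank ends at the 2688-byte size used by FPUStateSizeInWords (512 + 2176)");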
@ -160,8 +143,6 @@ class RegisterSaver {
 };

 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
-  int vect_words = 0;
-  int ymmhi_offset = -1;
   int off = 0;
   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
   if (UseAVX < 3) {
@ -171,24 +152,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
   if (save_vectors) {
     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
-    // Save upper half of YMM registers
-    vect_words = 16 * num_xmm_regs / wordSize;
-    if (UseAVX < 3) {
-      ymmhi_offset = additional_frame_words;
-      additional_frame_words += vect_words;
-    }
   }
 #else
   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 #endif

-  // Always make the frame size 16-byte aligned
-  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
-                                     reg_save_size*BytesPerInt, num_xmm_regs);
+  // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
+  int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs);
   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
-  // The caller will allocate additional_frame_words
-  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
   // CodeBlob frame size is in words.
   int frame_size_in_words = frame_size_in_bytes / wordSize;
   *total_frame_words = frame_size_in_words;
@ -203,12 +175,34 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
   __ push_CPU_state(); // Push a multiple of 16 bytes

   // push cpu state handles this on EVEX enabled targets
-  if ((vect_words > 0) && (UseAVX < 3)) {
-    assert(vect_words*wordSize >= 256, "");
-    // Save upper half of YMM registes(0..num_xmm_regs)
-    __ subptr(rsp, num_xmm_regs*16);
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
+  if (save_vectors) {
+    // Save upper half of YMM registes(0..15)
+    int base_addr = XSAVE_AREA_YMM_BEGIN;
+    for (int n = 0; n < 16; n++) {
+      __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
+    }
+    if (VM_Version::supports_evex()) {
+      // Save upper half of ZMM registes(0..15)
+      base_addr = XSAVE_AREA_ZMM_BEGIN;
+      for (int n = 0; n < 16; n++) {
+        __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
+      }
+      // Save full ZMM registes(16..num_xmm_regs)
+      base_addr = XSAVE_AREA_UPPERBANK;
+      int off = 0;
+      int vector_len = Assembler::AVX_512bit;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
+      }
+    }
+  } else {
+    if (VM_Version::supports_evex()) {
+      // Save upper bank of ZMM registers(16..31) for double/float usage
+      int base_addr = XSAVE_AREA_UPPERBANK;
+      int off = 0;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
+      }
     }
   }

   if (frame::arg_reg_save_area_bytes != 0) {
@ -224,8 +218,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
   OopMapSet *oop_maps = new OopMapSet();
   OopMap* map = new OopMap(frame_size_in_slots, 0);

-#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
-#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)
+#define STACK_OFFSET(x) VMRegImpl::stack2reg((x))

   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
@ -257,31 +250,21 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
     off = zmm16_off;
     delta = zmm17_off - off;
     for (int n = 16; n < num_xmm_regs; n++) {
-      XMMRegister xmm_name = as_XMMRegister(n);
-      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
+      XMMRegister zmm_name = as_XMMRegister(n);
+      map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
       off += delta;
     }
   }

 #if defined(COMPILER2) || INCLUDE_JVMCI
   if (save_vectors) {
-    assert(ymmhi_offset != -1, "save area must exist");
-    map->set_callee_saved(YMMHI_STACK_OFFSET(  0), xmm0->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next(4));
+    off = ymm0_off;
+    int delta = ymm1_off - off;
+    for (int n = 0; n < 16; n++) {
+      XMMRegister ymm_name = as_XMMRegister(n);
+      map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
+      off += delta;
+    }
   }
 #endif // COMPILER2 || INCLUDE_JVMCI
@ -316,8 +299,8 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
     off = zmm16H_off;
     delta = zmm17H_off - off;
     for (int n = 16; n < num_xmm_regs; n++) {
-      XMMRegister xmm_name = as_XMMRegister(n);
-      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
+      XMMRegister zmm_name = as_XMMRegister(n);
+      map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
       off += delta;
     }
   }
@ -335,21 +318,48 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
     // Pop arg register save area
     __ addptr(rsp, frame::arg_reg_save_area_bytes);
   }

 #if defined(COMPILER2) || INCLUDE_JVMCI
-  // On EVEX enabled targets everything is handled in pop fpu state
-  if ((restore_vectors) && (UseAVX < 3)) {
-    assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
-    int off = 0;
-    // Restore upper half of YMM registes (0..num_xmm_regs)
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
-    }
-    __ addptr(rsp, num_xmm_regs*16);
+  if (restore_vectors) {
+    assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
+    assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
   }
 #else
-  assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
+  assert(!save_vectors, "vectors are generated only by C2");
 #endif

+  // On EVEX enabled targets everything is handled in pop fpu state
+  if (restore_vectors) {
+    // Restore upper half of YMM registes (0..15)
+    int base_addr = XSAVE_AREA_YMM_BEGIN;
+    for (int n = 0; n < 16; n++) {
+      __ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16));
+    }
+    if (VM_Version::supports_evex()) {
+      // Restore upper half of ZMM registes (0..15)
+      base_addr = XSAVE_AREA_ZMM_BEGIN;
+      for (int n = 0; n < 16; n++) {
+        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
+      }
+      // Restore full ZMM registes(16..num_xmm_regs)
+      base_addr = XSAVE_AREA_UPPERBANK;
+      int vector_len = Assembler::AVX_512bit;
+      int off = 0;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
+      }
+    }
+  } else {
+    if (VM_Version::supports_evex()) {
+      // Restore upper bank of ZMM registes(16..31) for double/float usage
+      int base_addr = XSAVE_AREA_UPPERBANK;
+      int off = 0;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
+      }
+    }
+  }
+
   // Recover CPU state
   __ pop_CPU_state();
   // Get the rbp described implicitly by the calling convention (no oopMap)

View File

@ -273,7 +273,7 @@ class StubGenerator: public StubCodeGenerator {
     if (UseAVX > 2) {
       last_reg = 31;
     }
-    if (VM_Version::supports_avx512novl()) {
+    if (VM_Version::supports_evex()) {
       for (int i = xmm_save_first; i <= last_reg; i++) {
         __ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0);
       }
@ -391,7 +391,7 @@ class StubGenerator: public StubCodeGenerator {
     // restore regs belonging to calling function
 #ifdef _WIN64
     // emit the restores for xmm regs
-    if (VM_Version::supports_avx512novl()) {
+    if (VM_Version::supports_evex()) {
       for (int i = xmm_save_first; i <= last_reg; i++) {
         __ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0);
       }

View File

@ -552,6 +552,19 @@ protected:
         break;
       }
     }
+    // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
+    if (retVal == false) {
+      // Verify that OS save/restore all bits of EVEX registers
+      // during signal processing.
+      int nreg = 2 LP64_ONLY(+2);
+      retVal = true;
+      for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
+        if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
+          retVal = false;
+          break;
+        }
+      }
+    }
   }
   return retVal;
 }
@ -706,6 +719,9 @@ public:
   static bool supports_avx512vl() { return (_cpuFeatures & CPU_AVX512VL) != 0; }
   static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
   static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
+  static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
+  static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
+  static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
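For orientation, a hypothetical selection helper built on the new predicates (illustrative only; preferred_vector_bytes is not a HotSpot function): prefer 512-bit paths on EVEX machines, 256-bit paths on AVX/AVX2-only machines, and SSE otherwise.

// Sketch: choose a code-gen vector width from the new CPU feature predicates.
int preferred_vector_bytes() {
  if (VM_Version::supports_evex())    return 64;   // full AVX-512 / EVEX encodings
  if (VM_Version::supports_avxonly()) return 32;   // AVX or AVX2 without EVEX
  return 16;                                       // SSE baseline
}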

File diff suppressed because it is too large.

View File

@ -291,10 +291,8 @@ static int pre_call_resets_size() {
     size += 6; // fldcw
   }
   if (C->max_vector_size() > 16) {
-    if(UseAVX <= 2) {
     size += 3; // vzeroupper
-    }
   }
   return size;
 }

View File

@ -536,12 +536,8 @@ source %{
 #define __ _masm.

 static int clear_avx_size() {
-  if(UseAVX > 2) {
-    return 0; // vzeroupper is ignored
-  } else {
   return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // vzeroupper
-  }
 }

 // !!!!! Special hack to get all types of calls to specify the byte offset
 // from the start of the call to the point where the return address

View File

@ -186,7 +186,7 @@
           "Maximum number of unrolls for main loop")                       \
           range(0, max_jint)                                                \
                                                                             \
-  product(bool, SuperWordLoopUnrollAnalysis, false,                         \
+  product_pd(bool, SuperWordLoopUnrollAnalysis,                             \
           "Map number of unrolls for main loop via "                        \
           "Superword Level Parallelism analysis")                           \
                                                                             \

View File

@ -269,6 +269,10 @@ public:
   // should generate this one.
   static const bool match_rule_supported(int opcode);

+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  static const bool match_rule_supported_vector(int opcode, int vlen);
+
   // Some uarchs have different sized float register resources
   static const int float_pressure(int default_pressure_threshold);
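A hedged sketch of what a vector-aware platform override of this hook might look like; the opcode cases and the AVX-512BW predicate below are examples, not the actual rules introduced by this changeset.

// Illustrative platform override: start from the scalar answer, then veto
// specific vector opcodes at lengths the CPU cannot encode efficiently.
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  if (!match_rule_supported(opcode)) {
    return false;
  }
  switch (opcode) {
    case Op_AddVB:
    case Op_SubVB:
      // 64-byte byte vectors would need AVX-512BW; fall back to smaller vectors otherwise.
      if (vlen == 64 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    default:
      break;
  }
  return true;  // Per default match rules are supported.
}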

View File

@ -2247,7 +2247,10 @@ void SuperWord::output() {
       NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte));
       // For atomic unrolled loops which are vector mapped, instigate more unrolling.
       cl->set_notpassed_slp();
+      // if vector resources are limited, do not allow additional unrolling
+      if (FLOATPRESSURE > 8) {
         C->set_major_progress();
+      }
       cl->mark_do_unroll_only();
     }
   }

View File

@ -188,7 +188,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
       (vlen > 1) && is_power_of_2(vlen) &&
       Matcher::vector_size_supported(bt, vlen)) {
     int vopc = VectorNode::opcode(opc, bt);
-    return vopc > 0 && Matcher::match_rule_supported(vopc) && (vopc != Op_CMoveD || vlen == 4);
+    return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen);
   }
   return false;
 }