8329032: C2 compiler register allocation support for APX EGPRs
Reviewed-by: kvn, sviswanathan
This commit is contained in:
parent
4b4a483b6f
commit
e5de26ddf0
@ -842,7 +842,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
|
||||
|
||||
case REX2:
|
||||
NOT_LP64(assert(false, "64bit prefixes"));
|
||||
if ((0xFF & *ip++) & REXBIT_W) {
|
||||
if ((0xFF & *ip++) & REX2BIT_W) {
|
||||
is_64bit = true;
|
||||
}
|
||||
goto again_after_prefix;
|
||||
@ -899,7 +899,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
|
||||
|
||||
case REX2:
|
||||
NOT_LP64(assert(false, "64bit prefix found"));
|
||||
if ((0xFF & *ip++) & REXBIT_W) {
|
||||
if ((0xFF & *ip++) & REX2BIT_W) {
|
||||
is_64bit = true;
|
||||
}
|
||||
goto again_after_size_prefix2;
|
||||
@ -4498,7 +4498,7 @@ void Assembler::ud2() {
|
||||
|
||||
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
|
||||
assert(VM_Version::supports_sse4_2(), "");
|
||||
assert(!needs_eevex(src.base(), src.index()), "does not support extended gprs");
|
||||
assert(!needs_eevex(src.base(), src.index()), "does not support extended gprs as BASE or INDEX of address operand");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
@ -5893,6 +5893,71 @@ void Assembler::evpunpckhqdq(XMMRegister dst, KRegister mask, XMMRegister src1,
|
||||
emit_int16(0x6D, (0xC0 | encode));
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
// Emits the APX PUSH2 instruction for the register pair (src1, src2).
//   src1     - encoded in the ModRM r/m field (the EVEX "base" operand)
//   src2     - encoded in EVEX.vvvv
//   with_ppx - forwarded as the W bit of the prefix; push2p() passes true
void Assembler::push2(Register src1, Register src2, bool with_ppx) {
  assert(VM_Version::supports_apx_f(), "requires APX");

  const int base_enc = src1->encoding();  // EVEX.BASE
  const int vvvv_enc = src2->encoding();  // EVEX.VVVV

  InstructionAttr attributes(0, /* rex_w */ with_ppx, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // EVEX.ND = 1
  attributes.set_extended_context();
  attributes.set_is_evex_instruction();
  set_attributes(&attributes);

  // Bit 3 of the base encoding feeds B; encodings of 16 and above feed the
  // extended B / V bits.
  const bool base_bit3 = (base_enc & 8) != 0;
  const bool base_ext  = base_enc >= 16;
  const bool vvvv_ext  = vvvv_enc >= 16;
  evex_prefix(false, base_bit3, false, false, base_ext, vvvv_ext, false /*eevex_x*/, vvvv_enc, VEX_SIMD_NONE, /* map4 */ VEX_OPCODE_0F_3C);

  // Opcode 0xFF followed by ModRM: mod = 11, reg = /6, r/m = low three bits of src1.
  emit_int16(0xFF, (0xC0 | (0x6 << 3) | (base_enc & 7)));
}
|
||||
|
||||
// Emits the APX POP2 instruction for the register pair (src1, src2).
//   src1     - encoded in the ModRM r/m field (the EVEX "base" operand)
//   src2     - encoded in EVEX.vvvv
//   with_ppx - forwarded as the W bit of the prefix; pop2p() passes true
void Assembler::pop2(Register src1, Register src2, bool with_ppx) {
  assert(VM_Version::supports_apx_f(), "requires APX");

  const int base_enc = src1->encoding();  // EVEX.BASE
  const int vvvv_enc = src2->encoding();  // EVEX.VVVV

  InstructionAttr attributes(0, /* rex_w */ with_ppx, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // EVEX.ND = 1
  attributes.set_extended_context();
  attributes.set_is_evex_instruction();
  set_attributes(&attributes);

  // Bit 3 of the base encoding feeds B; encodings of 16 and above feed the
  // extended B / V bits.
  const bool base_bit3 = (base_enc & 8) != 0;
  const bool base_ext  = base_enc >= 16;
  const bool vvvv_ext  = vvvv_enc >= 16;
  evex_prefix(false, base_bit3, false, false, base_ext, vvvv_ext, false /*eevex_x*/, vvvv_enc, VEX_SIMD_NONE, /* map4 */ VEX_OPCODE_0F_3C);

  // Opcode 0x8F followed by ModRM: mod = 11, reg = /0, r/m = low three bits of src1.
  emit_int16(0x8F, (0xC0 | (base_enc & 7)));
}
|
||||
|
||||
// PUSH2 variant that always requests the PPX form (with_ppx = true).
void Assembler::push2p(Register src1, Register src2) {
  push2(src1, src2, /* with_ppx */ true);
}
|
||||
|
||||
// POP2 variant that always requests the PPX form (with_ppx = true).
void Assembler::pop2p(Register src1, Register src2) {
  pop2(src1, src2, /* with_ppx */ true);
}
|
||||
|
||||
// Emits a single-register push carrying a REX2 prefix with the W bit set.
void Assembler::pushp(Register src) {
  assert(VM_Version::supports_apx_f(), "requires APX");
  // The helper writes the REX2 prefix and hands back the low three bits of
  // the register encoding, which are merged into the 0x50 opcode.
  const int low_bits = prefixq_and_encode_rex2(src->encoding());
  emit_int8(0x50 | low_bits);
}
|
||||
|
||||
// Emits a single-register pop carrying a REX2 prefix with the W bit set.
void Assembler::popp(Register dst) {
  assert(VM_Version::supports_apx_f(), "requires APX");
  // The helper writes the REX2 prefix and hands back the low three bits of
  // the register encoding, which are merged into the 0x58 opcode.
  const int low_bits = prefixq_and_encode_rex2(dst->encoding());
  emit_int8(0x58 | low_bits);
}
|
||||
#endif //_LP64
|
||||
|
||||
|
||||
void Assembler::push(int32_t imm32) {
|
||||
// in 64bits we push 64bits onto the stack but only
|
||||
// take a 32bit immediate
|
||||
@ -7207,6 +7272,7 @@ void Assembler::vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int ve
|
||||
|
||||
void Assembler::vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(!needs_eevex(src.base(), src.index()), "does not support extended gprs as BASE or INDEX of address operand");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
@ -11011,6 +11077,7 @@ void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
|
||||
|
||||
void Assembler::vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
assert(!needs_eevex(src.base()), "does not support extended gprs as BASE of address operand");
|
||||
assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
assert(src.isxmmindex(),"expected to be xmm index");
|
||||
@ -11024,6 +11091,7 @@ void Assembler::vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int v
|
||||
|
||||
void Assembler::vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
assert(!needs_eevex(src.base()), "does not support extended gprs as BASE of address operand");
|
||||
assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
assert(src.isxmmindex(),"expected to be xmm index");
|
||||
@ -11037,6 +11105,7 @@ void Assembler::vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int v
|
||||
|
||||
void Assembler::vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
assert(!needs_eevex(src.base()), "does not support extended gprs as BASE of address operand");
|
||||
assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
assert(src.isxmmindex(),"expected to be xmm index");
|
||||
@ -11050,6 +11119,7 @@ void Assembler::vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int v
|
||||
|
||||
void Assembler::vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
assert(!needs_eevex(src.base()), "does not support extended gprs as BASE of address operand");
|
||||
assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
assert(src.isxmmindex(),"expected to be xmm index");
|
||||
@ -11808,7 +11878,6 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
|
||||
_attributes->get_embedded_opmask_register_specifier() != 0) {
|
||||
byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
|
||||
}
|
||||
|
||||
emit_int32(EVEX_4bytes, byte2, byte3, byte4);
|
||||
}
|
||||
|
||||
@ -12921,14 +12990,14 @@ void Assembler::emit_data64(jlong data,
|
||||
int Assembler::get_base_prefix_bits(int enc) {
|
||||
int bits = 0;
|
||||
if (enc & 16) bits |= REX2BIT_B4;
|
||||
if (enc & 8) bits |= REXBIT_B;
|
||||
if (enc & 8) bits |= REX2BIT_B;
|
||||
return bits;
|
||||
}
|
||||
|
||||
int Assembler::get_index_prefix_bits(int enc) {
|
||||
int bits = 0;
|
||||
if (enc & 16) bits |= REX2BIT_X4;
|
||||
if (enc & 8) bits |= REXBIT_X;
|
||||
if (enc & 8) bits |= REX2BIT_X;
|
||||
return bits;
|
||||
}
|
||||
|
||||
@ -12943,7 +13012,7 @@ int Assembler::get_index_prefix_bits(Register index) {
|
||||
int Assembler::get_reg_prefix_bits(int enc) {
|
||||
int bits = 0;
|
||||
if (enc & 16) bits |= REX2BIT_R4;
|
||||
if (enc & 8) bits |= REXBIT_R;
|
||||
if (enc & 8) bits |= REX2BIT_R;
|
||||
return bits;
|
||||
}
|
||||
|
||||
@ -13181,6 +13250,15 @@ bool Assembler::prefix_is_rex2(int prefix) {
|
||||
return (prefix & 0xFF00) == WREX2;
|
||||
}
|
||||
|
||||
// Builds the 16-bit REX2 prefix value for a 64-bit (W = 1) instruction whose
// only register operands are the base and index of `adr`.
//   is_map1 - when true the M0 bit is set as well
// Returns WREX2 combined with the computed payload bits.
int Assembler::get_prefixq_rex2(Address adr, bool is_map1) {
  assert(UseAPX, "APX features not enabled");
  const int map_bits   = is_map1 ? REX2BIT_M0 : 0;
  const int base_bits  = get_base_prefix_bits(adr.base());
  const int index_bits = get_index_prefix_bits(adr.index());
  return WREX2 | (REX2BIT_W | map_bits | base_bits | index_bits);
}
|
||||
|
||||
int Assembler::get_prefixq(Address adr, bool is_map1) {
|
||||
if (adr.base_needs_rex2() || adr.index_needs_rex2()) {
|
||||
return get_prefixq_rex2(adr, is_map1);
|
||||
@ -13190,15 +13268,6 @@ int Assembler::get_prefixq(Address adr, bool is_map1) {
|
||||
return is_map1 ? (((int16_t)prfx) << 8) | 0x0F : (int16_t)prfx;
|
||||
}
|
||||
|
||||
int Assembler::get_prefixq_rex2(Address adr, bool is_map1) {
|
||||
assert(UseAPX, "APX features not enabled");
|
||||
int bits = REXBIT_W;
|
||||
if (is_map1) bits |= REX2BIT_M0;
|
||||
bits |= get_base_prefix_bits(adr.base());
|
||||
bits |= get_index_prefix_bits(adr.index());
|
||||
return WREX2 | bits;
|
||||
}
|
||||
|
||||
int Assembler::get_prefixq(Address adr, Register src, bool is_map1) {
|
||||
if (adr.base_needs_rex2() || adr.index_needs_rex2() || src->encoding() >= 16) {
|
||||
return get_prefixq_rex2(adr, src, is_map1);
|
||||
@ -13243,7 +13312,7 @@ int Assembler::get_prefixq(Address adr, Register src, bool is_map1) {
|
||||
|
||||
int Assembler::get_prefixq_rex2(Address adr, Register src, bool is_map1) {
|
||||
assert(UseAPX, "APX features not enabled");
|
||||
int bits = REXBIT_W;
|
||||
int bits = REX2BIT_W;
|
||||
if (is_map1) bits |= REX2BIT_M0;
|
||||
bits |= get_base_prefix_bits(adr.base());
|
||||
bits |= get_index_prefix_bits(adr.index());
|
||||
@ -13306,7 +13375,7 @@ void Assembler::prefixq(Address adr, XMMRegister src) {
|
||||
}
|
||||
|
||||
void Assembler::prefixq_rex2(Address adr, XMMRegister src) {
|
||||
int bits = REXBIT_W;
|
||||
int bits = REX2BIT_W;
|
||||
bits |= get_base_prefix_bits(adr.base());
|
||||
bits |= get_index_prefix_bits(adr.index());
|
||||
bits |= get_reg_prefix_bits(src->encoding());
|
||||
@ -13329,7 +13398,7 @@ int Assembler::prefixq_and_encode(int reg_enc, bool is_map1) {
|
||||
|
||||
|
||||
// Emits one REX2 prefix with W = 1 for a single register operand and returns
// the low three bits of its encoding for the caller to place in the opcode or
// ModRM byte.
//   reg_enc - register encoding; bits 3 and 4 are folded into the prefix via
//             get_base_prefix_bits()
//   is_map1 - when true the M0 bit is set as well
// Exactly one prefix16() call remains; a second one (spelled with REXBIT_W)
// would have written the prefix twice.
int Assembler::prefixq_and_encode_rex2(int reg_enc, bool is_map1) {
  prefix16(WREX2 | REX2BIT_W | (is_map1 ? REX2BIT_M0: 0) | get_base_prefix_bits(reg_enc));
  return reg_enc & 0x7;
}
|
||||
|
||||
@ -13358,7 +13427,7 @@ int Assembler::prefixq_and_encode(int dst_enc, int src_enc, bool is_map1) {
|
||||
}
|
||||
|
||||
int Assembler::prefixq_and_encode_rex2(int dst_enc, int src_enc, bool is_map1) {
|
||||
int init_bits = REXBIT_W | (is_map1 ? REX2BIT_M0 : 0);
|
||||
int init_bits = REX2BIT_W | (is_map1 ? REX2BIT_M0 : 0);
|
||||
return prefix_and_encode_rex2(dst_enc, src_enc, init_bits);
|
||||
}
|
||||
|
||||
@ -14168,7 +14237,7 @@ void Assembler::precompute_instructions() {
|
||||
ResourceMark rm;
|
||||
|
||||
// Make a temporary buffer big enough for the routines we're capturing
|
||||
int size = 256;
|
||||
int size = UseAPX ? 512 : 256;
|
||||
char* tmp_code = NEW_RESOURCE_ARRAY(char, size);
|
||||
CodeBuffer buffer((address)tmp_code, size);
|
||||
MacroAssembler masm(&buffer);
|
||||
@ -14212,11 +14281,97 @@ static void emit_copy(CodeSection* code_section, u_char* src, int src_len) {
|
||||
code_section->set_end(end + src_len);
|
||||
}
|
||||
|
||||
|
||||
// Does not actually store the value of rsp on the stack.
|
||||
// The slot for rsp just contains an arbitrary value.
|
||||
void Assembler::pusha() { // 64bit
|
||||
emit_copy(code_section(), pusha_code, pusha_len);
|
||||
}
|
||||
|
||||
// Does not actually store the value of rsp on the stack.
|
||||
// The slot for rsp just contains an arbitrary value.
|
||||
void Assembler::pusha_uncached() { // 64bit
|
||||
if (UseAPX) {
|
||||
// Data being pushed by PUSH2 must be 16B-aligned on the stack, for this push rax upfront
|
||||
// and use it as a temporary register for stack alignment.
|
||||
pushp(rax);
|
||||
// Move original stack pointer to RAX and align stack pointer to 16B boundary.
|
||||
movq(rax, rsp);
|
||||
andq(rsp, -(StackAlignmentInBytes));
|
||||
// Push pair of original stack pointer along with remaining registers
|
||||
// at 16B aligned boundary.
|
||||
push2p(rax, r31);
|
||||
push2p(r30, r29);
|
||||
push2p(r28, r27);
|
||||
push2p(r26, r25);
|
||||
push2p(r24, r23);
|
||||
push2p(r22, r21);
|
||||
push2p(r20, r19);
|
||||
push2p(r18, r17);
|
||||
push2p(r16, r15);
|
||||
push2p(r14, r13);
|
||||
push2p(r12, r11);
|
||||
push2p(r10, r9);
|
||||
push2p(r8, rdi);
|
||||
push2p(rsi, rbp);
|
||||
push2p(rbx, rdx);
|
||||
// To maintain 16 byte alignment after rcx is pushed.
|
||||
subq(rsp, 8);
|
||||
pushp(rcx);
|
||||
} else {
|
||||
subq(rsp, 16 * wordSize);
|
||||
movq(Address(rsp, 15 * wordSize), rax);
|
||||
movq(Address(rsp, 14 * wordSize), rcx);
|
||||
movq(Address(rsp, 13 * wordSize), rdx);
|
||||
movq(Address(rsp, 12 * wordSize), rbx);
|
||||
// Skip rsp as the value is normally not used. There are a few places where
|
||||
// the original value of rsp needs to be known but that can be computed
|
||||
// from the value of rsp immediately after pusha (rsp + 16 * wordSize).
|
||||
// FIXME: For APX any such direct access should also consider EGPR size
|
||||
// during address compution.
|
||||
movq(Address(rsp, 10 * wordSize), rbp);
|
||||
movq(Address(rsp, 9 * wordSize), rsi);
|
||||
movq(Address(rsp, 8 * wordSize), rdi);
|
||||
movq(Address(rsp, 7 * wordSize), r8);
|
||||
movq(Address(rsp, 6 * wordSize), r9);
|
||||
movq(Address(rsp, 5 * wordSize), r10);
|
||||
movq(Address(rsp, 4 * wordSize), r11);
|
||||
movq(Address(rsp, 3 * wordSize), r12);
|
||||
movq(Address(rsp, 2 * wordSize), r13);
|
||||
movq(Address(rsp, wordSize), r14);
|
||||
movq(Address(rsp, 0), r15);
|
||||
}
|
||||
}
|
||||
|
||||
void Assembler::popa() { // 64bit
|
||||
emit_copy(code_section(), popa_code, popa_len);
|
||||
}
|
||||
|
||||
void Assembler::popa_uncached() { // 64bit
|
||||
if (UseAPX) {
|
||||
popp(rcx);
|
||||
addq(rsp, 8);
|
||||
// Data being popped by POP2 must be 16B-aligned on the stack.
|
||||
pop2p(rdx, rbx);
|
||||
pop2p(rbp, rsi);
|
||||
pop2p(rdi, r8);
|
||||
pop2p(r9, r10);
|
||||
pop2p(r11, r12);
|
||||
pop2p(r13, r14);
|
||||
pop2p(r15, r16);
|
||||
pop2p(r17, r18);
|
||||
pop2p(r19, r20);
|
||||
pop2p(r21, r22);
|
||||
pop2p(r23, r24);
|
||||
pop2p(r25, r26);
|
||||
pop2p(r27, r28);
|
||||
pop2p(r29, r30);
|
||||
// Popped value in RAX holds original unaligned stack pointer.
|
||||
pop2p(r31, rax);
|
||||
// Reinstantiate original stack pointer.
|
||||
movq(rsp, rax);
|
||||
popp(rax);
|
||||
} else {
|
||||
movq(r15, Address(rsp, 0));
|
||||
movq(r14, Address(rsp, wordSize));
|
||||
movq(r13, Address(rsp, 2 * wordSize));
|
||||
@ -14236,37 +14391,7 @@ void Assembler::popa_uncached() { // 64bit
|
||||
movq(rax, Address(rsp, 15 * wordSize));
|
||||
|
||||
addq(rsp, 16 * wordSize);
|
||||
}
|
||||
|
||||
// Does not actually store the value of rsp on the stack.
|
||||
// The slot for rsp just contains an arbitrary value.
|
||||
void Assembler::pusha() { // 64bit
|
||||
emit_copy(code_section(), pusha_code, pusha_len);
|
||||
}
|
||||
|
||||
// Does not actually store the value of rsp on the stack.
|
||||
// The slot for rsp just contains an arbitrary value.
|
||||
void Assembler::pusha_uncached() { // 64bit
|
||||
subq(rsp, 16 * wordSize);
|
||||
|
||||
movq(Address(rsp, 15 * wordSize), rax);
|
||||
movq(Address(rsp, 14 * wordSize), rcx);
|
||||
movq(Address(rsp, 13 * wordSize), rdx);
|
||||
movq(Address(rsp, 12 * wordSize), rbx);
|
||||
// Skip rsp as the value is normally not used. There are a few places where
|
||||
// the original value of rsp needs to be known but that can be computed
|
||||
// from the value of rsp immediately after pusha (rsp + 16 * wordSize).
|
||||
movq(Address(rsp, 10 * wordSize), rbp);
|
||||
movq(Address(rsp, 9 * wordSize), rsi);
|
||||
movq(Address(rsp, 8 * wordSize), rdi);
|
||||
movq(Address(rsp, 7 * wordSize), r8);
|
||||
movq(Address(rsp, 6 * wordSize), r9);
|
||||
movq(Address(rsp, 5 * wordSize), r10);
|
||||
movq(Address(rsp, 4 * wordSize), r11);
|
||||
movq(Address(rsp, 3 * wordSize), r12);
|
||||
movq(Address(rsp, 2 * wordSize), r13);
|
||||
movq(Address(rsp, wordSize), r14);
|
||||
movq(Address(rsp, 0), r15);
|
||||
}
|
||||
}
|
||||
|
||||
void Assembler::vzeroupper() {
|
||||
|
@ -530,14 +530,16 @@ class Assembler : public AbstractAssembler {
|
||||
};
|
||||
|
||||
// Payload bits OR-ed together when building REX / REX2 prefixes.
enum PrefixBits {
  // Low nibble under the older REXBIT_* spelling. Values match the REX2BIT_*
  // names below; kept because other code still refers to these names.
  REXBIT_B = 0x01,
  REXBIT_X = 0x02,
  REXBIT_R = 0x04,
  REXBIT_W = 0x08,
  // Low nibble, REX2 spelling.
  REX2BIT_B = 0x01,
  REX2BIT_X = 0x02,
  REX2BIT_R = 0x04,
  REX2BIT_W = 0x08,
  // High nibble: extension bits for register encodings 16..31 and the M0 bit.
  REX2BIT_B4 = 0x10,
  REX2BIT_X4 = 0x20,
  REX2BIT_R4 = 0x40,
  REX2BIT_M0 = 0x80,
  // Pre-combined payloads: W|B and W|B4.
  REX2BIT_WB = 0x09,
  REX2BIT_WB4 = 0x18,
};
|
||||
|
||||
enum VexPrefix {
|
||||
@ -1017,6 +1019,15 @@ private:
|
||||
|
||||
void pusha_uncached();
|
||||
void popa_uncached();
|
||||
|
||||
// APX ISA extensions for register save/restore optimizations.
|
||||
void push2(Register src1, Register src2, bool with_ppx = false);
|
||||
void pop2(Register src1, Register src2, bool with_ppx = false);
|
||||
void push2p(Register src1, Register src2);
|
||||
void pop2p(Register src1, Register src2);
|
||||
void pushp(Register src);
|
||||
void popp(Register src);
|
||||
|
||||
#endif
|
||||
void vzeroupper_uncached();
|
||||
void decq(Register dst);
|
||||
@ -3070,7 +3081,6 @@ public:
|
||||
}
|
||||
|
||||
// Flags these attributes as using the extended context by setting
// _is_extended_context to true.
void set_extended_context(void) { _is_extended_context = true; }
|
||||
|
||||
};
|
||||
|
||||
#endif // CPU_X86_ASSEMBLER_X86_HPP
|
||||
|
@ -39,7 +39,7 @@ enum {
|
||||
|
||||
// registers
|
||||
enum {
|
||||
pd_nof_cpu_regs_frame_map = Register::number_of_registers, // number of registers used during code emission
|
||||
pd_nof_cpu_regs_frame_map = NOT_LP64(8) LP64_ONLY(16), // number of registers used during code emission
|
||||
pd_nof_fpu_regs_frame_map = FloatRegister::number_of_registers, // number of registers used during code emission
|
||||
pd_nof_xmm_regs_frame_map = XMMRegister::number_of_registers, // number of registers used during code emission
|
||||
|
||||
|
@ -2836,7 +2836,7 @@ void LIR_Assembler::align_call(LIR_Code code) {
|
||||
offset += NativeCall::displacement_offset;
|
||||
break;
|
||||
case lir_icvirtual_call:
|
||||
offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size;
|
||||
offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size_rex;
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
@ -2873,7 +2873,7 @@ void LIR_Assembler::emit_static_call_stub() {
|
||||
int start = __ offset();
|
||||
|
||||
// make sure that the displacement word of the call ends up word aligned
|
||||
__ align(BytesPerWord, __ offset() + NativeMovConstReg::instruction_size + NativeCall::displacement_offset);
|
||||
__ align(BytesPerWord, __ offset() + NativeMovConstReg::instruction_size_rex + NativeCall::displacement_offset);
|
||||
__ relocate(static_stub_Relocation::spec(call_pc));
|
||||
__ mov_metadata(rbx, (Metadata*)nullptr);
|
||||
// must be set to -1 at code generation time
|
||||
|
@ -420,7 +420,12 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
|
||||
void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers) {
|
||||
__ block_comment("save_live_registers");
|
||||
|
||||
__ pusha(); // integer registers
|
||||
// Push CPU state in multiple of 16 bytes
|
||||
#ifdef _LP64
|
||||
__ save_legacy_gprs();
|
||||
#else
|
||||
__ pusha();
|
||||
#endif
|
||||
|
||||
// assert(float_regs_as_doubles_off % 2 == 0, "misaligned offset");
|
||||
// assert(xmm_regs_as_doubles_off % 2 == 0, "misaligned offset");
|
||||
@ -560,7 +565,12 @@ void C1_MacroAssembler::restore_live_registers(bool restore_fpu_registers) {
|
||||
__ block_comment("restore_live_registers");
|
||||
|
||||
restore_fpu(this, restore_fpu_registers);
|
||||
#ifdef _LP64
|
||||
__ restore_legacy_gprs();
|
||||
#else
|
||||
__ popa();
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -583,6 +583,25 @@ void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));
|
||||
|
||||
if (UseAPX) {
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
|
||||
}
|
||||
|
||||
int gp_spill_size = 0;
|
||||
int opmask_spill_size = 0;
|
||||
int xmm_spill_size = 0;
|
||||
|
@ -484,6 +484,25 @@ private:
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));
|
||||
caller_saved.Remove(OptoReg::as_OptoReg(stub->ref()->as_VMReg()));
|
||||
|
||||
if (UseAPX) {
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
|
||||
caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
|
||||
}
|
||||
|
||||
// Create mask of live registers
|
||||
RegMask live = stub->live();
|
||||
if (stub->tmp() != noreg) {
|
||||
|
@ -115,7 +115,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||
"Highest supported AVX instructions set on x86/x64") \
|
||||
range(0, 3) \
|
||||
\
|
||||
\
|
||||
product(bool, UseAPX, false, EXPERIMENTAL, \
|
||||
"Use Intel Advanced Performance Extensions") \
|
||||
\
|
||||
@ -192,7 +191,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||
product(bool, IntelJccErratumMitigation, true, DIAGNOSTIC, \
|
||||
"Turn off JVM mitigations related to Intel micro code " \
|
||||
"mitigations for the Intel JCC erratum") \
|
||||
\
|
||||
// end of ARCH_FLAGS
|
||||
|
||||
#endif // CPU_X86_GLOBALS_X86_HPP
|
||||
|
@ -49,12 +49,17 @@ jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, JVMC
|
||||
return (pc_offset + NativeCall::instruction_size);
|
||||
} else if (inst->is_mov_literal64()) {
|
||||
// mov+call instruction pair
|
||||
jint offset = pc_offset + NativeMovConstReg::instruction_size;
|
||||
jint offset = pc_offset + ((NativeMovConstReg*)inst)->instruction_size();
|
||||
u_char* call = (u_char*) (_instructions->start() + offset);
|
||||
if (call[0] == Assembler::REX_B) {
|
||||
offset += 1; /* prefix byte for extended register R8-R15 */
|
||||
call++;
|
||||
}
|
||||
if (call[0] == Assembler::REX2) {
|
||||
offset += 2; /* prefix byte for APX extended GPR register R16-R31 */
|
||||
call+=2;
|
||||
}
|
||||
// Register indirect call.
|
||||
assert(call[0] == 0xFF, "expected call");
|
||||
offset += 2; /* opcode byte + modrm byte */
|
||||
return (offset);
|
||||
|
@ -4087,6 +4087,11 @@ RegSet MacroAssembler::call_clobbered_gp_registers() {
|
||||
regs += RegSet::range(r8, r11);
|
||||
#else
|
||||
regs += RegSet::of(rax, rcx, rdx);
|
||||
#endif
|
||||
#ifdef _LP64
|
||||
if (UseAPX) {
|
||||
regs += RegSet::range(r16, as_Register(Register::number_of_registers - 1));
|
||||
}
|
||||
#endif
|
||||
return regs;
|
||||
}
|
||||
@ -10379,3 +10384,45 @@ void MacroAssembler::lightweight_unlock(Register obj, Register reg_rax, Register
|
||||
|
||||
bind(unlocked);
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
// Saves legacy GPRs state on stack.
|
||||
void MacroAssembler::save_legacy_gprs() {
|
||||
subq(rsp, 16 * wordSize);
|
||||
movq(Address(rsp, 15 * wordSize), rax);
|
||||
movq(Address(rsp, 14 * wordSize), rcx);
|
||||
movq(Address(rsp, 13 * wordSize), rdx);
|
||||
movq(Address(rsp, 12 * wordSize), rbx);
|
||||
movq(Address(rsp, 10 * wordSize), rbp);
|
||||
movq(Address(rsp, 9 * wordSize), rsi);
|
||||
movq(Address(rsp, 8 * wordSize), rdi);
|
||||
movq(Address(rsp, 7 * wordSize), r8);
|
||||
movq(Address(rsp, 6 * wordSize), r9);
|
||||
movq(Address(rsp, 5 * wordSize), r10);
|
||||
movq(Address(rsp, 4 * wordSize), r11);
|
||||
movq(Address(rsp, 3 * wordSize), r12);
|
||||
movq(Address(rsp, 2 * wordSize), r13);
|
||||
movq(Address(rsp, wordSize), r14);
|
||||
movq(Address(rsp, 0), r15);
|
||||
}
|
||||
|
||||
// Resotres back legacy GPRs state from stack.
|
||||
void MacroAssembler::restore_legacy_gprs() {
|
||||
movq(r15, Address(rsp, 0));
|
||||
movq(r14, Address(rsp, wordSize));
|
||||
movq(r13, Address(rsp, 2 * wordSize));
|
||||
movq(r12, Address(rsp, 3 * wordSize));
|
||||
movq(r11, Address(rsp, 4 * wordSize));
|
||||
movq(r10, Address(rsp, 5 * wordSize));
|
||||
movq(r9, Address(rsp, 6 * wordSize));
|
||||
movq(r8, Address(rsp, 7 * wordSize));
|
||||
movq(rdi, Address(rsp, 8 * wordSize));
|
||||
movq(rsi, Address(rsp, 9 * wordSize));
|
||||
movq(rbp, Address(rsp, 10 * wordSize));
|
||||
movq(rbx, Address(rsp, 12 * wordSize));
|
||||
movq(rdx, Address(rsp, 13 * wordSize));
|
||||
movq(rcx, Address(rsp, 14 * wordSize));
|
||||
movq(rax, Address(rsp, 15 * wordSize));
|
||||
addq(rsp, 16 * wordSize);
|
||||
}
|
||||
#endif
|
||||
|
@ -2150,6 +2150,11 @@ public:
|
||||
|
||||
void lightweight_lock(Register obj, Register reg_rax, Register thread, Register tmp, Label& slow);
|
||||
void lightweight_unlock(Register obj, Register reg_rax, Register thread, Register tmp, Label& slow);
|
||||
|
||||
#ifdef _LP64
|
||||
void save_legacy_gprs();
|
||||
void restore_legacy_gprs();
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -536,10 +536,11 @@ void trace_method_handle_stub(const char* adaptername,
|
||||
Register r = as_Register(i);
|
||||
// The registers are stored in reverse order on the stack (by pusha).
|
||||
#ifdef AMD64
|
||||
assert(Register::number_of_registers == 16, "sanity");
|
||||
int num_regs = UseAPX ? 32 : 16;
|
||||
assert(Register::available_gp_registers() == num_regs, "sanity");
|
||||
if (r == rsp) {
|
||||
// rsp is actually not stored by pusha(), compute the old rsp from saved_regs (rsp after pusha): saved_regs + 16 = old rsp
|
||||
ls.print("%3s=" PTR_FORMAT, r->name(), (intptr_t)(&saved_regs[16]));
|
||||
ls.print("%3s=" PTR_FORMAT, r->name(), (intptr_t)(&saved_regs[num_regs]));
|
||||
} else {
|
||||
ls.print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]);
|
||||
}
|
||||
|
@ -160,8 +160,13 @@ void NativeCall::set_destination_mt_safe(address dest) {
|
||||
void NativeMovConstReg::verify() {
|
||||
#ifdef AMD64
|
||||
// make sure code pattern is actually a mov reg64, imm64 instruction
|
||||
if ((ubyte_at(0) != Assembler::REX_W && ubyte_at(0) != Assembler::REX_WB) ||
|
||||
(ubyte_at(1) & (0xff ^ register_mask)) != 0xB8) {
|
||||
bool valid_rex_prefix = ubyte_at(0) == Assembler::REX_W || ubyte_at(0) == Assembler::REX_WB;
|
||||
bool valid_rex2_prefix = ubyte_at(0) == Assembler::REX2 &&
|
||||
(ubyte_at(1) == Assembler::REX2BIT_W ||
|
||||
ubyte_at(1) == Assembler::REX2BIT_WB ||
|
||||
ubyte_at(1) == Assembler::REX2BIT_WB4);
|
||||
int opcode = has_rex2_prefix() ? ubyte_at(2) : ubyte_at(1);
|
||||
if ((!valid_rex_prefix || !valid_rex2_prefix) && (opcode & (0xff ^ register_mask)) != 0xB8) {
|
||||
print();
|
||||
fatal("not a REX.W[B] mov reg64, imm64");
|
||||
}
|
||||
@ -208,6 +213,11 @@ int NativeMovRegMem::instruction_start() const {
|
||||
instr_0 = ubyte_at(off);
|
||||
}
|
||||
|
||||
if (instr_0 == instruction_REX2_prefix) {
|
||||
off+=2;
|
||||
instr_0 = ubyte_at(off);
|
||||
}
|
||||
|
||||
if (instr_0 == instruction_code_xor) {
|
||||
off += 2;
|
||||
instr_0 = ubyte_at(off);
|
||||
@ -226,29 +236,39 @@ int NativeMovRegMem::instruction_start() const {
|
||||
instr_0 = ubyte_at(off);
|
||||
}
|
||||
|
||||
if (instr_0 == instruction_REX2_prefix) {
|
||||
off+=2;
|
||||
instr_0 = ubyte_at(off);
|
||||
}
|
||||
|
||||
if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
|
||||
instr_0 <= instruction_prefix_wide_hi) { // 0x4f
|
||||
off++;
|
||||
instr_0 = ubyte_at(off);
|
||||
}
|
||||
|
||||
|
||||
// Extended prefixes can only follow REX prefixes,
|
||||
// REX2 is directly followed by main opcode.
|
||||
if (instr_0 == instruction_extended_prefix ) { // 0x0f
|
||||
off++;
|
||||
}
|
||||
|
||||
// Offset of instruction opcode.
|
||||
return off;
|
||||
}
|
||||
|
||||
// Format [REX/REX2] [OPCODE] [ModRM] [SIB] [IMM/DISP32]
|
||||
int NativeMovRegMem::patch_offset() const {
|
||||
int off = data_offset + instruction_start();
|
||||
u_char mod_rm = *(u_char*)(instruction_address() + 1);
|
||||
// nnnn(r12|rsp) isn't coded as simple mod/rm since that is
|
||||
// the encoding to use an SIB byte. Which will have the nnnn
|
||||
// field off by one byte
|
||||
// ModRM Byte Format = Mod[2] REG[3] RM[3]
|
||||
if ((mod_rm & 7) == 0x4) {
|
||||
off++;
|
||||
}
|
||||
// Displacement offset.
|
||||
return off;
|
||||
}
|
||||
|
||||
@ -294,12 +314,6 @@ void NativeMovRegMem::print() {
|
||||
void NativeLoadAddress::verify() {
|
||||
// make sure code pattern is actually a mov [reg+offset], reg instruction
|
||||
u_char test_byte = *(u_char*)instruction_address();
|
||||
#ifdef _LP64
|
||||
if ( (test_byte == instruction_prefix_wide ||
|
||||
test_byte == instruction_prefix_wide_extended) ) {
|
||||
test_byte = *(u_char*)(instruction_address() + 1);
|
||||
}
|
||||
#endif // _LP64
|
||||
if ( ! ((test_byte == lea_instruction_code)
|
||||
LP64_ONLY(|| (test_byte == mov64_instruction_code) ))) {
|
||||
fatal ("not a lea reg, [reg+offs] instruction");
|
||||
|
@ -90,6 +90,7 @@ class NativeInstruction {
|
||||
void wrote(int offset);
|
||||
|
||||
public:
|
||||
// True when the first byte of this instruction equals Assembler::REX2.
bool has_rex2_prefix() const { return ubyte_at(0) == Assembler::REX2; }
|
||||
|
||||
inline friend NativeInstruction* nativeInstruction_at(address address);
|
||||
};
|
||||
@ -178,19 +179,28 @@ inline NativeCall* nativeCall_before(address return_address) {
|
||||
return call;
|
||||
}
|
||||
|
||||
// Call with target address in a general purpose register(indirect absolute addressing).
|
||||
// Encoding : FF /2 CALL r/m32
|
||||
// Primary Opcode: FF
|
||||
// Opcode Extension(part of ModRM.REG): /2
|
||||
// Operand ModRM.RM = r/m32
|
||||
class NativeCallReg: public NativeInstruction {
|
||||
public:
|
||||
enum Intel_specific_constants {
|
||||
instruction_code = 0xFF,
|
||||
instruction_offset = 0,
|
||||
return_address_offset_norex = 2,
|
||||
return_address_offset_rex = 3
|
||||
return_address_offset_rex = 3,
|
||||
return_address_offset_rex2 = 4
|
||||
};
|
||||
|
||||
int next_instruction_offset() const {
|
||||
if (ubyte_at(0) == NativeCallReg::instruction_code) {
|
||||
return return_address_offset_norex;
|
||||
} else if (has_rex2_prefix()) {
|
||||
return return_address_offset_rex2;
|
||||
} else {
|
||||
assert((ubyte_at(0) & 0xF0) == Assembler::REX, "");
|
||||
return return_address_offset_rex;
|
||||
}
|
||||
}
|
||||
@ -198,28 +208,38 @@ class NativeCallReg: public NativeInstruction {
|
||||
|
||||
// An interface for accessing/manipulating native mov reg, imm32 instructions.
|
||||
// (used to manipulate inlined 32bit data dll calls, etc.)
|
||||
// Instruction format for implied addressing mode immediate operand move to register instruction:
|
||||
// [REX/REX2] [OPCODE] [IMM32]
|
||||
class NativeMovConstReg: public NativeInstruction {
|
||||
#ifdef AMD64
|
||||
static const bool has_rex = true;
|
||||
static const int rex_size = 1;
|
||||
static const int rex2_size = 2;
|
||||
#else
|
||||
static const bool has_rex = false;
|
||||
static const int rex_size = 0;
|
||||
static const int rex2_size = 0;
|
||||
#endif // AMD64
|
||||
public:
|
||||
enum Intel_specific_constants {
|
||||
instruction_code = 0xB8,
|
||||
instruction_size = 1 + rex_size + wordSize,
|
||||
instruction_offset = 0,
|
||||
data_offset = 1 + rex_size,
|
||||
next_instruction_offset = instruction_size,
|
||||
instruction_size_rex = 1 + rex_size + wordSize,
|
||||
instruction_size_rex2 = 1 + rex2_size + wordSize,
|
||||
data_offset_rex = 1 + rex_size,
|
||||
data_offset_rex2 = 1 + rex2_size,
|
||||
next_instruction_offset_rex = instruction_size_rex,
|
||||
next_instruction_offset_rex2 = instruction_size_rex2,
|
||||
register_mask = 0x07
|
||||
};
|
||||
|
||||
// Prefix-dependent accessors: each returns the *_rex2 constant when
// has_rex2_prefix() is true and the corresponding *_rex constant otherwise.
int instruction_size() const { return has_rex2_prefix() ? instruction_size_rex2 : instruction_size_rex; }
|
||||
int next_inst_offset() const { return has_rex2_prefix() ? next_instruction_offset_rex2 : next_instruction_offset_rex; }
|
||||
int data_byte_offset() const { return has_rex2_prefix() ? data_offset_rex2 : data_offset_rex;}
|
||||
address instruction_address() const { return addr_at(instruction_offset); }
|
||||
address next_instruction_address() const { return addr_at(next_instruction_offset); }
|
||||
intptr_t data() const { return ptr_at(data_offset); }
|
||||
void set_data(intptr_t x) { set_ptr_at(data_offset, x); }
|
||||
address next_instruction_address() const { return addr_at(next_inst_offset()); }
|
||||
intptr_t data() const { return ptr_at(data_byte_offset()); }
|
||||
void set_data(intptr_t x) { set_ptr_at(data_byte_offset(), x); }
|
||||
|
||||
void verify();
|
||||
void print();
|
||||
@ -238,7 +258,10 @@ inline NativeMovConstReg* nativeMovConstReg_at(address address) {
|
||||
}
|
||||
|
||||
inline NativeMovConstReg* nativeMovConstReg_before(address address) {
|
||||
NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset);
|
||||
int instruction_size = ((NativeInstruction*)(address))->has_rex2_prefix() ?
|
||||
NativeMovConstReg::instruction_size_rex2 :
|
||||
NativeMovConstReg::instruction_size_rex;
|
||||
NativeMovConstReg* test = (NativeMovConstReg*)(address - instruction_size - NativeMovConstReg::instruction_offset);
|
||||
#ifdef ASSERT
|
||||
test->verify();
|
||||
#endif
|
||||
@ -279,35 +302,47 @@ class NativeMovRegMem: public NativeInstruction {
|
||||
instruction_prefix_wide_hi = Assembler::REX_WRXB,
|
||||
instruction_code_xor = 0x33,
|
||||
instruction_extended_prefix = 0x0F,
|
||||
|
||||
// Legacy encoding MAP1 instructions promotable to REX2 encoding.
|
||||
instruction_code_mem2reg_movslq = 0x63,
|
||||
instruction_code_mem2reg_movzxb = 0xB6,
|
||||
instruction_code_mem2reg_movsxb = 0xBE,
|
||||
instruction_code_mem2reg_movzxw = 0xB7,
|
||||
instruction_code_mem2reg_movsxw = 0xBF,
|
||||
instruction_operandsize_prefix = 0x66,
|
||||
|
||||
// Legacy encoding MAP0 instructions promotable to REX2 encoding.
|
||||
instruction_code_reg2mem = 0x89,
|
||||
instruction_code_mem2reg = 0x8b,
|
||||
instruction_code_reg2memb = 0x88,
|
||||
instruction_code_mem2regb = 0x8a,
|
||||
instruction_code_lea = 0x8d,
|
||||
|
||||
instruction_code_float_s = 0xd9,
|
||||
instruction_code_float_d = 0xdd,
|
||||
instruction_code_long_volatile = 0xdf,
|
||||
|
||||
// VEX/EVEX/Legacy encoded MAP1 instructions promotable to REX2 encoding.
|
||||
instruction_code_xmm_ss_prefix = 0xf3,
|
||||
instruction_code_xmm_sd_prefix = 0xf2,
|
||||
|
||||
instruction_code_xmm_code = 0x0f,
|
||||
|
||||
// Address operand load/store/ldp are promotable to REX2 to accommodate
|
||||
// extended SIB encoding.
|
||||
instruction_code_xmm_load = 0x10,
|
||||
instruction_code_xmm_store = 0x11,
|
||||
instruction_code_xmm_lpd = 0x12,
|
||||
|
||||
instruction_code_lea = 0x8d,
|
||||
|
||||
instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes,
|
||||
instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes,
|
||||
instruction_EVEX_prefix_4bytes = Assembler::EVEX_4bytes,
|
||||
instruction_REX2_prefix = Assembler::REX2,
|
||||
|
||||
instruction_offset = 0,
|
||||
data_offset = 2,
|
||||
next_instruction_offset = 4
|
||||
next_instruction_offset_rex = 4,
|
||||
next_instruction_offset_rex2 = 5
|
||||
};
|
||||
|
||||
// helper
|
||||
@ -438,7 +473,8 @@ inline NativeJump* nativeJump_at(address address) {
|
||||
return jump;
|
||||
}
|
||||
|
||||
// Handles all kinds of jump on Intel. Long/far, conditional/unconditional
|
||||
// Handles all kinds of jump on Intel. Long/far, conditional/unconditional with relative offsets
|
||||
// barring register indirect jumps.
|
||||
class NativeGeneralJump: public NativeInstruction {
|
||||
public:
|
||||
enum Intel_specific_constants {
|
||||
@ -538,7 +574,7 @@ inline bool NativeInstruction::is_cond_jump() { return (int_at(0) & 0xF0FF) =
|
||||
inline bool NativeInstruction::is_safepoint_poll() {
|
||||
#ifdef AMD64
|
||||
const bool has_rex_prefix = ubyte_at(0) == NativeTstRegMem::instruction_rex_b_prefix;
|
||||
const int test_offset = has_rex_prefix ? 1 : 0;
|
||||
const int test_offset = has_rex2_prefix() ? 2 : (has_rex_prefix ? 1 : 0);
|
||||
#else
|
||||
const int test_offset = 0;
|
||||
#endif
|
||||
@ -549,8 +585,14 @@ inline bool NativeInstruction::is_safepoint_poll() {
|
||||
|
||||
inline bool NativeInstruction::is_mov_literal64() {
|
||||
#ifdef AMD64
|
||||
return ((ubyte_at(0) == Assembler::REX_W || ubyte_at(0) == Assembler::REX_WB) &&
|
||||
(ubyte_at(1) & (0xff ^ NativeMovConstReg::register_mask)) == 0xB8);
|
||||
bool valid_rex_prefix = ubyte_at(0) == Assembler::REX_W || ubyte_at(0) == Assembler::REX_WB;
|
||||
bool valid_rex2_prefix = ubyte_at(0) == Assembler::REX2 &&
|
||||
(ubyte_at(1) == Assembler::REX2BIT_W ||
|
||||
ubyte_at(1) == Assembler::REX2BIT_WB ||
|
||||
ubyte_at(1) == Assembler::REX2BIT_WB4);
|
||||
|
||||
int opcode = has_rex2_prefix() ? ubyte_at(2) : ubyte_at(1);
|
||||
return ((valid_rex_prefix || valid_rex2_prefix) && (opcode & (0xff ^ NativeMovConstReg::register_mask)) == 0xB8);
|
||||
#else
|
||||
return false;
|
||||
#endif // AMD64
|
||||
|
@ -35,7 +35,9 @@ const char * Register::RegisterImpl::name() const {
|
||||
static const char *const names[number_of_registers] = {
|
||||
#ifdef _LP64
|
||||
"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
|
||||
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
|
||||
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
|
||||
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
|
||||
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
|
||||
#else
|
||||
"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"
|
||||
#endif // _LP64
|
||||
|
@ -45,8 +45,8 @@ public:
|
||||
inline friend constexpr Register as_Register(int encoding);
|
||||
|
||||
enum {
|
||||
number_of_registers = LP64_ONLY( 16 ) NOT_LP64( 8 ),
|
||||
number_of_byte_registers = LP64_ONLY( 16 ) NOT_LP64( 4 ),
|
||||
number_of_registers = LP64_ONLY( 32 ) NOT_LP64( 8 ),
|
||||
number_of_byte_registers = LP64_ONLY( 32 ) NOT_LP64( 4 ),
|
||||
max_slots_per_register = LP64_ONLY( 2 ) NOT_LP64( 1 )
|
||||
};
|
||||
|
||||
@ -76,6 +76,16 @@ public:
|
||||
int operator!=(const Register r) const { return _encoding != r._encoding; }
|
||||
|
||||
constexpr const RegisterImpl* operator->() const { return RegisterImpl::first() + _encoding; }
|
||||
|
||||
// Actually available GP registers for use, depending on actual CPU capabilities and flags.
|
||||
static int available_gp_registers() {
|
||||
#ifdef _LP64
|
||||
if (!UseAPX) {
|
||||
return number_of_registers / 2;
|
||||
}
|
||||
#endif // _LP64
|
||||
return number_of_registers;
|
||||
}
|
||||
};
|
||||
|
||||
extern const Register::RegisterImpl all_RegisterImpls[Register::number_of_registers + 1] INTERNAL_VISIBILITY;
|
||||
@ -115,6 +125,22 @@ constexpr Register r12 = as_Register(12);
|
||||
constexpr Register r13 = as_Register(13);
|
||||
constexpr Register r14 = as_Register(14);
|
||||
constexpr Register r15 = as_Register(15);
|
||||
constexpr Register r16 = as_Register(16);
|
||||
constexpr Register r17 = as_Register(17);
|
||||
constexpr Register r18 = as_Register(18);
|
||||
constexpr Register r19 = as_Register(19);
|
||||
constexpr Register r20 = as_Register(20);
|
||||
constexpr Register r21 = as_Register(21);
|
||||
constexpr Register r22 = as_Register(22);
|
||||
constexpr Register r23 = as_Register(23);
|
||||
constexpr Register r24 = as_Register(24);
|
||||
constexpr Register r25 = as_Register(25);
|
||||
constexpr Register r26 = as_Register(26);
|
||||
constexpr Register r27 = as_Register(27);
|
||||
constexpr Register r28 = as_Register(28);
|
||||
constexpr Register r29 = as_Register(29);
|
||||
constexpr Register r30 = as_Register(30);
|
||||
constexpr Register r31 = as_Register(31);
|
||||
#endif // _LP64
|
||||
|
||||
|
||||
|
@ -95,6 +95,7 @@ class RegisterSaver {
|
||||
// units because compiler frame slots are jints.
|
||||
#define XSAVE_AREA_BEGIN 160
|
||||
#define XSAVE_AREA_YMM_BEGIN 576
|
||||
#define XSAVE_AREA_EGPRS 960
|
||||
#define XSAVE_AREA_OPMASK_BEGIN 1088
|
||||
#define XSAVE_AREA_ZMM_BEGIN 1152
|
||||
#define XSAVE_AREA_UPPERBANK 1664
|
||||
@ -113,6 +114,23 @@ class RegisterSaver {
|
||||
DEF_YMM_OFFS(0),
|
||||
DEF_YMM_OFFS(1),
|
||||
// 2..15 are implied in range usage
|
||||
r31_off = xmm_off + (XSAVE_AREA_EGPRS - XSAVE_AREA_BEGIN)/BytesPerInt,
|
||||
r31H_off,
|
||||
r30_off, r30H_off,
|
||||
r29_off, r29H_off,
|
||||
r28_off, r28H_off,
|
||||
r27_off, r27H_off,
|
||||
r26_off, r26H_off,
|
||||
r25_off, r25H_off,
|
||||
r24_off, r24H_off,
|
||||
r23_off, r23H_off,
|
||||
r22_off, r22H_off,
|
||||
r21_off, r21H_off,
|
||||
r20_off, r20H_off,
|
||||
r19_off, r19H_off,
|
||||
r18_off, r18H_off,
|
||||
r17_off, r17H_off,
|
||||
r16_off, r16H_off,
|
||||
opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
|
||||
DEF_OPMASK_OFFS(0),
|
||||
DEF_OPMASK_OFFS(1),
|
||||
@ -199,7 +217,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
// to be under the return like a normal enter.
|
||||
|
||||
__ enter(); // rsp becomes 16-byte aligned here
|
||||
__ push_CPU_state(); // Push a multiple of 16 bytes
|
||||
__ pushf();
|
||||
// Make sure rsp stays 16-byte aligned
|
||||
__ subq(rsp, 8);
|
||||
// Push CPU state in multiple of 16 bytes
|
||||
__ save_legacy_gprs();
|
||||
__ push_FPU_state();
|
||||
|
||||
|
||||
// push cpu state handles this on EVEX enabled targets
|
||||
if (save_wide_vectors) {
|
||||
@ -247,6 +271,17 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#if COMPILER2_OR_JVMCI
|
||||
if (UseAPX) {
|
||||
int base_addr = XSAVE_AREA_EGPRS;
|
||||
off = 0;
|
||||
for(int n = 16; n < Register::number_of_registers; n++) {
|
||||
__ movq(Address(rsp, base_addr+(off++*8)), as_Register(n));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
__ vzeroupper();
|
||||
if (frame::arg_reg_save_area_bytes != 0) {
|
||||
// Allocate argument register save area
|
||||
@ -279,6 +314,25 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
|
||||
|
||||
if (UseAPX) {
|
||||
map->set_callee_saved(STACK_OFFSET( r16_off ), r16->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r17_off ), r17->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r18_off ), r18->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r19_off ), r19->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r20_off ), r20->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r21_off ), r21->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r22_off ), r22->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r23_off ), r23->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r24_off ), r24->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r25_off ), r25->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r26_off ), r26->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r27_off ), r27->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r28_off ), r28->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r29_off ), r29->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r30_off ), r30->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r31_off ), r31->as_VMReg());
|
||||
}
|
||||
// For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
|
||||
// on EVEX enabled targets, we get it included in the xsave area
|
||||
off = xmm0_off;
|
||||
@ -339,6 +393,24 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
|
||||
if (UseAPX) {
|
||||
map->set_callee_saved(STACK_OFFSET( r16H_off ), r16->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r17H_off ), r17->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r18H_off ), r18->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r19H_off ), r19->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r20H_off ), r20->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r21H_off ), r21->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r22H_off ), r22->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r23H_off ), r23->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r24H_off ), r24->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r25H_off ), r25->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r26H_off ), r26->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r27H_off ), r27->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r28H_off ), r28->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r29H_off ), r29->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r30H_off ), r30->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r31H_off ), r31->as_VMReg()->next());
|
||||
}
|
||||
// For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
|
||||
// on EVEX enabled targets, we get it included in the xsave area
|
||||
off = xmm0H_off;
|
||||
@ -428,8 +500,21 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wi
|
||||
}
|
||||
}
|
||||
|
||||
#if COMPILER2_OR_JVMCI
|
||||
if (UseAPX) {
|
||||
int base_addr = XSAVE_AREA_EGPRS;
|
||||
int off = 0;
|
||||
for (int n = 16; n < Register::number_of_registers; n++) {
|
||||
__ movq(as_Register(n), Address(rsp, base_addr+(off++*8)));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Recover CPU state
|
||||
__ pop_CPU_state();
|
||||
__ pop_FPU_state();
|
||||
__ restore_legacy_gprs();
|
||||
__ addq(rsp, 8);
|
||||
__ popf();
|
||||
// Get the rbp described implicitly by the calling convention (no oopMap)
|
||||
__ pop(rbp);
|
||||
}
|
||||
@ -2543,6 +2628,9 @@ void SharedRuntime::generate_deopt_blob() {
|
||||
if (UseAVX > 2) {
|
||||
pad += 1024;
|
||||
}
|
||||
if (UseAPX) {
|
||||
pad += 1024;
|
||||
}
|
||||
#if INCLUDE_JVMCI
|
||||
if (EnableJVMCI) {
|
||||
pad += 512; // Increase the buffer size when compiling for JVMCI
|
||||
@ -3091,7 +3179,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
|
||||
OopMap* map;
|
||||
|
||||
// Allocate space for the code. Setup code generation tools.
|
||||
CodeBuffer buffer("handler_blob", 2048, 1024);
|
||||
CodeBuffer buffer("handler_blob", 2348, 1024);
|
||||
MacroAssembler* masm = new MacroAssembler(&buffer);
|
||||
|
||||
address start = __ pc();
|
||||
@ -3247,7 +3335,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
|
||||
// allocate space for the code
|
||||
ResourceMark rm;
|
||||
|
||||
CodeBuffer buffer(name, 1200, 512);
|
||||
CodeBuffer buffer(name, 1552, 512);
|
||||
MacroAssembler* masm = new MacroAssembler(&buffer);
|
||||
|
||||
int frame_size_in_words;
|
||||
|
@ -40,13 +40,17 @@
|
||||
#define __ _masm->
|
||||
|
||||
static bool is_valid_XMM(XMMRegister reg) {
|
||||
return reg->is_valid() && (UseAVX >= 3 || (reg->encoding() < 16)); // why is this not covered by is_valid()?
|
||||
return reg->is_valid() && (reg->encoding() < (UseAVX >= 3 ? 32 : 16)); // why is this not covered by is_valid()?
|
||||
}
|
||||
|
||||
// A general purpose register is usable here when it is valid and its encoding
// lies below the current limit: 32 with UseAPX, 16 otherwise.
static bool is_valid_gp(Register reg) {
  if (!reg->is_valid()) {
    return false;
  }
  const int gp_limit = UseAPX ? 32 : 16;
  return reg->encoding() < gp_limit;
}
|
||||
|
||||
// for callee saved regs, according to the caller's ABI
|
||||
static int compute_reg_save_area_size(const ABIDescriptor& abi) {
|
||||
int size = 0;
|
||||
for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
|
||||
for (Register reg = as_Register(0); is_valid_gp(reg); reg = reg->successor()) {
|
||||
if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
|
||||
if (!abi.is_volatile_reg(reg)) {
|
||||
size += 8; // bytes
|
||||
@ -84,7 +88,7 @@ static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDesc
|
||||
int offset = reg_save_area_offset;
|
||||
|
||||
__ block_comment("{ preserve_callee_saved_regs ");
|
||||
for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
|
||||
for (Register reg = as_Register(0); is_valid_gp(reg); reg = reg->successor()) {
|
||||
if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
|
||||
if (!abi.is_volatile_reg(reg)) {
|
||||
__ movptr(Address(rsp, offset), reg);
|
||||
@ -134,7 +138,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
|
||||
int offset = reg_save_area_offset;
|
||||
|
||||
__ block_comment("{ restore_callee_saved_regs ");
|
||||
for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
|
||||
for (Register reg = as_Register(0); is_valid_gp(reg); reg = reg->successor()) {
|
||||
if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
|
||||
if (!abi.is_volatile_reg(reg)) {
|
||||
__ movptr(reg, Address(rsp, offset));
|
||||
|
@ -108,6 +108,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
|
||||
VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
|
||||
|
||||
#if defined(_LP64)
|
||||
address clear_apx_test_state() {
|
||||
# define __ _masm->
|
||||
address start = __ pc();
|
||||
@ -115,7 +116,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
// handling guarantees that preserved register values post signal handling were
|
||||
// re-instantiated by operating system and not because they were not modified externally.
|
||||
|
||||
/* FIXME Uncomment following code after OS enablement of
|
||||
bool save_apx = UseAPX;
|
||||
VM_Version::set_apx_cpuFeatures();
|
||||
UseAPX = true;
|
||||
@ -124,10 +124,10 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
__ mov64(r31, 0L);
|
||||
UseAPX = save_apx;
|
||||
VM_Version::clean_cpuFeatures();
|
||||
*/
|
||||
__ ret(0);
|
||||
return start;
|
||||
}
|
||||
#endif
|
||||
|
||||
address generate_get_cpu_info() {
|
||||
// Flags to test CPU type.
|
||||
@ -419,7 +419,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
__ movl(Address(rsi, 8), rcx);
|
||||
__ movl(Address(rsi,12), rdx);
|
||||
|
||||
#ifndef PRODUCT
|
||||
#if defined(_LP64)
|
||||
//
|
||||
// Check if OS has enabled XGETBV instruction to access XCR0
|
||||
// (OSXSAVE feature flag) and CPU supports APX
|
||||
@ -437,26 +437,22 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
__ cmpl(rax, 0x80000);
|
||||
__ jcc(Assembler::notEqual, vector_save_restore);
|
||||
|
||||
/* FIXME: Uncomment while integrating JDK-8329032
|
||||
bool save_apx = UseAPX;
|
||||
VM_Version::set_apx_cpuFeatures();
|
||||
UseAPX = true;
|
||||
__ mov64(r16, VM_Version::egpr_test_value());
|
||||
__ mov64(r31, VM_Version::egpr_test_value());
|
||||
*/
|
||||
__ xorl(rsi, rsi);
|
||||
VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
|
||||
// Generate SEGV
|
||||
__ movl(rax, Address(rsi, 0));
|
||||
|
||||
VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
|
||||
/* FIXME: Uncomment after integration of JDK-8329032
|
||||
__ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
|
||||
__ movq(Address(rsi, 0), r16);
|
||||
__ movq(Address(rsi, 8), r31);
|
||||
|
||||
UseAPX = save_apx;
|
||||
*/
|
||||
#endif
|
||||
__ bind(vector_save_restore);
|
||||
//
|
||||
@ -2170,9 +2166,11 @@ int VM_Version::avx3_threshold() {
|
||||
FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
|
||||
}
|
||||
|
||||
#if defined(_LP64)
|
||||
// Thin wrapper: forwards to clear_apx_test_state_stub().
void VM_Version::clear_apx_test_state() {
|
||||
clear_apx_test_state_stub();
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool _vm_version_initialized = false;
|
||||
|
||||
@ -2191,8 +2189,10 @@ void VM_Version::initialize() {
|
||||
detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
|
||||
g.generate_detect_virt());
|
||||
|
||||
#if defined(_LP64)
|
||||
clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
|
||||
g.clear_apx_test_state());
|
||||
#endif
|
||||
get_processor_features();
|
||||
|
||||
LP64_ONLY(Assembler::precompute_instructions();)
|
||||
@ -3183,11 +3183,17 @@ bool VM_Version::os_supports_apx_egprs() {
|
||||
if (!supports_apx_f()) {
|
||||
return false;
|
||||
}
|
||||
// Enable APX support for product builds after
|
||||
// completion of planned features listed in JDK-8329030.
|
||||
#if !defined(PRODUCT)
|
||||
if (_cpuid_info.apx_save[0] != egpr_test_value() ||
|
||||
_cpuid_info.apx_save[1] != egpr_test_value()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
uint VM_Version::cores_per_cpu() {
|
||||
|
@ -635,7 +635,7 @@ public:
|
||||
static void set_cpuinfo_cont_addr_apx(address pc) { _cpuinfo_cont_addr_apx = pc; }
|
||||
static address cpuinfo_cont_addr_apx() { return _cpuinfo_cont_addr_apx; }
|
||||
|
||||
static void clear_apx_test_state();
|
||||
LP64_ONLY(static void clear_apx_test_state());
|
||||
|
||||
static void clean_cpuFeatures() { _features = 0; }
|
||||
static void set_avx_cpuFeatures() { _features = (CPU_SSE | CPU_SSE2 | CPU_AVX | CPU_VZEROUPPER ); }
|
||||
|
@ -28,7 +28,8 @@
|
||||
#include "register_x86.hpp"
|
||||
|
||||
inline bool is_Register() {
|
||||
return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr;
|
||||
int uarch_max_gpr = Register::max_slots_per_register * Register::available_gp_registers();
|
||||
return (unsigned int) value() < (unsigned int) uarch_max_gpr;
|
||||
}
|
||||
|
||||
inline bool is_FloatRegister() {
|
||||
|
@ -128,6 +128,53 @@ reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
|
||||
reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
|
||||
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
|
||||
|
||||
reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
|
||||
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
|
||||
|
||||
reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
|
||||
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
|
||||
|
||||
reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
|
||||
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
|
||||
|
||||
reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
|
||||
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
|
||||
|
||||
reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
|
||||
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
|
||||
|
||||
reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
|
||||
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
|
||||
|
||||
reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
|
||||
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
|
||||
|
||||
reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
|
||||
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
|
||||
|
||||
reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
|
||||
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
|
||||
|
||||
reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
|
||||
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
|
||||
|
||||
reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
|
||||
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
|
||||
|
||||
reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
|
||||
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
|
||||
|
||||
reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
|
||||
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
|
||||
|
||||
reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
|
||||
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
|
||||
|
||||
reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
|
||||
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
|
||||
|
||||
reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
|
||||
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
|
||||
|
||||
// Floating Point Registers
|
||||
|
||||
@ -154,6 +201,22 @@ alloc_class chunk0(R10, R10_H,
|
||||
R13, R13_H,
|
||||
R14, R14_H,
|
||||
R15, R15_H,
|
||||
R16, R16_H,
|
||||
R17, R17_H,
|
||||
R18, R18_H,
|
||||
R19, R19_H,
|
||||
R20, R20_H,
|
||||
R21, R21_H,
|
||||
R22, R22_H,
|
||||
R23, R23_H,
|
||||
R24, R24_H,
|
||||
R25, R25_H,
|
||||
R26, R26_H,
|
||||
R27, R27_H,
|
||||
R28, R28_H,
|
||||
R29, R29_H,
|
||||
R30, R30_H,
|
||||
R31, R31_H,
|
||||
RSP, RSP_H);
|
||||
|
||||
|
||||
@ -167,7 +230,7 @@ alloc_class chunk0(R10, R10_H,
|
||||
// Empty register class.
|
||||
reg_class no_reg();
|
||||
|
||||
// Class for all pointer/long registers
|
||||
// Class for all pointer/long registers including APX extended GPRs.
|
||||
reg_class all_reg(RAX, RAX_H,
|
||||
RDX, RDX_H,
|
||||
RBP, RBP_H,
|
||||
@ -183,9 +246,25 @@ reg_class all_reg(RAX, RAX_H,
|
||||
R12, R12_H,
|
||||
R13, R13_H,
|
||||
R14, R14_H,
|
||||
R15, R15_H);
|
||||
R15, R15_H,
|
||||
R16, R16_H,
|
||||
R17, R17_H,
|
||||
R18, R18_H,
|
||||
R19, R19_H,
|
||||
R20, R20_H,
|
||||
R21, R21_H,
|
||||
R22, R22_H,
|
||||
R23, R23_H,
|
||||
R24, R24_H,
|
||||
R25, R25_H,
|
||||
R26, R26_H,
|
||||
R27, R27_H,
|
||||
R28, R28_H,
|
||||
R29, R29_H,
|
||||
R30, R30_H,
|
||||
R31, R31_H);
|
||||
|
||||
// Class for all int registers
|
||||
// Class for all int registers including APX extended GPRs.
|
||||
reg_class all_int_reg(RAX
|
||||
RDX,
|
||||
RBP,
|
||||
@ -199,7 +278,23 @@ reg_class all_int_reg(RAX
|
||||
R11,
|
||||
R12,
|
||||
R13,
|
||||
R14);
|
||||
R14,
|
||||
R16,
|
||||
R17,
|
||||
R18,
|
||||
R19,
|
||||
R20,
|
||||
R21,
|
||||
R22,
|
||||
R23,
|
||||
R24,
|
||||
R25,
|
||||
R26,
|
||||
R27,
|
||||
R28,
|
||||
R29,
|
||||
R30,
|
||||
R31);
|
||||
|
||||
// Class for all pointer registers
|
||||
reg_class any_reg %{
|
||||
@ -386,6 +481,8 @@ static bool need_r12_heapbase() {
|
||||
}
|
||||
|
||||
void reg_mask_init() {
|
||||
constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
|
||||
|
||||
// _ALL_REG_mask is generated by adlc from the all_reg register class below.
|
||||
// We derive a number of subsets from it.
|
||||
_ANY_REG_mask = _ALL_REG_mask;
|
||||
@ -404,6 +501,12 @@ void reg_mask_init() {
|
||||
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
|
||||
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
|
||||
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
|
||||
if (!UseAPX) {
|
||||
for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
|
||||
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
|
||||
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
|
||||
}
|
||||
}
|
||||
|
||||
_STACK_OR_PTR_REG_mask = _PTR_REG_mask;
|
||||
_STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
|
||||
@ -420,6 +523,7 @@ void reg_mask_init() {
|
||||
_PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
|
||||
_PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
|
||||
|
||||
|
||||
_LONG_REG_mask = _PTR_REG_mask;
|
||||
_STACK_OR_LONG_REG_mask = _LONG_REG_mask;
|
||||
_STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
|
||||
@ -441,6 +545,12 @@ void reg_mask_init() {
|
||||
_LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
|
||||
|
||||
_INT_REG_mask = _ALL_INT_REG_mask;
|
||||
if (!UseAPX) {
|
||||
for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
|
||||
_INT_REG_mask.Remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
|
||||
}
|
||||
}
|
||||
|
||||
if (PreserveFramePointer) {
|
||||
_INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
|
||||
}
|
||||
@ -12320,7 +12430,6 @@ instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
|
||||
format %{ "testl rax, [$poll]\t"
|
||||
"# Safepoint: poll for GC" %}
|
||||
ins_cost(125);
|
||||
size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
|
||||
ins_encode %{
|
||||
__ relocate(relocInfo::poll_type);
|
||||
address pre_pc = __ pc();
|
||||
|
@ -2758,7 +2758,7 @@ LONG WINAPI topLevelExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo) {
|
||||
return Handle_Exception(exceptionInfo, VM_Version::cpuinfo_cont_addr());
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
#if !defined(PRODUCT) && defined(_LP64)
|
||||
if ((exception_code == EXCEPTION_ACCESS_VIOLATION) &&
|
||||
VM_Version::is_cpuinfo_segv_addr_apx(pc)) {
|
||||
// Verify that the OS saves/restores APX registers.
|
||||
|
@ -416,7 +416,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
|
||||
stub = VM_Version::cpuinfo_cont_addr();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
#if !defined(PRODUCT) && defined(_LP64)
|
||||
if ((sig == SIGSEGV || sig == SIGBUS) && VM_Version::is_cpuinfo_segv_addr_apx(pc)) {
|
||||
// Verify that OS save/restore APX registers.
|
||||
stub = VM_Version::cpuinfo_cont_addr_apx();
|
||||
|
@ -248,7 +248,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
|
||||
stub = VM_Version::cpuinfo_cont_addr();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
#if !defined(PRODUCT) && defined(_LP64)
|
||||
if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr_apx(pc)) {
|
||||
// Verify that OS save/restore APX registers.
|
||||
stub = VM_Version::cpuinfo_cont_addr_apx();
|
||||
|
@ -66,49 +66,68 @@ public class AMD64 extends Architecture {
|
||||
public static final Register r14 = new Register(14, 14, "r14", CPU);
|
||||
public static final Register r15 = new Register(15, 15, "r15", CPU);
|
||||
|
||||
public static final Register r16 = new Register(16, 16, "r16", CPU);
|
||||
public static final Register r17 = new Register(17, 17, "r17", CPU);
|
||||
public static final Register r18 = new Register(18, 18, "r18", CPU);
|
||||
public static final Register r19 = new Register(19, 19, "r19", CPU);
|
||||
public static final Register r20 = new Register(20, 20, "r20", CPU);
|
||||
public static final Register r21 = new Register(21, 21, "r21", CPU);
|
||||
public static final Register r22 = new Register(22, 22, "r22", CPU);
|
||||
public static final Register r23 = new Register(23, 23, "r23", CPU);
|
||||
public static final Register r24 = new Register(24, 24, "r24", CPU);
|
||||
public static final Register r25 = new Register(25, 25, "r25", CPU);
|
||||
public static final Register r26 = new Register(26, 26, "r26", CPU);
|
||||
public static final Register r27 = new Register(27, 27, "r27", CPU);
|
||||
public static final Register r28 = new Register(28, 28, "r28", CPU);
|
||||
public static final Register r29 = new Register(29, 29, "r29", CPU);
|
||||
public static final Register r30 = new Register(30, 30, "r30", CPU);
|
||||
public static final Register r31 = new Register(31, 31, "r31", CPU);
|
||||
|
||||
public static final Register[] cpuRegisters = {
|
||||
rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
|
||||
r8, r9, r10, r11, r12, r13, r14, r15
|
||||
r8, r9, r10, r11, r12, r13, r14, r15,
|
||||
r16, r17, r18, r19, r20, r21, r22, r23,
|
||||
r24, r25, r26, r27, r28, r29, r30, r31
|
||||
};
|
||||
|
||||
public static final RegisterCategory XMM = new RegisterCategory("XMM");
|
||||
|
||||
// XMM registers
|
||||
public static final Register xmm0 = new Register(16, 0, "xmm0", XMM);
|
||||
public static final Register xmm1 = new Register(17, 1, "xmm1", XMM);
|
||||
public static final Register xmm2 = new Register(18, 2, "xmm2", XMM);
|
||||
public static final Register xmm3 = new Register(19, 3, "xmm3", XMM);
|
||||
public static final Register xmm4 = new Register(20, 4, "xmm4", XMM);
|
||||
public static final Register xmm5 = new Register(21, 5, "xmm5", XMM);
|
||||
public static final Register xmm6 = new Register(22, 6, "xmm6", XMM);
|
||||
public static final Register xmm7 = new Register(23, 7, "xmm7", XMM);
|
||||
public static final Register xmm0 = new Register(32, 0, "xmm0", XMM);
|
||||
public static final Register xmm1 = new Register(33, 1, "xmm1", XMM);
|
||||
public static final Register xmm2 = new Register(34, 2, "xmm2", XMM);
|
||||
public static final Register xmm3 = new Register(35, 3, "xmm3", XMM);
|
||||
public static final Register xmm4 = new Register(36, 4, "xmm4", XMM);
|
||||
public static final Register xmm5 = new Register(37, 5, "xmm5", XMM);
|
||||
public static final Register xmm6 = new Register(38, 6, "xmm6", XMM);
|
||||
public static final Register xmm7 = new Register(39, 7, "xmm7", XMM);
|
||||
|
||||
public static final Register xmm8 = new Register(24, 8, "xmm8", XMM);
|
||||
public static final Register xmm9 = new Register(25, 9, "xmm9", XMM);
|
||||
public static final Register xmm10 = new Register(26, 10, "xmm10", XMM);
|
||||
public static final Register xmm11 = new Register(27, 11, "xmm11", XMM);
|
||||
public static final Register xmm12 = new Register(28, 12, "xmm12", XMM);
|
||||
public static final Register xmm13 = new Register(29, 13, "xmm13", XMM);
|
||||
public static final Register xmm14 = new Register(30, 14, "xmm14", XMM);
|
||||
public static final Register xmm15 = new Register(31, 15, "xmm15", XMM);
|
||||
public static final Register xmm8 = new Register(40, 8, "xmm8", XMM);
|
||||
public static final Register xmm9 = new Register(41, 9, "xmm9", XMM);
|
||||
public static final Register xmm10 = new Register(42, 10, "xmm10", XMM);
|
||||
public static final Register xmm11 = new Register(43, 11, "xmm11", XMM);
|
||||
public static final Register xmm12 = new Register(44, 12, "xmm12", XMM);
|
||||
public static final Register xmm13 = new Register(45, 13, "xmm13", XMM);
|
||||
public static final Register xmm14 = new Register(46, 14, "xmm14", XMM);
|
||||
public static final Register xmm15 = new Register(47, 15, "xmm15", XMM);
|
||||
|
||||
public static final Register xmm16 = new Register(32, 16, "xmm16", XMM);
|
||||
public static final Register xmm17 = new Register(33, 17, "xmm17", XMM);
|
||||
public static final Register xmm18 = new Register(34, 18, "xmm18", XMM);
|
||||
public static final Register xmm19 = new Register(35, 19, "xmm19", XMM);
|
||||
public static final Register xmm20 = new Register(36, 20, "xmm20", XMM);
|
||||
public static final Register xmm21 = new Register(37, 21, "xmm21", XMM);
|
||||
public static final Register xmm22 = new Register(38, 22, "xmm22", XMM);
|
||||
public static final Register xmm23 = new Register(39, 23, "xmm23", XMM);
|
||||
public static final Register xmm16 = new Register(48, 16, "xmm16", XMM);
|
||||
public static final Register xmm17 = new Register(49, 17, "xmm17", XMM);
|
||||
public static final Register xmm18 = new Register(50, 18, "xmm18", XMM);
|
||||
public static final Register xmm19 = new Register(51, 19, "xmm19", XMM);
|
||||
public static final Register xmm20 = new Register(52, 20, "xmm20", XMM);
|
||||
public static final Register xmm21 = new Register(53, 21, "xmm21", XMM);
|
||||
public static final Register xmm22 = new Register(54, 22, "xmm22", XMM);
|
||||
public static final Register xmm23 = new Register(55, 23, "xmm23", XMM);
|
||||
|
||||
public static final Register xmm24 = new Register(40, 24, "xmm24", XMM);
|
||||
public static final Register xmm25 = new Register(41, 25, "xmm25", XMM);
|
||||
public static final Register xmm26 = new Register(42, 26, "xmm26", XMM);
|
||||
public static final Register xmm27 = new Register(43, 27, "xmm27", XMM);
|
||||
public static final Register xmm28 = new Register(44, 28, "xmm28", XMM);
|
||||
public static final Register xmm29 = new Register(45, 29, "xmm29", XMM);
|
||||
public static final Register xmm30 = new Register(46, 30, "xmm30", XMM);
|
||||
public static final Register xmm31 = new Register(47, 31, "xmm31", XMM);
|
||||
public static final Register xmm24 = new Register(56, 24, "xmm24", XMM);
|
||||
public static final Register xmm25 = new Register(57, 25, "xmm25", XMM);
|
||||
public static final Register xmm26 = new Register(58, 26, "xmm26", XMM);
|
||||
public static final Register xmm27 = new Register(59, 27, "xmm27", XMM);
|
||||
public static final Register xmm28 = new Register(60, 28, "xmm28", XMM);
|
||||
public static final Register xmm29 = new Register(61, 29, "xmm29", XMM);
|
||||
public static final Register xmm30 = new Register(62, 30, "xmm30", XMM);
|
||||
public static final Register xmm31 = new Register(63, 31, "xmm31", XMM);
|
||||
|
||||
public static final Register[] xmmRegistersSSE = {
|
||||
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
|
||||
@ -124,14 +143,14 @@ public class AMD64 extends Architecture {
|
||||
|
||||
public static final RegisterCategory MASK = new RegisterCategory("MASK", false);
|
||||
|
||||
public static final Register k0 = new Register(48, 0, "k0", MASK);
|
||||
public static final Register k1 = new Register(49, 1, "k1", MASK);
|
||||
public static final Register k2 = new Register(50, 2, "k2", MASK);
|
||||
public static final Register k3 = new Register(51, 3, "k3", MASK);
|
||||
public static final Register k4 = new Register(52, 4, "k4", MASK);
|
||||
public static final Register k5 = new Register(53, 5, "k5", MASK);
|
||||
public static final Register k6 = new Register(54, 6, "k6", MASK);
|
||||
public static final Register k7 = new Register(55, 7, "k7", MASK);
|
||||
public static final Register k0 = new Register(64, 0, "k0", MASK);
|
||||
public static final Register k1 = new Register(65, 1, "k1", MASK);
|
||||
public static final Register k2 = new Register(66, 2, "k2", MASK);
|
||||
public static final Register k3 = new Register(67, 3, "k3", MASK);
|
||||
public static final Register k4 = new Register(68, 4, "k4", MASK);
|
||||
public static final Register k5 = new Register(69, 5, "k5", MASK);
|
||||
public static final Register k6 = new Register(70, 6, "k6", MASK);
|
||||
public static final Register k7 = new Register(71, 7, "k7", MASK);
|
||||
|
||||
public static final RegisterArray valueRegistersSSE = new RegisterArray(
|
||||
rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
|
||||
@ -143,6 +162,8 @@ public class AMD64 extends Architecture {
|
||||
public static final RegisterArray valueRegistersAVX512 = new RegisterArray(
|
||||
rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
|
||||
r8, r9, r10, r11, r12, r13, r14, r15,
|
||||
r16, r17, r18, r19, r20, r21, r22, r23,
|
||||
r24, r25, r26, r27, r28, r29, r30, r31,
|
||||
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
|
||||
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
|
||||
xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
|
||||
@ -153,7 +174,7 @@ public class AMD64 extends Architecture {
|
||||
/**
|
||||
* Register used to construct an instruction-relative address.
|
||||
*/
|
||||
public static final Register rip = new Register(56, -1, "rip", SPECIAL);
|
||||
public static final Register rip = new Register(72, -1, "rip", SPECIAL);
|
||||
|
||||
public static final RegisterArray allRegisters = new RegisterArray(
|
||||
rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
|
||||
|
Loading…
x
Reference in New Issue
Block a user