8329032: C2 compiler register allocation support for APX EGPRs

Reviewed-by: kvn, sviswanathan
This commit is contained in:
Jatin Bhateja 2024-06-20 23:35:15 +00:00
parent 4b4a483b6f
commit e5de26ddf0
26 changed files with 732 additions and 180 deletions

View File

@ -842,7 +842,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case REX2:
NOT_LP64(assert(false, "64bit prefixes"));
if ((0xFF & *ip++) & REXBIT_W) {
if ((0xFF & *ip++) & REX2BIT_W) {
is_64bit = true;
}
goto again_after_prefix;
@ -899,7 +899,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case REX2:
NOT_LP64(assert(false, "64bit prefix found"));
if ((0xFF & *ip++) & REXBIT_W) {
if ((0xFF & *ip++) & REX2BIT_W) {
is_64bit = true;
}
goto again_after_size_prefix2;
@ -4498,7 +4498,7 @@ void Assembler::ud2() {
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
assert(!needs_eevex(src.base(), src.index()), "does not support extended gprs");
assert(!needs_eevex(src.base(), src.index()), "does not support extended gprs as BASE or INDEX of address operand");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@ -5893,6 +5893,71 @@ void Assembler::evpunpckhqdq(XMMRegister dst, KRegister mask, XMMRegister src1,
emit_int16(0x6D, (0xC0 | encode));
}
#ifdef _LP64
// Emits the APX PUSH2 instruction for the register pair (src1, src2).
// with_ppx is forwarded as the W attribute of the prefix; push2p() is the
// variant that sets it.
// Encoding: extended EVEX prefix (map 4, ND = 1), opcode 0xFF, then a ModRM byte
// with mod = 11, reg = /6 and rm = low three bits of src1. src2 is carried in
// EVEX.vvvv.
void Assembler::push2(Register src1, Register src2, bool with_ppx) {
  assert(VM_Version::supports_apx_f(), "requires APX");
  InstructionAttr attributes(0, /* rex_w */ with_ppx, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_extended_context();     // EVEX.ND = 1
  attributes.set_is_evex_instruction();
  set_attributes(&attributes);

  const int base_enc = src1->encoding();  // EVEX.BASE
  const int vvvv_enc = src2->encoding();  // EVEX.VVVV
  // Bits 3 and 4 of the register numbers do not fit into ModRM / vvvv and are
  // handed to the prefix emitter separately.
  const bool base_bit3 = (base_enc & 8) != 0;
  const bool base_bit4 = (base_enc >= 16);
  const bool vvvv_bit4 = (vvvv_enc >= 16);

  evex_prefix(false, base_bit3, false, false, base_bit4, vvvv_bit4, false /*eevex_x*/, vvvv_enc,
              VEX_SIMD_NONE, /* map4 */ VEX_OPCODE_0F_3C);
  emit_int16(0xFF, (0xC0 | (0x6 << 3) | (base_enc & 7)));
}
// Emits the APX POP2 instruction for the register pair (src1, src2).
// with_ppx is forwarded as the W attribute of the prefix; pop2p() is the
// variant that sets it.
// Encoding: extended EVEX prefix (map 4, ND = 1), opcode 0x8F, then a ModRM byte
// with mod = 11, reg = /0 and rm = low three bits of src1. src2 is carried in
// EVEX.vvvv.
void Assembler::pop2(Register src1, Register src2, bool with_ppx) {
  assert(VM_Version::supports_apx_f(), "requires APX");
  InstructionAttr attributes(0, /* rex_w */ with_ppx, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_extended_context();     // EVEX.ND = 1
  attributes.set_is_evex_instruction();
  set_attributes(&attributes);

  const int base_enc = src1->encoding();  // EVEX.BASE
  const int vvvv_enc = src2->encoding();  // EVEX.VVVV
  // Bits 3 and 4 of the register numbers do not fit into ModRM / vvvv and are
  // handed to the prefix emitter separately.
  const bool base_bit3 = (base_enc & 8) != 0;
  const bool base_bit4 = (base_enc >= 16);
  const bool vvvv_bit4 = (vvvv_enc >= 16);

  evex_prefix(false, base_bit3, false, false, base_bit4, vvvv_bit4, false /*eevex_x*/, vvvv_enc,
              VEX_SIMD_NONE, /* map4 */ VEX_OPCODE_0F_3C);
  emit_int16(0x8F, (0xC0 | (base_enc & 7)));
}
// PUSH2P: emits push2() for the pair (src1, src2) with the with_ppx flag set.
void Assembler::push2p(Register src1, Register src2) {
push2(src1, src2, true);
}
// POP2P: emits pop2() for the pair (src1, src2) with the with_ppx flag set.
void Assembler::pop2p(Register src1, Register src2) {
pop2(src1, src2, true);
}
// Emits PUSHP for src: a REX2 prefix produced by prefixq_and_encode_rex2()
// followed by the single-byte opcode 0x50 + (low three bits of the register).
void Assembler::pushp(Register src) {
  assert(VM_Version::supports_apx_f(), "requires APX");
  // The helper writes the prefix and hands back the register bits that go
  // into the opcode byte.
  const int reg_low_bits = prefixq_and_encode_rex2(src->encoding());
  emit_int8(0x50 | reg_low_bits);
}
// Emits POPP for dst: a REX2 prefix produced by prefixq_and_encode_rex2()
// followed by the single-byte opcode 0x58 + (low three bits of the register).
void Assembler::popp(Register dst) {
  assert(VM_Version::supports_apx_f(), "requires APX");
  // The helper writes the prefix and hands back the register bits that go
  // into the opcode byte.
  const int reg_low_bits = prefixq_and_encode_rex2(dst->encoding());
  emit_int8(0x58 | reg_low_bits);
}
#endif //_LP64
void Assembler::push(int32_t imm32) {
// in 64bits we push 64bits onto the stack but only
// take a 32bit immediate
@ -7207,6 +7272,7 @@ void Assembler::vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int ve
void Assembler::vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(!needs_eevex(src.base(), src.index()), "does not support extended gprs as BASE or INDEX of address operand");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@ -11011,6 +11077,7 @@ void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
void Assembler::vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
assert(VM_Version::supports_avx2(), "");
assert(!needs_eevex(src.base()), "does not support extended gprs as BASE of address operand");
assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
assert(dst != xnoreg, "sanity");
assert(src.isxmmindex(),"expected to be xmm index");
@ -11024,6 +11091,7 @@ void Assembler::vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int v
void Assembler::vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
assert(VM_Version::supports_avx2(), "");
assert(!needs_eevex(src.base()), "does not support extended gprs as BASE of address operand");
assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
assert(dst != xnoreg, "sanity");
assert(src.isxmmindex(),"expected to be xmm index");
@ -11037,6 +11105,7 @@ void Assembler::vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int v
void Assembler::vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
assert(VM_Version::supports_avx2(), "");
assert(!needs_eevex(src.base()), "does not support extended gprs as BASE of address operand");
assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
assert(dst != xnoreg, "sanity");
assert(src.isxmmindex(),"expected to be xmm index");
@ -11050,6 +11119,7 @@ void Assembler::vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int v
void Assembler::vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
assert(VM_Version::supports_avx2(), "");
assert(!needs_eevex(src.base()), "does not support extended gprs as BASE of address operand");
assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
assert(dst != xnoreg, "sanity");
assert(src.isxmmindex(),"expected to be xmm index");
@ -11808,7 +11878,6 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
_attributes->get_embedded_opmask_register_specifier() != 0) {
byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
}
emit_int32(EVEX_4bytes, byte2, byte3, byte4);
}
@ -12921,14 +12990,14 @@ void Assembler::emit_data64(jlong data,
int Assembler::get_base_prefix_bits(int enc) {
int bits = 0;
if (enc & 16) bits |= REX2BIT_B4;
if (enc & 8) bits |= REXBIT_B;
if (enc & 8) bits |= REX2BIT_B;
return bits;
}
int Assembler::get_index_prefix_bits(int enc) {
int bits = 0;
if (enc & 16) bits |= REX2BIT_X4;
if (enc & 8) bits |= REXBIT_X;
if (enc & 8) bits |= REX2BIT_X;
return bits;
}
@ -12943,7 +13012,7 @@ int Assembler::get_index_prefix_bits(Register index) {
int Assembler::get_reg_prefix_bits(int enc) {
int bits = 0;
if (enc & 16) bits |= REX2BIT_R4;
if (enc & 8) bits |= REXBIT_R;
if (enc & 8) bits |= REX2BIT_R;
return bits;
}
@ -13181,6 +13250,15 @@ bool Assembler::prefix_is_rex2(int prefix) {
return (prefix & 0xFF00) == WREX2;
}
// Builds the 16-bit REX2 prefix value for a 64-bit (W = 1) instruction whose
// only register operands are the base and index of adr.
// is_map1 additionally sets the M0 bit. The result is WREX2 combined with the
// payload bits and is not emitted here.
int Assembler::get_prefixq_rex2(Address adr, bool is_map1) {
  assert(UseAPX, "APX features not enabled");
  int payload = REX2BIT_W | (is_map1 ? REX2BIT_M0 : 0);
  payload |= get_base_prefix_bits(adr.base());    // B / B4 from the base register
  payload |= get_index_prefix_bits(adr.index());  // X / X4 from the index register
  return WREX2 | payload;
}
int Assembler::get_prefixq(Address adr, bool is_map1) {
if (adr.base_needs_rex2() || adr.index_needs_rex2()) {
return get_prefixq_rex2(adr, is_map1);
@ -13190,15 +13268,6 @@ int Assembler::get_prefixq(Address adr, bool is_map1) {
return is_map1 ? (((int16_t)prfx) << 8) | 0x0F : (int16_t)prfx;
}
int Assembler::get_prefixq_rex2(Address adr, bool is_map1) {
assert(UseAPX, "APX features not enabled");
int bits = REXBIT_W;
if (is_map1) bits |= REX2BIT_M0;
bits |= get_base_prefix_bits(adr.base());
bits |= get_index_prefix_bits(adr.index());
return WREX2 | bits;
}
int Assembler::get_prefixq(Address adr, Register src, bool is_map1) {
if (adr.base_needs_rex2() || adr.index_needs_rex2() || src->encoding() >= 16) {
return get_prefixq_rex2(adr, src, is_map1);
@ -13243,7 +13312,7 @@ int Assembler::get_prefixq(Address adr, Register src, bool is_map1) {
int Assembler::get_prefixq_rex2(Address adr, Register src, bool is_map1) {
assert(UseAPX, "APX features not enabled");
int bits = REXBIT_W;
int bits = REX2BIT_W;
if (is_map1) bits |= REX2BIT_M0;
bits |= get_base_prefix_bits(adr.base());
bits |= get_index_prefix_bits(adr.index());
@ -13306,7 +13375,7 @@ void Assembler::prefixq(Address adr, XMMRegister src) {
}
void Assembler::prefixq_rex2(Address adr, XMMRegister src) {
int bits = REXBIT_W;
int bits = REX2BIT_W;
bits |= get_base_prefix_bits(adr.base());
bits |= get_index_prefix_bits(adr.index());
bits |= get_reg_prefix_bits(src->encoding());
@ -13329,7 +13398,7 @@ int Assembler::prefixq_and_encode(int reg_enc, bool is_map1) {
int Assembler::prefixq_and_encode_rex2(int reg_enc, bool is_map1) {
prefix16(WREX2 | REXBIT_W | (is_map1 ? REX2BIT_M0: 0) | get_base_prefix_bits(reg_enc));
prefix16(WREX2 | REX2BIT_W | (is_map1 ? REX2BIT_M0: 0) | get_base_prefix_bits(reg_enc));
return reg_enc & 0x7;
}
@ -13358,7 +13427,7 @@ int Assembler::prefixq_and_encode(int dst_enc, int src_enc, bool is_map1) {
}
int Assembler::prefixq_and_encode_rex2(int dst_enc, int src_enc, bool is_map1) {
int init_bits = REXBIT_W | (is_map1 ? REX2BIT_M0 : 0);
int init_bits = REX2BIT_W | (is_map1 ? REX2BIT_M0 : 0);
return prefix_and_encode_rex2(dst_enc, src_enc, init_bits);
}
@ -14168,7 +14237,7 @@ void Assembler::precompute_instructions() {
ResourceMark rm;
// Make a temporary buffer big enough for the routines we're capturing
int size = 256;
int size = UseAPX ? 512 : 256;
char* tmp_code = NEW_RESOURCE_ARRAY(char, size);
CodeBuffer buffer((address)tmp_code, size);
MacroAssembler masm(&buffer);
@ -14212,11 +14281,97 @@ static void emit_copy(CodeSection* code_section, u_char* src, int src_len) {
code_section->set_end(end + src_len);
}
// Emits the register-save sequence by copying the pusha_len bytes of
// pusha_code into the current code section.
// Does not actually store the value of rsp on the stack.
// The slot for rsp just contains an arbitrary value.
// NOTE(review): pusha_code is presumably captured from pusha_uncached() by
// precompute_instructions(); if so, the two lines above describe only the
// non-APX layout, since the UseAPX variant saves the pre-alignment rsp
// through rax -- confirm.
void Assembler::pusha() { // 64bit
emit_copy(code_section(), pusha_code, pusha_len);
}
// Emits the instructions that save the general purpose registers on the stack.
// Non-APX path: reserves 16 words and stores rax..r15 at fixed offsets. The
// value of rsp is not stored; its slot (word 11) just contains an arbitrary value.
// UseAPX path: pushes rax, aligns rsp to StackAlignmentInBytes and pushes the
// remaining registers pairwise with PUSH2P. Here the stack pointer taken right
// after the initial push of rax is saved through rax, which is what
// popa_uncached() later moves back into rsp.
void Assembler::pusha_uncached() { // 64bit
if (UseAPX) {
// Data being pushed by PUSH2 must be 16B-aligned on the stack, for this push rax upfront
// and use it as a temporary register for stack alignment.
pushp(rax);
// Move original stack pointer to RAX and align stack pointer to 16B boundary.
movq(rax, rsp);
andq(rsp, -(StackAlignmentInBytes));
// Push pair of original stack pointer along with remaining registers
// at 16B aligned boundary.
push2p(rax, r31);
push2p(r30, r29);
push2p(r28, r27);
push2p(r26, r25);
push2p(r24, r23);
push2p(r22, r21);
push2p(r20, r19);
push2p(r18, r17);
push2p(r16, r15);
push2p(r14, r13);
push2p(r12, r11);
push2p(r10, r9);
push2p(r8, rdi);
push2p(rsi, rbp);
push2p(rbx, rdx);
// To maintain 16 byte alignment after rcx is pushed.
subq(rsp, 8);
pushp(rcx);
} else {
subq(rsp, 16 * wordSize);
movq(Address(rsp, 15 * wordSize), rax);
movq(Address(rsp, 14 * wordSize), rcx);
movq(Address(rsp, 13 * wordSize), rdx);
movq(Address(rsp, 12 * wordSize), rbx);
// Skip rsp as the value is normally not used. There are a few places where
// the original value of rsp needs to be known but that can be computed
// from the value of rsp immediately after pusha (rsp + 16 * wordSize).
// FIXME: For APX any such direct access should also consider EGPR size
// during address computation.
movq(Address(rsp, 10 * wordSize), rbp);
movq(Address(rsp, 9 * wordSize), rsi);
movq(Address(rsp, 8 * wordSize), rdi);
movq(Address(rsp, 7 * wordSize), r8);
movq(Address(rsp, 6 * wordSize), r9);
movq(Address(rsp, 5 * wordSize), r10);
movq(Address(rsp, 4 * wordSize), r11);
movq(Address(rsp, 3 * wordSize), r12);
movq(Address(rsp, 2 * wordSize), r13);
movq(Address(rsp, wordSize), r14);
movq(Address(rsp, 0), r15);
}
}
// Emits the register-restore sequence by copying the popa_len bytes of
// popa_code into the current code section.
void Assembler::popa() { // 64bit
emit_copy(code_section(), popa_code, popa_len);
}
void Assembler::popa_uncached() { // 64bit
if (UseAPX) {
popp(rcx);
addq(rsp, 8);
// Data being popped by POP2 must be 16B-aligned on the stack.
pop2p(rdx, rbx);
pop2p(rbp, rsi);
pop2p(rdi, r8);
pop2p(r9, r10);
pop2p(r11, r12);
pop2p(r13, r14);
pop2p(r15, r16);
pop2p(r17, r18);
pop2p(r19, r20);
pop2p(r21, r22);
pop2p(r23, r24);
pop2p(r25, r26);
pop2p(r27, r28);
pop2p(r29, r30);
// Popped value in RAX holds original unaligned stack pointer.
pop2p(r31, rax);
// Reinstantiate original stack pointer.
movq(rsp, rax);
popp(rax);
} else {
movq(r15, Address(rsp, 0));
movq(r14, Address(rsp, wordSize));
movq(r13, Address(rsp, 2 * wordSize));
@ -14236,37 +14391,7 @@ void Assembler::popa_uncached() { // 64bit
movq(rax, Address(rsp, 15 * wordSize));
addq(rsp, 16 * wordSize);
}
// Does not actually store the value of rsp on the stack.
// The slot for rsp just contains an arbitrary value.
void Assembler::pusha() { // 64bit
emit_copy(code_section(), pusha_code, pusha_len);
}
// Does not actually store the value of rsp on the stack.
// The slot for rsp just contains an arbitrary value.
void Assembler::pusha_uncached() { // 64bit
subq(rsp, 16 * wordSize);
movq(Address(rsp, 15 * wordSize), rax);
movq(Address(rsp, 14 * wordSize), rcx);
movq(Address(rsp, 13 * wordSize), rdx);
movq(Address(rsp, 12 * wordSize), rbx);
// Skip rsp as the value is normally not used. There are a few places where
// the original value of rsp needs to be known but that can be computed
// from the value of rsp immediately after pusha (rsp + 16 * wordSize).
movq(Address(rsp, 10 * wordSize), rbp);
movq(Address(rsp, 9 * wordSize), rsi);
movq(Address(rsp, 8 * wordSize), rdi);
movq(Address(rsp, 7 * wordSize), r8);
movq(Address(rsp, 6 * wordSize), r9);
movq(Address(rsp, 5 * wordSize), r10);
movq(Address(rsp, 4 * wordSize), r11);
movq(Address(rsp, 3 * wordSize), r12);
movq(Address(rsp, 2 * wordSize), r13);
movq(Address(rsp, wordSize), r14);
movq(Address(rsp, 0), r15);
}
}
void Assembler::vzeroupper() {

View File

@ -530,14 +530,16 @@ class Assembler : public AbstractAssembler {
};
enum PrefixBits {
REXBIT_B = 0x01,
REXBIT_X = 0x02,
REXBIT_R = 0x04,
REXBIT_W = 0x08,
REX2BIT_B = 0x01,
REX2BIT_X = 0x02,
REX2BIT_R = 0x04,
REX2BIT_W = 0x08,
REX2BIT_B4 = 0x10,
REX2BIT_X4 = 0x20,
REX2BIT_R4 = 0x40,
REX2BIT_M0 = 0x80
REX2BIT_M0 = 0x80,
REX2BIT_WB = 0x09,
REX2BIT_WB4 = 0x18,
};
enum VexPrefix {
@ -1017,6 +1019,15 @@ private:
void pusha_uncached();
void popa_uncached();
// APX ISA extensions for register save/restore optimizations.
void push2(Register src1, Register src2, bool with_ppx = false);
void pop2(Register src1, Register src2, bool with_ppx = false);
void push2p(Register src1, Register src2);
void pop2p(Register src1, Register src2);
void pushp(Register src);
void popp(Register src);
#endif
void vzeroupper_uncached();
void decq(Register dst);
@ -3070,7 +3081,6 @@ public:
}
void set_extended_context(void) { _is_extended_context = true; }
};
#endif // CPU_X86_ASSEMBLER_X86_HPP

View File

@ -39,7 +39,7 @@ enum {
// registers
enum {
pd_nof_cpu_regs_frame_map = Register::number_of_registers, // number of registers used during code emission
pd_nof_cpu_regs_frame_map = NOT_LP64(8) LP64_ONLY(16), // number of registers used during code emission
pd_nof_fpu_regs_frame_map = FloatRegister::number_of_registers, // number of registers used during code emission
pd_nof_xmm_regs_frame_map = XMMRegister::number_of_registers, // number of registers used during code emission

View File

@ -2836,7 +2836,7 @@ void LIR_Assembler::align_call(LIR_Code code) {
offset += NativeCall::displacement_offset;
break;
case lir_icvirtual_call:
offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size;
offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size_rex;
break;
default: ShouldNotReachHere();
}
@ -2873,7 +2873,7 @@ void LIR_Assembler::emit_static_call_stub() {
int start = __ offset();
// make sure that the displacement word of the call ends up word aligned
__ align(BytesPerWord, __ offset() + NativeMovConstReg::instruction_size + NativeCall::displacement_offset);
__ align(BytesPerWord, __ offset() + NativeMovConstReg::instruction_size_rex + NativeCall::displacement_offset);
__ relocate(static_stub_Relocation::spec(call_pc));
__ mov_metadata(rbx, (Metadata*)nullptr);
// must be set to -1 at code generation time

View File

@ -420,7 +420,12 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers) {
__ block_comment("save_live_registers");
__ pusha(); // integer registers
// Push CPU state in multiple of 16 bytes
#ifdef _LP64
__ save_legacy_gprs();
#else
__ pusha();
#endif
// assert(float_regs_as_doubles_off % 2 == 0, "misaligned offset");
// assert(xmm_regs_as_doubles_off % 2 == 0, "misaligned offset");
@ -560,7 +565,12 @@ void C1_MacroAssembler::restore_live_registers(bool restore_fpu_registers) {
__ block_comment("restore_live_registers");
restore_fpu(this, restore_fpu_registers);
#ifdef _LP64
__ restore_legacy_gprs();
#else
__ popa();
#endif
}

View File

@ -583,6 +583,25 @@ void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));
if (UseAPX) {
caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
}
int gp_spill_size = 0;
int opmask_spill_size = 0;
int xmm_spill_size = 0;

View File

@ -484,6 +484,25 @@ private:
caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));
caller_saved.Remove(OptoReg::as_OptoReg(stub->ref()->as_VMReg()));
if (UseAPX) {
caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
}
// Create mask of live registers
RegMask live = stub->live();
if (stub->tmp() != noreg) {

View File

@ -115,7 +115,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
"Highest supported AVX instructions set on x86/x64") \
range(0, 3) \
\
\
product(bool, UseAPX, false, EXPERIMENTAL, \
"Use Intel Advanced Performance Extensions") \
\
@ -192,7 +191,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
product(bool, IntelJccErratumMitigation, true, DIAGNOSTIC, \
"Turn off JVM mitigations related to Intel micro code " \
"mitigations for the Intel JCC erratum") \
\
// end of ARCH_FLAGS
#endif // CPU_X86_GLOBALS_X86_HPP

View File

@ -49,12 +49,17 @@ jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, JVMC
return (pc_offset + NativeCall::instruction_size);
} else if (inst->is_mov_literal64()) {
// mov+call instruction pair
jint offset = pc_offset + NativeMovConstReg::instruction_size;
jint offset = pc_offset + ((NativeMovConstReg*)inst)->instruction_size();
u_char* call = (u_char*) (_instructions->start() + offset);
if (call[0] == Assembler::REX_B) {
offset += 1; /* prefix byte for extended register R8-R15 */
call++;
}
if (call[0] == Assembler::REX2) {
offset += 2; /* prefix byte for APX extended GPR register R16-R31 */
call+=2;
}
// Register indirect call.
assert(call[0] == 0xFF, "expected call");
offset += 2; /* opcode byte + modrm byte */
return (offset);

View File

@ -4087,6 +4087,11 @@ RegSet MacroAssembler::call_clobbered_gp_registers() {
regs += RegSet::range(r8, r11);
#else
regs += RegSet::of(rax, rcx, rdx);
#endif
#ifdef _LP64
if (UseAPX) {
regs += RegSet::range(r16, as_Register(Register::number_of_registers - 1));
}
#endif
return regs;
}
@ -10379,3 +10384,45 @@ void MacroAssembler::lightweight_unlock(Register obj, Register reg_rax, Register
bind(unlocked);
}
#ifdef _LP64
// Saves legacy GPRs state on stack.
void MacroAssembler::save_legacy_gprs() {
subq(rsp, 16 * wordSize);
movq(Address(rsp, 15 * wordSize), rax);
movq(Address(rsp, 14 * wordSize), rcx);
movq(Address(rsp, 13 * wordSize), rdx);
movq(Address(rsp, 12 * wordSize), rbx);
movq(Address(rsp, 10 * wordSize), rbp);
movq(Address(rsp, 9 * wordSize), rsi);
movq(Address(rsp, 8 * wordSize), rdi);
movq(Address(rsp, 7 * wordSize), r8);
movq(Address(rsp, 6 * wordSize), r9);
movq(Address(rsp, 5 * wordSize), r10);
movq(Address(rsp, 4 * wordSize), r11);
movq(Address(rsp, 3 * wordSize), r12);
movq(Address(rsp, 2 * wordSize), r13);
movq(Address(rsp, wordSize), r14);
movq(Address(rsp, 0), r15);
}
// Resotres back legacy GPRs state from stack.
void MacroAssembler::restore_legacy_gprs() {
movq(r15, Address(rsp, 0));
movq(r14, Address(rsp, wordSize));
movq(r13, Address(rsp, 2 * wordSize));
movq(r12, Address(rsp, 3 * wordSize));
movq(r11, Address(rsp, 4 * wordSize));
movq(r10, Address(rsp, 5 * wordSize));
movq(r9, Address(rsp, 6 * wordSize));
movq(r8, Address(rsp, 7 * wordSize));
movq(rdi, Address(rsp, 8 * wordSize));
movq(rsi, Address(rsp, 9 * wordSize));
movq(rbp, Address(rsp, 10 * wordSize));
movq(rbx, Address(rsp, 12 * wordSize));
movq(rdx, Address(rsp, 13 * wordSize));
movq(rcx, Address(rsp, 14 * wordSize));
movq(rax, Address(rsp, 15 * wordSize));
addq(rsp, 16 * wordSize);
}
#endif

View File

@ -2150,6 +2150,11 @@ public:
void lightweight_lock(Register obj, Register reg_rax, Register thread, Register tmp, Label& slow);
void lightweight_unlock(Register obj, Register reg_rax, Register thread, Register tmp, Label& slow);
#ifdef _LP64
void save_legacy_gprs();
void restore_legacy_gprs();
#endif
};
/**

View File

@ -536,10 +536,11 @@ void trace_method_handle_stub(const char* adaptername,
Register r = as_Register(i);
// The registers are stored in reverse order on the stack (by pusha).
#ifdef AMD64
assert(Register::number_of_registers == 16, "sanity");
int num_regs = UseAPX ? 32 : 16;
assert(Register::available_gp_registers() == num_regs, "sanity");
if (r == rsp) {
// rsp is actually not stored by pusha(), compute the old rsp from saved_regs (rsp after pusha): saved_regs + num_regs (16, or 32 with UseAPX) = old rsp
ls.print("%3s=" PTR_FORMAT, r->name(), (intptr_t)(&saved_regs[16]));
ls.print("%3s=" PTR_FORMAT, r->name(), (intptr_t)(&saved_regs[num_regs]));
} else {
ls.print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]);
}

View File

@ -160,8 +160,13 @@ void NativeCall::set_destination_mt_safe(address dest) {
void NativeMovConstReg::verify() {
#ifdef AMD64
// make sure code pattern is actually a mov reg64, imm64 instruction
if ((ubyte_at(0) != Assembler::REX_W && ubyte_at(0) != Assembler::REX_WB) ||
(ubyte_at(1) & (0xff ^ register_mask)) != 0xB8) {
// The pattern is a mov reg64, imm64 only if BOTH hold:
//   1. the prefix is REX.W / REX.WB, or a REX2 prefix whose payload is W with
//      nothing but base-register extension bits (B, B4) added, and
//   2. the opcode is 0xB8 + reg.
// The previous condition, (!valid_rex_prefix || !valid_rex2_prefix) && <opcode mismatch>,
// never rejected on the prefix: an instruction cannot start with both a REX
// and a REX2 byte, so the first operand was always true.
// NOTE(review): the REX2 payload test is written as a mask so that W|B|B4
// (what get_base_prefix_bits() yields for encodings 24..31) is accepted too;
// is_mov_literal64() still lists only W, WB and WB4 -- confirm whether it
// should be widened the same way.
bool valid_rex_prefix = ubyte_at(0) == Assembler::REX_W || ubyte_at(0) == Assembler::REX_WB;
const int rex2_base_ext_bits = Assembler::REX2BIT_B | Assembler::REX2BIT_B4;
bool valid_rex2_prefix = ubyte_at(0) == Assembler::REX2 &&
  (ubyte_at(1) & ~rex2_base_ext_bits) == Assembler::REX2BIT_W;
// The opcode follows the one-byte REX or the two-byte REX2 prefix.
int opcode = has_rex2_prefix() ? ubyte_at(2) : ubyte_at(1);
if (!(valid_rex_prefix || valid_rex2_prefix) || (opcode & (0xff ^ register_mask)) != 0xB8) {
  print();
  fatal("not a REX.W[B] mov reg64, imm64");
}
@ -208,6 +213,11 @@ int NativeMovRegMem::instruction_start() const {
instr_0 = ubyte_at(off);
}
if (instr_0 == instruction_REX2_prefix) {
off+=2;
instr_0 = ubyte_at(off);
}
if (instr_0 == instruction_code_xor) {
off += 2;
instr_0 = ubyte_at(off);
@ -226,29 +236,39 @@ int NativeMovRegMem::instruction_start() const {
instr_0 = ubyte_at(off);
}
if (instr_0 == instruction_REX2_prefix) {
off+=2;
instr_0 = ubyte_at(off);
}
if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
instr_0 <= instruction_prefix_wide_hi) { // 0x4f
off++;
instr_0 = ubyte_at(off);
}
// Extended prefixes can only follow REX prefixes,
// REX2 is directly followed by main opcode.
if (instr_0 == instruction_extended_prefix ) { // 0x0f
off++;
}
// Offset of instruction opcode.
return off;
}
// Format [REX/REX2] [OPCODE] [ModRM] [SIB] [IMM/DISP32]
int NativeMovRegMem::patch_offset() const {
int off = data_offset + instruction_start();
u_char mod_rm = *(u_char*)(instruction_address() + 1);
// nnnn(r12|rsp) isn't coded as simple mod/rm since that is
// the encoding to use an SIB byte. Which will have the nnnn
// field off by one byte
// ModRM Byte Format = Mod[2] REG[3] RM[3]
if ((mod_rm & 7) == 0x4) {
off++;
}
// Displacement offset.
return off;
}
@ -294,12 +314,6 @@ void NativeMovRegMem::print() {
void NativeLoadAddress::verify() {
// make sure code pattern is actually a mov [reg+offset], reg instruction
u_char test_byte = *(u_char*)instruction_address();
#ifdef _LP64
if ( (test_byte == instruction_prefix_wide ||
test_byte == instruction_prefix_wide_extended) ) {
test_byte = *(u_char*)(instruction_address() + 1);
}
#endif // _LP64
if ( ! ((test_byte == lea_instruction_code)
LP64_ONLY(|| (test_byte == mov64_instruction_code) ))) {
fatal ("not a lea reg, [reg+offs] instruction");

View File

@ -90,6 +90,7 @@ class NativeInstruction {
void wrote(int offset);
public:
bool has_rex2_prefix() const { return ubyte_at(0) == Assembler::REX2; }
inline friend NativeInstruction* nativeInstruction_at(address address);
};
@ -178,19 +179,28 @@ inline NativeCall* nativeCall_before(address return_address) {
return call;
}
// Call with target address in a general purpose register(indirect absolute addressing).
// Encoding : FF /2 CALL r/m32
// Primary Opcode: FF
// Opcode Extension(part of ModRM.REG): /2
// Operand ModRM.RM = r/m32
class NativeCallReg: public NativeInstruction {
public:
enum Intel_specific_constants {
instruction_code = 0xFF,
instruction_offset = 0,
return_address_offset_norex = 2,
return_address_offset_rex = 3
return_address_offset_rex = 3,
return_address_offset_rex2 = 4
};
// Offset from the start of this register-indirect call to the instruction
// that follows it, chosen by what the first byte is: the opcode itself
// (no prefix), a REX2 prefix, or otherwise a REX prefix.
int next_instruction_offset() const {
  const bool starts_with_opcode = (ubyte_at(0) == NativeCallReg::instruction_code);
  if (starts_with_opcode) {
    return return_address_offset_norex;
  }
  if (has_rex2_prefix()) {
    return return_address_offset_rex2;
  }
  // Anything else must be a REX prefix.
  assert((ubyte_at(0) & 0xF0) == Assembler::REX, "");
  return return_address_offset_rex;
}
@ -198,28 +208,38 @@ class NativeCallReg: public NativeInstruction {
// An interface for accessing/manipulating native mov reg, imm32 instructions.
// (used to manipulate inlined 32bit data dll calls, etc.)
// Instruction format for implied addressing mode immediate operand move to register instruction:
// [REX/REX2] [OPCODE] [IMM32]
class NativeMovConstReg: public NativeInstruction {
#ifdef AMD64
static const bool has_rex = true;
static const int rex_size = 1;
static const int rex2_size = 2;
#else
static const bool has_rex = false;
static const int rex_size = 0;
static const int rex2_size = 0;
#endif // AMD64
public:
enum Intel_specific_constants {
instruction_code = 0xB8,
instruction_size = 1 + rex_size + wordSize,
instruction_offset = 0,
data_offset = 1 + rex_size,
next_instruction_offset = instruction_size,
instruction_size_rex = 1 + rex_size + wordSize,
instruction_size_rex2 = 1 + rex2_size + wordSize,
data_offset_rex = 1 + rex_size,
data_offset_rex2 = 1 + rex2_size,
next_instruction_offset_rex = instruction_size_rex,
next_instruction_offset_rex2 = instruction_size_rex2,
register_mask = 0x07
};
int instruction_size() const { return has_rex2_prefix() ? instruction_size_rex2 : instruction_size_rex; }
int next_inst_offset() const { return has_rex2_prefix() ? next_instruction_offset_rex2 : next_instruction_offset_rex; }
int data_byte_offset() const { return has_rex2_prefix() ? data_offset_rex2 : data_offset_rex;}
address instruction_address() const { return addr_at(instruction_offset); }
address next_instruction_address() const { return addr_at(next_instruction_offset); }
intptr_t data() const { return ptr_at(data_offset); }
void set_data(intptr_t x) { set_ptr_at(data_offset, x); }
address next_instruction_address() const { return addr_at(next_inst_offset()); }
intptr_t data() const { return ptr_at(data_byte_offset()); }
void set_data(intptr_t x) { set_ptr_at(data_byte_offset(), x); }
void verify();
void print();
@ -238,7 +258,10 @@ inline NativeMovConstReg* nativeMovConstReg_at(address address) {
}
inline NativeMovConstReg* nativeMovConstReg_before(address address) {
NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset);
int instruction_size = ((NativeInstruction*)(address))->has_rex2_prefix() ?
NativeMovConstReg::instruction_size_rex2 :
NativeMovConstReg::instruction_size_rex;
NativeMovConstReg* test = (NativeMovConstReg*)(address - instruction_size - NativeMovConstReg::instruction_offset);
#ifdef ASSERT
test->verify();
#endif
@ -279,35 +302,47 @@ class NativeMovRegMem: public NativeInstruction {
instruction_prefix_wide_hi = Assembler::REX_WRXB,
instruction_code_xor = 0x33,
instruction_extended_prefix = 0x0F,
// Legacy encoding MAP1 instructions promotable to REX2 encoding.
instruction_code_mem2reg_movslq = 0x63,
instruction_code_mem2reg_movzxb = 0xB6,
instruction_code_mem2reg_movsxb = 0xBE,
instruction_code_mem2reg_movzxw = 0xB7,
instruction_code_mem2reg_movsxw = 0xBF,
instruction_operandsize_prefix = 0x66,
// Legacy encoding MAP0 instructions promotable to REX2 encoding.
instruction_code_reg2mem = 0x89,
instruction_code_mem2reg = 0x8b,
instruction_code_reg2memb = 0x88,
instruction_code_mem2regb = 0x8a,
instruction_code_lea = 0x8d,
instruction_code_float_s = 0xd9,
instruction_code_float_d = 0xdd,
instruction_code_long_volatile = 0xdf,
// VEX/EVEX/Legacy encoded MAP1 instructions promotable to REX2 encoding.
instruction_code_xmm_ss_prefix = 0xf3,
instruction_code_xmm_sd_prefix = 0xf2,
instruction_code_xmm_code = 0x0f,
// Address operand load/store/ldp are promotable to REX2 to accommodate
// extended SIB encoding.
instruction_code_xmm_load = 0x10,
instruction_code_xmm_store = 0x11,
instruction_code_xmm_lpd = 0x12,
instruction_code_lea = 0x8d,
instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes,
instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes,
instruction_EVEX_prefix_4bytes = Assembler::EVEX_4bytes,
instruction_REX2_prefix = Assembler::REX2,
instruction_offset = 0,
data_offset = 2,
next_instruction_offset = 4
next_instruction_offset_rex = 4,
next_instruction_offset_rex2 = 5
};
// helper
@ -438,7 +473,8 @@ inline NativeJump* nativeJump_at(address address) {
return jump;
}
// Handles all kinds of jump on Intel. Long/far, conditional/unconditional
// Handles all kinds of jump on Intel. Long/far, conditional/unconditional with relative offsets
// barring register indirect jumps.
class NativeGeneralJump: public NativeInstruction {
public:
enum Intel_specific_constants {
@ -538,7 +574,7 @@ inline bool NativeInstruction::is_cond_jump() { return (int_at(0) & 0xF0FF) =
inline bool NativeInstruction::is_safepoint_poll() {
#ifdef AMD64
const bool has_rex_prefix = ubyte_at(0) == NativeTstRegMem::instruction_rex_b_prefix;
const int test_offset = has_rex_prefix ? 1 : 0;
const int test_offset = has_rex2_prefix() ? 2 : (has_rex_prefix ? 1 : 0);
#else
const int test_offset = 0;
#endif
@ -549,8 +585,14 @@ inline bool NativeInstruction::is_safepoint_poll() {
inline bool NativeInstruction::is_mov_literal64() {
#ifdef AMD64
return ((ubyte_at(0) == Assembler::REX_W || ubyte_at(0) == Assembler::REX_WB) &&
(ubyte_at(1) & (0xff ^ NativeMovConstReg::register_mask)) == 0xB8);
bool valid_rex_prefix = ubyte_at(0) == Assembler::REX_W || ubyte_at(0) == Assembler::REX_WB;
bool valid_rex2_prefix = ubyte_at(0) == Assembler::REX2 &&
(ubyte_at(1) == Assembler::REX2BIT_W ||
ubyte_at(1) == Assembler::REX2BIT_WB ||
ubyte_at(1) == Assembler::REX2BIT_WB4);
int opcode = has_rex2_prefix() ? ubyte_at(2) : ubyte_at(1);
return ((valid_rex_prefix || valid_rex2_prefix) && (opcode & (0xff ^ NativeMovConstReg::register_mask)) == 0xB8);
#else
return false;
#endif // AMD64

View File

@ -35,7 +35,9 @@ const char * Register::RegisterImpl::name() const {
static const char *const names[number_of_registers] = {
#ifdef _LP64
"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
#else
"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"
#endif // _LP64

View File

@ -45,8 +45,8 @@ public:
inline friend constexpr Register as_Register(int encoding);
enum {
number_of_registers = LP64_ONLY( 16 ) NOT_LP64( 8 ),
number_of_byte_registers = LP64_ONLY( 16 ) NOT_LP64( 4 ),
number_of_registers = LP64_ONLY( 32 ) NOT_LP64( 8 ),
number_of_byte_registers = LP64_ONLY( 32 ) NOT_LP64( 4 ),
max_slots_per_register = LP64_ONLY( 2 ) NOT_LP64( 1 )
};
@ -76,6 +76,16 @@ public:
int operator!=(const Register r) const { return _encoding != r._encoding; }
constexpr const RegisterImpl* operator->() const { return RegisterImpl::first() + _encoding; }
// Number of general-purpose registers that may actually be used at runtime.
// On 64-bit builds only half of number_of_registers is reported unless the
// UseAPX flag is set; on other builds the full count is always returned.
static int available_gp_registers() {
#ifdef _LP64
  return UseAPX ? number_of_registers : number_of_registers / 2;
#else
  return number_of_registers;
#endif // _LP64
}
};
extern const Register::RegisterImpl all_RegisterImpls[Register::number_of_registers + 1] INTERNAL_VISIBILITY;
@ -115,6 +125,22 @@ constexpr Register r12 = as_Register(12);
constexpr Register r13 = as_Register(13);
constexpr Register r14 = as_Register(14);
constexpr Register r15 = as_Register(15);
constexpr Register r16 = as_Register(16);
constexpr Register r17 = as_Register(17);
constexpr Register r18 = as_Register(18);
constexpr Register r19 = as_Register(19);
constexpr Register r20 = as_Register(20);
constexpr Register r21 = as_Register(21);
constexpr Register r22 = as_Register(22);
constexpr Register r23 = as_Register(23);
constexpr Register r24 = as_Register(24);
constexpr Register r25 = as_Register(25);
constexpr Register r26 = as_Register(26);
constexpr Register r27 = as_Register(27);
constexpr Register r28 = as_Register(28);
constexpr Register r29 = as_Register(29);
constexpr Register r30 = as_Register(30);
constexpr Register r31 = as_Register(31);
#endif // _LP64

View File

@ -95,6 +95,7 @@ class RegisterSaver {
// units because compiler frame slots are jints.
#define XSAVE_AREA_BEGIN 160
#define XSAVE_AREA_YMM_BEGIN 576
#define XSAVE_AREA_EGPRS 960
#define XSAVE_AREA_OPMASK_BEGIN 1088
#define XSAVE_AREA_ZMM_BEGIN 1152
#define XSAVE_AREA_UPPERBANK 1664
@ -113,6 +114,23 @@ class RegisterSaver {
DEF_YMM_OFFS(0),
DEF_YMM_OFFS(1),
// 2..15 are implied in range usage
r31_off = xmm_off + (XSAVE_AREA_EGPRS - XSAVE_AREA_BEGIN)/BytesPerInt,
r31H_off,
r30_off, r30H_off,
r29_off, r29H_off,
r28_off, r28H_off,
r27_off, r27H_off,
r26_off, r26H_off,
r25_off, r25H_off,
r24_off, r24H_off,
r23_off, r23H_off,
r22_off, r22H_off,
r21_off, r21H_off,
r20_off, r20H_off,
r19_off, r19H_off,
r18_off, r18H_off,
r17_off, r17H_off,
r16_off, r16H_off,
opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
DEF_OPMASK_OFFS(0),
DEF_OPMASK_OFFS(1),
@ -199,7 +217,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// to be under the return like a normal enter.
__ enter(); // rsp becomes 16-byte aligned here
__ push_CPU_state(); // Push a multiple of 16 bytes
__ pushf();
// Make sure rsp stays 16-byte aligned
__ subq(rsp, 8);
// Push CPU state in multiple of 16 bytes
__ save_legacy_gprs();
__ push_FPU_state();
// push cpu state handles this on EVEX enabled targets
if (save_wide_vectors) {
@ -247,6 +271,17 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
#endif
}
}
#if COMPILER2_OR_JVMCI
if (UseAPX) {
int base_addr = XSAVE_AREA_EGPRS;
off = 0;
for(int n = 16; n < Register::number_of_registers; n++) {
__ movq(Address(rsp, base_addr+(off++*8)), as_Register(n));
}
}
#endif
__ vzeroupper();
if (frame::arg_reg_save_area_bytes != 0) {
// Allocate argument register save area
@ -279,6 +314,25 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
if (UseAPX) {
map->set_callee_saved(STACK_OFFSET( r16_off ), r16->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r17_off ), r17->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r18_off ), r18->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r19_off ), r19->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r20_off ), r20->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r21_off ), r21->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r22_off ), r22->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r23_off ), r23->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r24_off ), r24->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r25_off ), r25->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r26_off ), r26->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r27_off ), r27->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r28_off ), r28->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r29_off ), r29->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r30_off ), r30->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r31_off ), r31->as_VMReg());
}
// For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
// on EVEX enabled targets, we get it included in the xsave area
off = xmm0_off;
@ -339,6 +393,24 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
if (UseAPX) {
map->set_callee_saved(STACK_OFFSET( r16H_off ), r16->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r17H_off ), r17->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r18H_off ), r18->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r19H_off ), r19->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r20H_off ), r20->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r21H_off ), r21->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r22H_off ), r22->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r23H_off ), r23->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r24H_off ), r24->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r25H_off ), r25->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r26H_off ), r26->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r27H_off ), r27->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r28H_off ), r28->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r29H_off ), r29->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r30H_off ), r30->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r31H_off ), r31->as_VMReg()->next());
}
// For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
// on EVEX enabled targets, we get it included in the xsave area
off = xmm0H_off;
@ -428,8 +500,21 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wi
}
}
#if COMPILER2_OR_JVMCI
if (UseAPX) {
int base_addr = XSAVE_AREA_EGPRS;
int off = 0;
for (int n = 16; n < Register::number_of_registers; n++) {
__ movq(as_Register(n), Address(rsp, base_addr+(off++*8)));
}
}
#endif
// Recover CPU state
__ pop_CPU_state();
__ pop_FPU_state();
__ restore_legacy_gprs();
__ addq(rsp, 8);
__ popf();
// Get the rbp described implicitly by the calling convention (no oopMap)
__ pop(rbp);
}
@ -2543,6 +2628,9 @@ void SharedRuntime::generate_deopt_blob() {
if (UseAVX > 2) {
pad += 1024;
}
if (UseAPX) {
pad += 1024;
}
#if INCLUDE_JVMCI
if (EnableJVMCI) {
pad += 512; // Increase the buffer size when compiling for JVMCI
@ -3091,7 +3179,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
OopMap* map;
// Allocate space for the code. Setup code generation tools.
CodeBuffer buffer("handler_blob", 2048, 1024);
CodeBuffer buffer("handler_blob", 2348, 1024);
MacroAssembler* masm = new MacroAssembler(&buffer);
address start = __ pc();
@ -3247,7 +3335,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
// allocate space for the code
ResourceMark rm;
CodeBuffer buffer(name, 1200, 512);
CodeBuffer buffer(name, 1552, 512);
MacroAssembler* masm = new MacroAssembler(&buffer);
int frame_size_in_words;

View File

@ -40,13 +40,17 @@
#define __ _masm->
static bool is_valid_XMM(XMMRegister reg) {
return reg->is_valid() && (UseAVX >= 3 || (reg->encoding() < 16)); // why is this not covered by is_valid()?
return reg->is_valid() && (reg->encoding() < (UseAVX >= 3 ? 32 : 16)); // why is this not covered by is_valid()?
}
// Returns true when reg is a valid general-purpose register whose encoding is
// usable under the current flags: below 32 when UseAPX is set, below 16 otherwise.
static bool is_valid_gp(Register reg) {
  if (!reg->is_valid()) {
    return false;
  }
  const int gp_limit = UseAPX ? 32 : 16;
  return reg->encoding() < gp_limit;
}
// for callee saved regs, according to the caller's ABI
static int compute_reg_save_area_size(const ABIDescriptor& abi) {
int size = 0;
for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
for (Register reg = as_Register(0); is_valid_gp(reg); reg = reg->successor()) {
if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
if (!abi.is_volatile_reg(reg)) {
size += 8; // bytes
@ -84,7 +88,7 @@ static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDesc
int offset = reg_save_area_offset;
__ block_comment("{ preserve_callee_saved_regs ");
for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
for (Register reg = as_Register(0); is_valid_gp(reg); reg = reg->successor()) {
if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
if (!abi.is_volatile_reg(reg)) {
__ movptr(Address(rsp, offset), reg);
@ -134,7 +138,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
int offset = reg_save_area_offset;
__ block_comment("{ restore_callee_saved_regs ");
for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
for (Register reg = as_Register(0); is_valid_gp(reg); reg = reg->successor()) {
if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
if (!abi.is_volatile_reg(reg)) {
__ movptr(reg, Address(rsp, offset));

View File

@ -108,6 +108,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
#if defined(_LP64)
address clear_apx_test_state() {
# define __ _masm->
address start = __ pc();
@ -115,7 +116,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
// handling guarantees that preserved register values post signal handling were
// re-instantiated by operating system and not because they were not modified externally.
/* FIXME Uncomment following code after OS enablement of
bool save_apx = UseAPX;
VM_Version::set_apx_cpuFeatures();
UseAPX = true;
@ -124,10 +124,10 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ mov64(r31, 0L);
UseAPX = save_apx;
VM_Version::clean_cpuFeatures();
*/
__ ret(0);
return start;
}
#endif
address generate_get_cpu_info() {
// Flags to test CPU type.
@ -419,7 +419,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
#ifndef PRODUCT
#if defined(_LP64)
//
// Check if OS has enabled XGETBV instruction to access XCR0
// (OSXSAVE feature flag) and CPU supports APX
@ -437,26 +437,22 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ cmpl(rax, 0x80000);
__ jcc(Assembler::notEqual, vector_save_restore);
/* FIXME: Uncomment while integrating JDK-8329032
bool save_apx = UseAPX;
VM_Version::set_apx_cpuFeatures();
UseAPX = true;
__ mov64(r16, VM_Version::egpr_test_value());
__ mov64(r31, VM_Version::egpr_test_value());
*/
__ xorl(rsi, rsi);
VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
// Generate SEGV
__ movl(rax, Address(rsi, 0));
VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
/* FIXME: Uncomment after integration of JDK-8329032
__ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
__ movq(Address(rsi, 0), r16);
__ movq(Address(rsi, 8), r31);
UseAPX = save_apx;
*/
#endif
__ bind(vector_save_restore);
//
@ -2170,9 +2166,11 @@ int VM_Version::avx3_threshold() {
FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
}
#if defined(_LP64)
// Thin wrapper that invokes the clear_apx_test_state_stub function pointer.
// The wrapper takes no arguments and returns nothing.
void VM_Version::clear_apx_test_state() {
clear_apx_test_state_stub();
}
#endif
static bool _vm_version_initialized = false;
@ -2191,8 +2189,10 @@ void VM_Version::initialize() {
detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
g.generate_detect_virt());
#if defined(_LP64)
clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
g.clear_apx_test_state());
#endif
get_processor_features();
LP64_ONLY(Assembler::precompute_instructions();)
@ -3183,11 +3183,17 @@ bool VM_Version::os_supports_apx_egprs() {
if (!supports_apx_f()) {
return false;
}
// Enable APX support for product builds after
// completion of planned features listed in JDK-8329030.
#if !defined(PRODUCT)
if (_cpuid_info.apx_save[0] != egpr_test_value() ||
_cpuid_info.apx_save[1] != egpr_test_value()) {
return false;
}
return true;
#else
return false;
#endif
}
uint VM_Version::cores_per_cpu() {

View File

@ -635,7 +635,7 @@ public:
static void set_cpuinfo_cont_addr_apx(address pc) { _cpuinfo_cont_addr_apx = pc; }
static address cpuinfo_cont_addr_apx() { return _cpuinfo_cont_addr_apx; }
static void clear_apx_test_state();
LP64_ONLY(static void clear_apx_test_state());
static void clean_cpuFeatures() { _features = 0; }
static void set_avx_cpuFeatures() { _features = (CPU_SSE | CPU_SSE2 | CPU_AVX | CPU_VZEROUPPER ); }

View File

@ -28,7 +28,8 @@
#include "register_x86.hpp"
inline bool is_Register() {
return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr;
int uarch_max_gpr = Register::max_slots_per_register * Register::available_gp_registers();
return (unsigned int) value() < (unsigned int) uarch_max_gpr;
}
inline bool is_FloatRegister() {

View File

@ -128,6 +128,53 @@ reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
// Floating Point Registers
@ -154,6 +201,22 @@ alloc_class chunk0(R10, R10_H,
R13, R13_H,
R14, R14_H,
R15, R15_H,
R16, R16_H,
R17, R17_H,
R18, R18_H,
R19, R19_H,
R20, R20_H,
R21, R21_H,
R22, R22_H,
R23, R23_H,
R24, R24_H,
R25, R25_H,
R26, R26_H,
R27, R27_H,
R28, R28_H,
R29, R29_H,
R30, R30_H,
R31, R31_H,
RSP, RSP_H);
@ -167,7 +230,7 @@ alloc_class chunk0(R10, R10_H,
// Empty register class.
reg_class no_reg();
// Class for all pointer/long registers
// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
RDX, RDX_H,
RBP, RBP_H,
@ -183,9 +246,25 @@ reg_class all_reg(RAX, RAX_H,
R12, R12_H,
R13, R13_H,
R14, R14_H,
R15, R15_H);
R15, R15_H,
R16, R16_H,
R17, R17_H,
R18, R18_H,
R19, R19_H,
R20, R20_H,
R21, R21_H,
R22, R22_H,
R23, R23_H,
R24, R24_H,
R25, R25_H,
R26, R26_H,
R27, R27_H,
R28, R28_H,
R29, R29_H,
R30, R30_H,
R31, R31_H);
// Class for all int registers
// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX
RDX,
RBP,
@ -199,7 +278,23 @@ reg_class all_int_reg(RAX
R11,
R12,
R13,
R14);
R14,
R16,
R17,
R18,
R19,
R20,
R21,
R22,
R23,
R24,
R25,
R26,
R27,
R28,
R29,
R30,
R31);
// Class for all pointer registers
reg_class any_reg %{
@ -386,6 +481,8 @@ static bool need_r12_heapbase() {
}
void reg_mask_init() {
constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
// _ALL_REG_mask is generated by adlc from the all_reg register class below.
// We derive a number of subsets from it.
_ANY_REG_mask = _ALL_REG_mask;
@ -404,6 +501,12 @@ void reg_mask_init() {
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
if (!UseAPX) {
for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
}
}
_STACK_OR_PTR_REG_mask = _PTR_REG_mask;
_STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
@ -420,6 +523,7 @@ void reg_mask_init() {
_PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
_PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
_LONG_REG_mask = _PTR_REG_mask;
_STACK_OR_LONG_REG_mask = _LONG_REG_mask;
_STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
@ -441,6 +545,12 @@ void reg_mask_init() {
_LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
_INT_REG_mask = _ALL_INT_REG_mask;
if (!UseAPX) {
for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
_INT_REG_mask.Remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
}
}
if (PreserveFramePointer) {
_INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
}
@ -12320,7 +12430,6 @@ instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
format %{ "testl rax, [$poll]\t"
"# Safepoint: poll for GC" %}
ins_cost(125);
size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
ins_encode %{
__ relocate(relocInfo::poll_type);
address pre_pc = __ pc();

View File

@ -2758,7 +2758,7 @@ LONG WINAPI topLevelExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo) {
return Handle_Exception(exceptionInfo, VM_Version::cpuinfo_cont_addr());
}
#ifndef PRODUCT
#if !defined(PRODUCT) && defined(_LP64)
if ((exception_code == EXCEPTION_ACCESS_VIOLATION) &&
VM_Version::is_cpuinfo_segv_addr_apx(pc)) {
// Verify that the OS saves/restores APX registers.

View File

@ -416,7 +416,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
stub = VM_Version::cpuinfo_cont_addr();
}
#ifndef PRODUCT
#if !defined(PRODUCT) && defined(_LP64)
if ((sig == SIGSEGV || sig == SIGBUS) && VM_Version::is_cpuinfo_segv_addr_apx(pc)) {
// Verify that the OS saves/restores APX registers.
stub = VM_Version::cpuinfo_cont_addr_apx();

View File

@ -248,7 +248,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
stub = VM_Version::cpuinfo_cont_addr();
}
#ifndef PRODUCT
#if !defined(PRODUCT) && defined(_LP64)
if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr_apx(pc)) {
// Verify that the OS saves/restores APX registers.
stub = VM_Version::cpuinfo_cont_addr_apx();

View File

@ -66,49 +66,68 @@ public class AMD64 extends Architecture {
public static final Register r14 = new Register(14, 14, "r14", CPU);
public static final Register r15 = new Register(15, 15, "r15", CPU);
public static final Register r16 = new Register(16, 16, "r16", CPU);
public static final Register r17 = new Register(17, 17, "r17", CPU);
public static final Register r18 = new Register(18, 18, "r18", CPU);
public static final Register r19 = new Register(19, 19, "r19", CPU);
public static final Register r20 = new Register(20, 20, "r20", CPU);
public static final Register r21 = new Register(21, 21, "r21", CPU);
public static final Register r22 = new Register(22, 22, "r22", CPU);
public static final Register r23 = new Register(23, 23, "r23", CPU);
public static final Register r24 = new Register(24, 24, "r24", CPU);
public static final Register r25 = new Register(25, 25, "r25", CPU);
public static final Register r26 = new Register(26, 26, "r26", CPU);
public static final Register r27 = new Register(27, 27, "r27", CPU);
public static final Register r28 = new Register(28, 28, "r28", CPU);
public static final Register r29 = new Register(29, 29, "r29", CPU);
public static final Register r30 = new Register(30, 30, "r30", CPU);
public static final Register r31 = new Register(31, 31, "r31", CPU);
public static final Register[] cpuRegisters = {
rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
r8, r9, r10, r11, r12, r13, r14, r15
r8, r9, r10, r11, r12, r13, r14, r15,
r16, r17, r18, r19, r20, r21, r22, r23,
r24, r25, r26, r27, r28, r29, r30, r31
};
public static final RegisterCategory XMM = new RegisterCategory("XMM");
// XMM registers
public static final Register xmm0 = new Register(16, 0, "xmm0", XMM);
public static final Register xmm1 = new Register(17, 1, "xmm1", XMM);
public static final Register xmm2 = new Register(18, 2, "xmm2", XMM);
public static final Register xmm3 = new Register(19, 3, "xmm3", XMM);
public static final Register xmm4 = new Register(20, 4, "xmm4", XMM);
public static final Register xmm5 = new Register(21, 5, "xmm5", XMM);
public static final Register xmm6 = new Register(22, 6, "xmm6", XMM);
public static final Register xmm7 = new Register(23, 7, "xmm7", XMM);
public static final Register xmm0 = new Register(32, 0, "xmm0", XMM);
public static final Register xmm1 = new Register(33, 1, "xmm1", XMM);
public static final Register xmm2 = new Register(34, 2, "xmm2", XMM);
public static final Register xmm3 = new Register(35, 3, "xmm3", XMM);
public static final Register xmm4 = new Register(36, 4, "xmm4", XMM);
public static final Register xmm5 = new Register(37, 5, "xmm5", XMM);
public static final Register xmm6 = new Register(38, 6, "xmm6", XMM);
public static final Register xmm7 = new Register(39, 7, "xmm7", XMM);
public static final Register xmm8 = new Register(24, 8, "xmm8", XMM);
public static final Register xmm9 = new Register(25, 9, "xmm9", XMM);
public static final Register xmm10 = new Register(26, 10, "xmm10", XMM);
public static final Register xmm11 = new Register(27, 11, "xmm11", XMM);
public static final Register xmm12 = new Register(28, 12, "xmm12", XMM);
public static final Register xmm13 = new Register(29, 13, "xmm13", XMM);
public static final Register xmm14 = new Register(30, 14, "xmm14", XMM);
public static final Register xmm15 = new Register(31, 15, "xmm15", XMM);
public static final Register xmm8 = new Register(40, 8, "xmm8", XMM);
public static final Register xmm9 = new Register(41, 9, "xmm9", XMM);
public static final Register xmm10 = new Register(42, 10, "xmm10", XMM);
public static final Register xmm11 = new Register(43, 11, "xmm11", XMM);
public static final Register xmm12 = new Register(44, 12, "xmm12", XMM);
public static final Register xmm13 = new Register(45, 13, "xmm13", XMM);
public static final Register xmm14 = new Register(46, 14, "xmm14", XMM);
public static final Register xmm15 = new Register(47, 15, "xmm15", XMM);
public static final Register xmm16 = new Register(32, 16, "xmm16", XMM);
public static final Register xmm17 = new Register(33, 17, "xmm17", XMM);
public static final Register xmm18 = new Register(34, 18, "xmm18", XMM);
public static final Register xmm19 = new Register(35, 19, "xmm19", XMM);
public static final Register xmm20 = new Register(36, 20, "xmm20", XMM);
public static final Register xmm21 = new Register(37, 21, "xmm21", XMM);
public static final Register xmm22 = new Register(38, 22, "xmm22", XMM);
public static final Register xmm23 = new Register(39, 23, "xmm23", XMM);
public static final Register xmm16 = new Register(48, 16, "xmm16", XMM);
public static final Register xmm17 = new Register(49, 17, "xmm17", XMM);
public static final Register xmm18 = new Register(50, 18, "xmm18", XMM);
public static final Register xmm19 = new Register(51, 19, "xmm19", XMM);
public static final Register xmm20 = new Register(52, 20, "xmm20", XMM);
public static final Register xmm21 = new Register(53, 21, "xmm21", XMM);
public static final Register xmm22 = new Register(54, 22, "xmm22", XMM);
public static final Register xmm23 = new Register(55, 23, "xmm23", XMM);
public static final Register xmm24 = new Register(40, 24, "xmm24", XMM);
public static final Register xmm25 = new Register(41, 25, "xmm25", XMM);
public static final Register xmm26 = new Register(42, 26, "xmm26", XMM);
public static final Register xmm27 = new Register(43, 27, "xmm27", XMM);
public static final Register xmm28 = new Register(44, 28, "xmm28", XMM);
public static final Register xmm29 = new Register(45, 29, "xmm29", XMM);
public static final Register xmm30 = new Register(46, 30, "xmm30", XMM);
public static final Register xmm31 = new Register(47, 31, "xmm31", XMM);
public static final Register xmm24 = new Register(56, 24, "xmm24", XMM);
public static final Register xmm25 = new Register(57, 25, "xmm25", XMM);
public static final Register xmm26 = new Register(58, 26, "xmm26", XMM);
public static final Register xmm27 = new Register(59, 27, "xmm27", XMM);
public static final Register xmm28 = new Register(60, 28, "xmm28", XMM);
public static final Register xmm29 = new Register(61, 29, "xmm29", XMM);
public static final Register xmm30 = new Register(62, 30, "xmm30", XMM);
public static final Register xmm31 = new Register(63, 31, "xmm31", XMM);
public static final Register[] xmmRegistersSSE = {
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
@ -124,14 +143,14 @@ public class AMD64 extends Architecture {
public static final RegisterCategory MASK = new RegisterCategory("MASK", false);
public static final Register k0 = new Register(48, 0, "k0", MASK);
public static final Register k1 = new Register(49, 1, "k1", MASK);
public static final Register k2 = new Register(50, 2, "k2", MASK);
public static final Register k3 = new Register(51, 3, "k3", MASK);
public static final Register k4 = new Register(52, 4, "k4", MASK);
public static final Register k5 = new Register(53, 5, "k5", MASK);
public static final Register k6 = new Register(54, 6, "k6", MASK);
public static final Register k7 = new Register(55, 7, "k7", MASK);
public static final Register k0 = new Register(64, 0, "k0", MASK);
public static final Register k1 = new Register(65, 1, "k1", MASK);
public static final Register k2 = new Register(66, 2, "k2", MASK);
public static final Register k3 = new Register(67, 3, "k3", MASK);
public static final Register k4 = new Register(68, 4, "k4", MASK);
public static final Register k5 = new Register(69, 5, "k5", MASK);
public static final Register k6 = new Register(70, 6, "k6", MASK);
public static final Register k7 = new Register(71, 7, "k7", MASK);
public static final RegisterArray valueRegistersSSE = new RegisterArray(
rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
@ -143,6 +162,8 @@ public class AMD64 extends Architecture {
public static final RegisterArray valueRegistersAVX512 = new RegisterArray(
rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
r8, r9, r10, r11, r12, r13, r14, r15,
r16, r17, r18, r19, r20, r21, r22, r23,
r24, r25, r26, r27, r28, r29, r30, r31,
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
@ -153,7 +174,7 @@ public class AMD64 extends Architecture {
/**
* Register used to construct an instruction-relative address.
*/
public static final Register rip = new Register(56, -1, "rip", SPECIAL);
public static final Register rip = new Register(72, -1, "rip", SPECIAL);
public static final RegisterArray allRegisters = new RegisterArray(
rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,