8240772: x86_64: Pre-generate Assembler::popa, pusha and vzeroupper
Reviewed-by: iklam, kvn
This commit is contained in:
parent
c5bd0d7934
commit
7fe46b2464
@ -7290,7 +7290,7 @@ void Assembler::evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, i
|
||||
emit_int8((unsigned char)mask);
|
||||
}
|
||||
|
||||
void Assembler::vzeroupper() {
|
||||
void Assembler::vzeroupper_uncached() {
|
||||
if (VM_Version::supports_vzeroupper()) {
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
(void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
|
||||
@ -7301,6 +7301,10 @@ void Assembler::vzeroupper() {
|
||||
#ifndef _LP64
|
||||
// 32bit only pieces of the assembler
|
||||
|
||||
void Assembler::vzeroupper() {
|
||||
vzeroupper_uncached();
|
||||
}
|
||||
|
||||
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
|
||||
// NO PREFIX AS NEVER 64BIT
|
||||
InstructionMark im(this);
|
||||
@ -9250,27 +9254,6 @@ void Assembler::orq(Register dst, Register src) {
|
||||
emit_arith(0x0B, 0xC0, dst, src);
|
||||
}
|
||||
|
||||
void Assembler::popa() { // 64bit
|
||||
movq(r15, Address(rsp, 0));
|
||||
movq(r14, Address(rsp, wordSize));
|
||||
movq(r13, Address(rsp, 2 * wordSize));
|
||||
movq(r12, Address(rsp, 3 * wordSize));
|
||||
movq(r11, Address(rsp, 4 * wordSize));
|
||||
movq(r10, Address(rsp, 5 * wordSize));
|
||||
movq(r9, Address(rsp, 6 * wordSize));
|
||||
movq(r8, Address(rsp, 7 * wordSize));
|
||||
movq(rdi, Address(rsp, 8 * wordSize));
|
||||
movq(rsi, Address(rsp, 9 * wordSize));
|
||||
movq(rbp, Address(rsp, 10 * wordSize));
|
||||
// skip rsp
|
||||
movq(rbx, Address(rsp, 12 * wordSize));
|
||||
movq(rdx, Address(rsp, 13 * wordSize));
|
||||
movq(rcx, Address(rsp, 14 * wordSize));
|
||||
movq(rax, Address(rsp, 15 * wordSize));
|
||||
|
||||
addq(rsp, 16 * wordSize);
|
||||
}
|
||||
|
||||
void Assembler::popcntq(Register dst, Address src) {
|
||||
assert(VM_Version::supports_popcnt(), "must support");
|
||||
InstructionMark im(this);
|
||||
@ -9297,7 +9280,103 @@ void Assembler::popq(Address dst) {
|
||||
emit_operand(rax, dst);
|
||||
}
|
||||
|
||||
// Precomputable: popa, pusha, vzeroupper
|
||||
|
||||
// The results of these routines are invariant from one invocation to another
|
||||
// invocation for the duration of a run. Caching the result on bootstrap
|
||||
// and copying it out on subsequent invocations can thus be beneficial
|
||||
// Set to true by Assembler::precompute_instructions(), which uses it to
// guarantee that it runs only once.
static bool precomputed = false;
|
||||
|
||||
// Saved bytes and byte count of the popa sequence; filled in by
// Assembler::precompute_instructions() and copied out by Assembler::popa().
static u_char* popa_code = NULL;
|
||||
static int popa_len = 0;
|
||||
|
||||
// Saved bytes and byte count of the pusha sequence; filled in by
// Assembler::precompute_instructions() and copied out by Assembler::pusha().
static u_char* pusha_code = NULL;
|
||||
static int pusha_len = 0;
|
||||
|
||||
// Saved bytes and byte count of the vzeroupper sequence. When nothing was
// emitted for it, vzup_len is 0 and vzup_code aliases pusha_code as a
// non-NULL dummy.
static u_char* vzup_code = NULL;
|
||||
static int vzup_len = 0;
|
||||
|
||||
void Assembler::precompute_instructions() {
|
||||
assert(!Universe::is_fully_initialized(), "must still be single threaded");
|
||||
guarantee(!precomputed, "only once");
|
||||
precomputed = true;
|
||||
ResourceMark rm;
|
||||
|
||||
// Make a temporary buffer big enough for the routines we're capturing
|
||||
int size = 256;
|
||||
char* tmp_code = NEW_RESOURCE_ARRAY(char, size);
|
||||
CodeBuffer buffer((address)tmp_code, size);
|
||||
MacroAssembler masm(&buffer);
|
||||
|
||||
address begin_popa = masm.code_section()->end();
|
||||
masm.popa_uncached();
|
||||
address end_popa = masm.code_section()->end();
|
||||
masm.pusha_uncached();
|
||||
address end_pusha = masm.code_section()->end();
|
||||
masm.vzeroupper_uncached();
|
||||
address end_vzup = masm.code_section()->end();
|
||||
|
||||
// Save the instructions to permanent buffers.
|
||||
popa_len = (int)(end_popa - begin_popa);
|
||||
popa_code = NEW_C_HEAP_ARRAY(u_char, popa_len, mtInternal);
|
||||
memcpy(popa_code, begin_popa, popa_len);
|
||||
|
||||
pusha_len = (int)(end_pusha - end_popa);
|
||||
pusha_code = NEW_C_HEAP_ARRAY(u_char, pusha_len, mtInternal);
|
||||
memcpy(pusha_code, end_popa, pusha_len);
|
||||
|
||||
vzup_len = (int)(end_vzup - end_pusha);
|
||||
if (vzup_len > 0) {
|
||||
vzup_code = NEW_C_HEAP_ARRAY(u_char, vzup_len, mtInternal);
|
||||
memcpy(vzup_code, end_pusha, vzup_len);
|
||||
} else {
|
||||
vzup_code = pusha_code; // dummy
|
||||
}
|
||||
|
||||
assert(masm.code()->total_oop_size() == 0 &&
|
||||
masm.code()->total_metadata_size() == 0 &&
|
||||
masm.code()->total_relocation_size() == 0,
|
||||
"pre-computed code can't reference oops, metadata or contain relocations");
|
||||
}
|
||||
|
||||
// Appends src_len bytes from src to code_section at its current end and
// advances the section end past them.
//   code_section - section that receives the bytes
//   src          - pre-computed instruction bytes; must not be NULL
//   src_len      - number of bytes to copy
static void emit_copy(CodeSection* code_section, u_char* src, int src_len) {
  assert(src != NULL, "code to copy must have been pre-computed");
  assert(code_section->limit() - code_section->end() > src_len, "code buffer not large enough");

  address const insertion_point = code_section->end();
  memcpy(insertion_point, src, src_len);
  code_section->set_end(insertion_point + src_len);
}
|
||||
|
||||
void Assembler::popa() { // 64bit
|
||||
emit_copy(code_section(), popa_code, popa_len);
|
||||
}
|
||||
|
||||
void Assembler::popa_uncached() { // 64bit
|
||||
movq(r15, Address(rsp, 0));
|
||||
movq(r14, Address(rsp, wordSize));
|
||||
movq(r13, Address(rsp, 2 * wordSize));
|
||||
movq(r12, Address(rsp, 3 * wordSize));
|
||||
movq(r11, Address(rsp, 4 * wordSize));
|
||||
movq(r10, Address(rsp, 5 * wordSize));
|
||||
movq(r9, Address(rsp, 6 * wordSize));
|
||||
movq(r8, Address(rsp, 7 * wordSize));
|
||||
movq(rdi, Address(rsp, 8 * wordSize));
|
||||
movq(rsi, Address(rsp, 9 * wordSize));
|
||||
movq(rbp, Address(rsp, 10 * wordSize));
|
||||
// skip rsp
|
||||
movq(rbx, Address(rsp, 12 * wordSize));
|
||||
movq(rdx, Address(rsp, 13 * wordSize));
|
||||
movq(rcx, Address(rsp, 14 * wordSize));
|
||||
movq(rax, Address(rsp, 15 * wordSize));
|
||||
|
||||
addq(rsp, 16 * wordSize);
|
||||
}
|
||||
|
||||
void Assembler::pusha() { // 64bit
|
||||
emit_copy(code_section(), pusha_code, pusha_len);
|
||||
}
|
||||
|
||||
void Assembler::pusha_uncached() { // 64bit
|
||||
// we have to store original rsp. ABI says that 128 bytes
|
||||
// below rsp are local scratch.
|
||||
movq(Address(rsp, -5 * wordSize), rsp);
|
||||
@ -9322,6 +9401,10 @@ void Assembler::pusha() { // 64bit
|
||||
movq(Address(rsp, 0), r15);
|
||||
}
|
||||
|
||||
void Assembler::vzeroupper() {
|
||||
emit_copy(code_section(), vzup_code, vzup_len);
|
||||
}
|
||||
|
||||
void Assembler::pushq(Address src) {
|
||||
InstructionMark im(this);
|
||||
prefixq(src);
|
||||
|
@ -885,6 +885,17 @@ private:
|
||||
|
||||
void mov(Register dst, Register src);
|
||||
|
||||
#ifdef _LP64
|
||||
// Support for caching the result of some routines: their instruction bytes
// are generated once and copied out on later calls.
|
||||
|
||||
// Generates and saves the cached sequences. Must be called before pusha(),
// popa(), vzeroupper() - checked with asserts.
|
||||
static void precompute_instructions();
|
||||
|
||||
// Variants that encode the instruction sequence directly; these are what
// precompute_instructions() calls to produce the cached bytes.
void pusha_uncached();
|
||||
void popa_uncached();
|
||||
#endif
|
||||
void vzeroupper_uncached();
|
||||
|
||||
void pusha();
|
||||
void popa();
|
||||
|
||||
|
@ -562,7 +562,10 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
__ jcc(Assembler::equal, L_wrapup);
|
||||
__ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
|
||||
__ jcc(Assembler::equal, L_wrapup);
|
||||
__ vzeroupper();
|
||||
// vzeroupper() will use a pre-computed instruction sequence that we
|
||||
// can't compute until after we've determined CPU capabilities. Use
|
||||
// uncached variant here directly to be able to bootstrap correctly
|
||||
__ vzeroupper_uncached();
|
||||
# undef __
|
||||
}
|
||||
};
|
||||
@ -1833,6 +1836,9 @@ void VM_Version::initialize() {
|
||||
g.generate_get_cpu_info());
|
||||
|
||||
get_processor_features();
|
||||
|
||||
LP64_ONLY(Assembler::precompute_instructions();)
|
||||
|
||||
if (cpu_family() > 4) { // it supports CPUID
|
||||
check_virtualizations();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user