8260355: AArch64: deoptimization stub should save vector registers

Reviewed-by: vlivanov, aph
This commit is contained in:
Nick Gasson 2021-02-09 01:49:52 +00:00
parent 5d8204b169
commit 5183d8ae1e
12 changed files with 182 additions and 75 deletions

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "precompiled.hpp"
#include "runtime/registerMap.hpp"
#include "vmreg_aarch64.inline.hpp"
// Platform-dependent lookup of the saved location of one slot of a
// register.
//
// base_reg  the VMReg naming the register; for a float register this
//           must be the concrete (base) VMReg of that register
// slot_idx  index of the requested slot relative to base_reg, in units
//           of VMRegImpl::stack_slot_size
//
// Returns the address of the requested slot, or NULL if no location is
// recorded for base_reg (float register case).
address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const {
  if (base_reg->is_FloatRegister()) {
    // Not all physical slots of an SVE register have corresponding
    // VMRegs. However they are always saved to the stack in a
    // contiguous region of memory so we can calculate the address of
    // the upper slots by offsetting from the base address.
    assert(base_reg->is_concrete(), "must pass base reg");
    intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size;
    address base_location = location(base_reg);
    if (base_location != NULL) {
      return base_location + offset_in_bytes;
    } else {
      return NULL;
    }
  } else {
    // Every slot of a non-float register has its own VMReg, so the
    // ordinary per-VMReg lookup is sufficient.
    return location(base_reg->next(slot_idx));
  }
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -32,8 +32,8 @@
private:
// This is the hook for finding a register in an "well-known" location,
// such as a register block of a predetermined format.
// Since there is none, we just return NULL.
address pd_location(VMReg reg) const {return NULL;}
address pd_location(VMReg reg) const { return NULL; }
address pd_location(VMReg base_reg, int slot_idx) const;
// no PD state to clear or copy:
void pd_clear() {}

View File

@ -85,26 +85,24 @@ class SimpleRuntimeFrame {
// FIXME -- this is used by C1
class RegisterSaver {
const bool _save_vectors;
public:
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {}
OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
void restore_live_registers(MacroAssembler* masm);
// Offsets into the register save area
// Used by deoptimization when it is managing result register
// values on its own
static int r0_offset_in_bytes(void) { return (32 + r0->encoding()) * wordSize; }
static int reg_offset_in_bytes(Register r) { return r0_offset_in_bytes() + r->encoding() * wordSize; }
static int rmethod_offset_in_bytes(void) { return reg_offset_in_bytes(rmethod); }
static int rscratch1_offset_in_bytes(void) { return (32 + rscratch1->encoding()) * wordSize; }
static int v0_offset_in_bytes(void) { return 0; }
static int return_offset_in_bytes(void) { return (32 /* floats*/ + 31 /* gregs*/) * wordSize; }
int reg_offset_in_bytes(Register r);
int r0_offset_in_bytes() { return reg_offset_in_bytes(r0); }
int rscratch1_offset_in_bytes() { return reg_offset_in_bytes(rscratch1); }
int v0_offset_in_bytes(void) { return 0; }
// During deoptimization only the result registers need to be restored,
// all the other values have already been extracted.
static void restore_result_registers(MacroAssembler* masm);
// Capture info about frame layout
// Capture info about frame layout
// Note this is only correct when not saving full vectors.
enum layout {
fpu_state_off = 0,
fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1,
@ -119,7 +117,31 @@ class RegisterSaver {
};
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
int RegisterSaver::reg_offset_in_bytes(Register r) {
  // In the frame pushed by save_live_registers() the integer registers
  // sit above the floating point registers, so the integer area starts
  // at the size of the floating point area. That size depends on
  // whether full vectors are being saved and, if they are, whether
  // they are NEON or SVE vectors.
  int fp_slots_per_reg = FloatRegisterImpl::save_slots_per_register;

#if COMPILER2_OR_JVMCI
  if (_save_vectors) {
#ifdef COMPILER2
    fp_slots_per_reg = Matcher::supports_scalable_vector()
                         ? Matcher::scalable_vector_reg_size(T_FLOAT)
                         : FloatRegisterImpl::slots_per_neon_register;
#else
    fp_slots_per_reg = FloatRegisterImpl::slots_per_neon_register;
#endif
  }
#endif

  // Bytes occupied by all saved floating point registers.
  int fp_area_in_bytes = (fp_slots_per_reg * FloatRegisterImpl::number_of_registers) * BytesPerInt;
  // Each integer register takes one word, indexed by its encoding.
  int gpr_offset_in_bytes = r->encoding() * wordSize;
  return fp_area_in_bytes + gpr_offset_in_bytes;
}
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
bool use_sve = false;
int sve_vector_size_in_bytes = 0;
int sve_vector_size_in_slots = 0;
@ -131,7 +153,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
#endif
#if COMPILER2_OR_JVMCI
if (save_vectors) {
if (_save_vectors) {
int vect_words = 0;
int extra_save_slots_per_register = 0;
// Save upper half of vector registers
@ -145,7 +167,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
additional_frame_words += vect_words;
}
#else
assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
#endif
int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
@ -160,7 +182,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// Save Integer and Float registers.
__ enter();
__ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes);
__ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes);
// Set an oopmap for the call site. This oopmap will map all
// oop-registers and debug-info registers as callee-saved. This
@ -185,7 +207,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
FloatRegister r = as_FloatRegister(i);
int sp_offset = 0;
if (save_vectors) {
if (_save_vectors) {
sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
(FloatRegisterImpl::slots_per_neon_register * i);
} else {
@ -198,37 +220,20 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
return oop_map;
}
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
#ifdef COMPILER2
__ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(),
__ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
Matcher::scalable_vector_reg_size(T_BYTE));
#else
#if !INCLUDE_JVMCI
assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
#endif
__ pop_CPU_state(restore_vectors);
__ pop_CPU_state(_save_vectors);
#endif
__ leave();
}
// Emit code that reloads only the result registers (v0 and r0) from the
// register save area at sp and then removes that area from the stack.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
// Just restore the result registers. Only used by deoptimization. By
// now any callee-saved register that needs to be restored to a c2
// caller of the deoptee has been extracted into the vframeArray
// and will be stuffed into the c2i adapter we create for later
// restoration, so only the result registers need to be restored here.
// Restore fp result register
__ ldrd(v0, Address(sp, v0_offset_in_bytes()));
// Restore integer result register
__ ldr(r0, Address(sp, r0_offset_in_bytes()));
// Pop all of the register save area off the stack; the amount popped
// is return_offset_in_bytes() rounded up to a multiple of 16.
__ add(sp, sp, align_up(return_offset_in_bytes(), 16));
}
// Is vector's size (in bytes) bigger than a size saved by default?
// 8 bytes vector registers are saved by default on AArch64.
bool SharedRuntime::is_wide_vector(int size) {
@ -2164,6 +2169,7 @@ void SharedRuntime::generate_deopt_blob() {
int frame_size_in_words;
OopMap* map = NULL;
OopMapSet *oop_maps = new OopMapSet();
RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0);
// -------------
// This code enters when returning to a de-optimized nmethod. A return
@ -2201,7 +2207,7 @@ void SharedRuntime::generate_deopt_blob() {
// Prolog for non exception case!
// Save everything in sight.
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
// Normal deoptimization. Save exec mode for unpack_frames.
__ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved
@ -2219,7 +2225,7 @@ void SharedRuntime::generate_deopt_blob() {
// the return address is the pc that describes which bci to re-execute at
// No need to update map as each call to save_live_registers will produce identical oopmap
(void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
(void) reg_save.save_live_registers(masm, 0, &frame_size_in_words);
__ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
__ b(cont);
@ -2238,7 +2244,7 @@ void SharedRuntime::generate_deopt_blob() {
uncommon_trap_offset = __ pc() - start;
// Save everything in sight.
RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
reg_save.save_live_registers(masm, 0, &frame_size_in_words);
// fetch_unroll_info needs to call last_java_frame()
Label retaddr;
__ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
@ -2295,7 +2301,7 @@ void SharedRuntime::generate_deopt_blob() {
// This is a somewhat fragile mechanism.
// Save everything in sight.
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
// Now it is safe to overwrite any register
@ -2376,7 +2382,7 @@ void SharedRuntime::generate_deopt_blob() {
__ verify_oop(r0);
// Overwrite the result registers with the exception results.
__ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
__ str(r0, Address(sp, reg_save.r0_offset_in_bytes()));
// I think this is useless
// __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
@ -2385,7 +2391,14 @@ void SharedRuntime::generate_deopt_blob() {
// Only register save data is on the stack.
// Now restore the result registers. Everything else is either dead
// or captured in the vframeArray.
RegisterSaver::restore_result_registers(masm);
// Restore fp result register
__ ldrd(v0, Address(sp, reg_save.v0_offset_in_bytes()));
// Restore integer result register
__ ldr(r0, Address(sp, reg_save.r0_offset_in_bytes()));
// Pop all of the register save area off the stack
__ add(sp, sp, frame_size_in_words * wordSize);
// All of the register save area has been popped off the stack. Only the
// return address remains.
@ -2466,8 +2479,8 @@ void SharedRuntime::generate_deopt_blob() {
__ sub(sp, sp, (frame_size_in_words - 2) * wordSize);
// Restore frame locals after moving the frame
__ strd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
__ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
__ strd(v0, Address(sp, reg_save.v0_offset_in_bytes()));
__ str(r0, Address(sp, reg_save.r0_offset_in_bytes()));
// Call C code. Need thread but NOT official VM entry
// crud. We cannot block on this call, no GC can happen. Call should
@ -2494,8 +2507,8 @@ void SharedRuntime::generate_deopt_blob() {
__ reset_last_Java_frame(true);
// Collect return values
__ ldrd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
__ ldr(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
__ ldrd(v0, Address(sp, reg_save.v0_offset_in_bytes()));
__ ldr(r0, Address(sp, reg_save.r0_offset_in_bytes()));
// I think this is useless (throwing pc?)
// __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
@ -2741,10 +2754,10 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
address call_pc = NULL;
int frame_size_in_words;
bool cause_return = (poll_type == POLL_AT_RETURN);
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */);
// Save Integer and Float registers.
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
// The following is basically a call_VM. However, we need the precise
// address of the call in order to generate an oopmap. Hence, we do all the
@ -2789,7 +2802,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
// Exception pending
RegisterSaver::restore_live_registers(masm, save_vectors);
reg_save.restore_live_registers(masm);
__ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
@ -2821,7 +2834,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
__ bind(no_adjust);
// Normal exit, restore registers and exit.
RegisterSaver::restore_live_registers(masm, save_vectors);
reg_save.restore_live_registers(masm);
__ ret(lr);
@ -2855,13 +2868,14 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
MacroAssembler* masm = new MacroAssembler(&buffer);
int frame_size_in_words;
RegisterSaver reg_save(false /* save_vectors */);
OopMapSet *oop_maps = new OopMapSet();
OopMap* map = NULL;
int start = __ offset();
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
int frame_complete = __ offset();
@ -2893,11 +2907,11 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
// get the returned Method*
__ get_vm_result_2(rmethod, rthread);
__ str(rmethod, Address(sp, RegisterSaver::reg_offset_in_bytes(rmethod)));
__ str(rmethod, Address(sp, reg_save.reg_offset_in_bytes(rmethod)));
// r0 is where we want to jump, overwrite rscratch1 which is saved and scratch
__ str(r0, Address(sp, RegisterSaver::rscratch1_offset_in_bytes()));
RegisterSaver::restore_live_registers(masm);
__ str(r0, Address(sp, reg_save.rscratch1_offset_in_bytes()));
reg_save.restore_live_registers(masm);
// We are back to the original state on entry and ready to go.
@ -2907,7 +2921,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
__ bind(pending);
RegisterSaver::restore_live_registers(masm);
reg_save.restore_live_registers(masm);
// exception pending => remove activation and forward to exception handler

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -34,8 +34,11 @@ inline bool is_FloatRegister() {
return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr;
}
inline Register as_Register() {
inline bool is_PRegister() {
  // A PRegister VMReg has a value in the half-open range
  // [ConcreteRegisterImpl::max_fpr, ConcreteRegisterImpl::max_pr).
  if (value() < ConcreteRegisterImpl::max_fpr) {
    return false;
  }
  return value() < ConcreteRegisterImpl::max_pr;
}
inline Register as_Register() {
assert( is_Register(), "must be");
// Yuk
return ::as_Register(value() / RegisterImpl::max_slots_per_register);
@ -48,9 +51,22 @@ inline FloatRegister as_FloatRegister() {
FloatRegisterImpl::max_slots_per_register);
}
inline bool is_concrete() {
inline PRegister as_PRegister() {
  assert(is_PRegister(), "must be");
  // Rebase the VMReg value onto the start of the PRegister range, then
  // divide by the slots per register to obtain the register number.
  const intptr_t slot_in_range = value() - ConcreteRegisterImpl::max_fpr;
  return ::as_PRegister(slot_in_range / PRegisterImpl::max_slots_per_register);
}
inline bool is_concrete() {
assert(is_reg(), "must be");
return is_even(value());
if (is_FloatRegister()) {
int base = value() - ConcreteRegisterImpl::max_gpr;
return base % FloatRegisterImpl::max_slots_per_register == 0;
} else if (is_PRegister()) {
return true; // Single slot
} else {
return is_even(value());
}
}
#endif // CPU_AARCH64_VMREG_AARCH64_HPP

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,11 @@
// Since there is none, we just return NULL.
// See registerMap_sparc.hpp for an example of grabbing registers
// from register save areas of a standard layout.
address pd_location(VMReg reg) const {return NULL;}
address pd_location(VMReg reg) const {return NULL;}
address pd_location(VMReg base_reg, int slot_idx) const {
  // Resolve the VMReg slot_idx slots after base_reg, then use the
  // ordinary per-VMReg lookup.
  VMReg slot_reg = base_reg->next(slot_idx);
  return location(slot_reg);
}
// no PD state to clear or copy:
void pd_clear() {}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2013 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -35,6 +35,10 @@
// Since there is none, we just return NULL.
address pd_location(VMReg reg) const { return NULL; }
address pd_location(VMReg base_reg, int slot_idx) const {
  // Resolve the VMReg slot_idx slots after base_reg, then use the
  // ordinary per-VMReg lookup.
  VMReg slot_reg = base_reg->next(slot_idx);
  return location(slot_reg);
}
// no PD state to clear or copy:
void pd_clear() {}
void pd_initialize() {}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -34,7 +34,11 @@
// This is the hook for finding a register in a "well-known" location,
// such as a register block of a predetermined format.
// Since there is none, we just return NULL.
address pd_location(VMReg reg) const {return NULL;}
address pd_location(VMReg reg) const {return NULL;}
address pd_location(VMReg base_reg, int slot_idx) const {
  // Resolve the VMReg slot_idx slots after base_reg, then use the
  // ordinary per-VMReg lookup.
  VMReg slot_reg = base_reg->next(slot_idx);
  return location(slot_reg);
}
// No PD state to clear or copy.
void pd_clear() {}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -61,3 +61,7 @@ address RegisterMap::pd_location(VMReg reg) const {
}
return NULL;
}
address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const {
  // Resolve the VMReg slot_idx slots after base_reg, then use the
  // ordinary per-VMReg lookup.
  VMReg slot_reg = base_reg->next(slot_idx);
  return location(slot_reg);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -32,6 +32,7 @@
// This is the hook for finding a register in an "well-known" location,
// such as a register block of a predetermined format.
address pd_location(VMReg reg) const;
address pd_location(VMReg base_reg, int slot_idx) const;
// no PD state to clear or copy:
void pd_clear() {}
void pd_initialize() {}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,6 +34,10 @@
// Since there is none, we just return NULL.
address pd_location(VMReg reg) const { return NULL; }
address pd_location(VMReg base_reg, int slot_idx) const {
  // Resolve the VMReg slot_idx slots after base_reg, then use the
  // ordinary per-VMReg lookup.
  VMReg slot_reg = base_reg->next(slot_idx);
  return location(slot_reg);
}
// no PD state to clear or copy:
void pd_clear() {}
void pd_initialize() {}

View File

@ -139,7 +139,7 @@ Handle VectorSupport::allocate_vector_payload_helper(InstanceKlass* ik, frame* f
int vslot = (i * elem_size) / VMRegImpl::stack_slot_size;
int off = (i * elem_size) % VMRegImpl::stack_slot_size;
address elem_addr = reg_map->location(vreg->next(vslot)) + off;
address elem_addr = reg_map->location(vreg, vslot) + off; // assumes little endian element order
init_payload_element(arr, is_mask, elem_bt, i, elem_addr);
}
} else {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -99,6 +99,14 @@ class RegisterMap : public StackObj {
}
}
address location(VMReg base_reg, int slot_idx) const {
  // Only a positive slot index goes through the platform-dependent
  // lookup; any other index resolves to the base register itself.
  return (slot_idx > 0) ? pd_location(base_reg, slot_idx)
                        : location(base_reg);
}
void set_location(VMReg reg, address loc) {
int index = reg->value() / location_valid_type_size;
assert(0 <= reg->value() && reg->value() < reg_count, "range check");