8136524: aarch64: test/compiler/runtime/7196199/Test7196199.java fails

Fix safepoint handlers to save 128 bits on vector poll

Reviewed-by: kvn
This commit is contained in:
Felix Yang 2015-09-15 12:59:51 +00:00 committed by Ed Nevill
parent 404fc5caa8
commit 00a6ff7050
3 changed files with 45 additions and 20 deletions

View File

@ -2286,18 +2286,30 @@ void MacroAssembler::c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_t
} }
#endif #endif
// NOTE: side-by-side diff rendering -- each line shows the old text followed by the new text.
// push_CPU_state pushes the integer registers selected by the mask 0x3fffffff, then
// stores floating-point registers 0..31 as pairs, walking from pair (30, 31) down to (0, 1).
// The new save_vectors parameter selects the store used for each pair:
//   false -> stpd with sp pre-indexed by -2 * wordSize per pair (the pre-change behavior);
//   true  -> stpq with sp pre-indexed by -4 * wordSize per pair, i.e. twice the space per
//            pair; per the commit message this keeps all 128 bits of each register.
void MacroAssembler::push_CPU_state() { void MacroAssembler::push_CPU_state(bool save_vectors) {
push(0x3fffffff, sp); // integer registers except lr & sp push(0x3fffffff, sp); // integer registers except lr & sp
if (!save_vectors) {
for (int i = 30; i >= 0; i -= 2) for (int i = 30; i >= 0; i -= 2)
stpd(as_FloatRegister(i), as_FloatRegister(i+1), stpd(as_FloatRegister(i), as_FloatRegister(i+1),
Address(pre(sp, -2 * wordSize))); Address(pre(sp, -2 * wordSize)));
} else {
// Vector path: same register walk as above, but each pair takes 4 * wordSize of stack.
for (int i = 30; i >= 0; i -= 2)
stpq(as_FloatRegister(i), as_FloatRegister(i+1),
Address(pre(sp, -4 * wordSize)));
}
} }
// NOTE: side-by-side diff rendering -- each line shows the old text followed by the new text.
// pop_CPU_state undoes push_CPU_state in reverse order: floating-point register pairs are
// reloaded first, from (0, 1) up to (30, 31), and the integer registers selected by the
// mask 0x3fffffff are popped last.
// The new restore_vectors parameter selects the load used for each pair:
//   false -> ldpd with sp post-indexed by 2 * wordSize per pair (the pre-change behavior);
//   true  -> ldpq with sp post-indexed by 4 * wordSize per pair.
// The flag must match the save_vectors value given to the corresponding push_CPU_state,
// because the two modes consume different amounts of stack per pair.
void MacroAssembler::pop_CPU_state() { void MacroAssembler::pop_CPU_state(bool restore_vectors) {
for (int i = 0; i < 32; i += 2) if (!restore_vectors) {
ldpd(as_FloatRegister(i), as_FloatRegister(i+1), for (int i = 0; i < 32; i += 2)
Address(post(sp, 2 * wordSize))); ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
Address(post(sp, 2 * wordSize)));
} else {
// Vector path: same register walk as above, but each pair releases 4 * wordSize of stack.
for (int i = 0; i < 32; i += 2)
ldpq(as_FloatRegister(i), as_FloatRegister(i+1),
Address(post(sp, 4 * wordSize)));
}
pop(0x3fffffff, sp); // integer registers except lr & sp pop(0x3fffffff, sp); // integer registers except lr & sp
} }

View File

@ -777,8 +777,8 @@ public:
DEBUG_ONLY(void verify_heapbase(const char* msg);) DEBUG_ONLY(void verify_heapbase(const char* msg);)
// Old declaration (left) vs. new declaration (right). The added save_vectors parameter
// defaults to false, so existing zero-argument callers keep compiling unchanged.
void push_CPU_state(); void push_CPU_state(bool save_vectors = false);
// Old declaration (left) vs. new declaration (right). The added restore_vectors parameter
// defaults to false, so existing zero-argument callers keep compiling unchanged.
void pop_CPU_state() ; void pop_CPU_state(bool restore_vectors = false) ;
// Round up to a power of two // Round up to a power of two
void round_to(Register reg, int modulus); void round_to(Register reg, int modulus);

View File

@ -75,8 +75,8 @@ class SimpleRuntimeFrame {
// FIXME -- this is used by C1 // FIXME -- this is used by C1
class RegisterSaver { class RegisterSaver {
public: public:
// Old declaration (left) vs. new declaration (right). The new trailing save_vectors
// parameter defaults to false, so existing three-argument callers are unaffected.
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
// Old declaration (left) vs. new declaration (right). The new trailing restore_vectors
// parameter defaults to false, so existing one-argument callers are unaffected.
static void restore_live_registers(MacroAssembler* masm); static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
// Offsets into the register save area // Offsets into the register save area
// Used by deoptimization when it is managing result register // Used by deoptimization when it is managing result register
@ -108,7 +108,17 @@ class RegisterSaver {
}; };
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
#ifdef COMPILER2
if (save_vectors) {
// Save upper half of vector registers
int vect_words = 32 * 8 / wordSize;
additional_frame_words += vect_words;
}
#else
assert(!save_vectors, "vectors are generated only by C2");
#endif
int frame_size_in_bytes = round_to(additional_frame_words*wordSize + int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
reg_save_size*BytesPerInt, 16); reg_save_size*BytesPerInt, 16);
// OopMap frame size is in compiler stack slots (jint's) not bytes or words // OopMap frame size is in compiler stack slots (jint's) not bytes or words
@ -122,7 +132,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// Save registers, fpu state, and flags. // Save registers, fpu state, and flags.
__ enter(); __ enter();
__ push_CPU_state(); __ push_CPU_state(save_vectors);
// Set an oopmap for the call site. This oopmap will map all // Set an oopmap for the call site. This oopmap will map all
// oop-registers and debug-info registers as callee-saved. This // oop-registers and debug-info registers as callee-saved. This
@ -139,14 +149,14 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// register slots are 8 bytes // register slots are 8 bytes
// wide, 32 floating-point // wide, 32 floating-point
// registers // registers
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
r->as_VMReg()); r->as_VMReg());
} }
} }
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
FloatRegister r = as_FloatRegister(i); FloatRegister r = as_FloatRegister(i);
int sp_offset = 2 * i; int sp_offset = save_vectors ? (4 * i) : (2 * i);
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
r->as_VMReg()); r->as_VMReg());
} }
@ -154,8 +164,11 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
return oop_map; return oop_map;
} }
// NOTE: side-by-side diff rendering -- each line shows the old text followed by the new text.
// restore_live_registers pops the saved CPU state and then emits leave().
// New behavior: restore_vectors is forwarded to pop_CPU_state. In builds without
// COMPILER2 an assert rejects restore_vectors == true, as stated by the assert message
// ("vectors are generated only by C2").
void RegisterSaver::restore_live_registers(MacroAssembler* masm) { void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
__ pop_CPU_state(); #ifndef COMPILER2
assert(!restore_vectors, "vectors are generated only by C2");
#endif
__ pop_CPU_state(restore_vectors);
__ leave(); __ leave();
} }
@ -177,9 +190,9 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
} }
// NOTE: side-by-side diff rendering -- each line shows the old text followed by the new text.
// The threshold drops from 16 to 8 bytes: after this change is_wide_vector returns true
// for any vector size above 8 bytes, matching the new comment that only 8 bytes per
// vector register are saved by default on AArch64. The old comment described x86 XMM
// registers and fxsave/fxrstor.
// Is vector's size (in bytes) bigger than a size saved by default? // Is vector's size (in bytes) bigger than a size saved by default?
// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. // 8 bytes vector registers are saved by default on AArch64.
bool SharedRuntime::is_wide_vector(int size) { bool SharedRuntime::is_wide_vector(int size) {
return size > 16; return size > 8;
} }
// The java_calling_convention describes stack locations as ideal slots on // The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions // a frame with no abi restrictions. Since we must observe abi restrictions
@ -2742,7 +2755,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
// Save registers, fpu state, and flags // Save registers, fpu state, and flags
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
// The following is basically a call_VM. However, we need the precise // The following is basically a call_VM. However, we need the precise
// address of the call in order to generate an oopmap. Hence, we do all the // address of the call in order to generate an oopmap. Hence, we do all the
@ -2793,7 +2806,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
__ bind(noException); __ bind(noException);
// Normal exit, restore registers and exit. // Normal exit, restore registers and exit.
RegisterSaver::restore_live_registers(masm); RegisterSaver::restore_live_registers(masm, save_vectors);
__ ret(lr); __ ret(lr);