7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
Save whole XMM/YMM registers in safepoint interrupt handler. Reviewed-by: roland, twisti
This commit is contained in:
parent
45435c5485
commit
811c047ec1
@ -313,6 +313,14 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
|
||||
|
||||
}
|
||||
|
||||
// Is vector's size (in bytes) bigger than a size saved by default?
|
||||
// 8 bytes FP registers are saved by default on SPARC.
|
||||
bool SharedRuntime::is_wide_vector(int size) {
|
||||
// Note, MaxVectorSize == 8 on SPARC.
|
||||
assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size));
|
||||
return size > 8;
|
||||
}
|
||||
|
||||
// The java_calling_convention describes stack locations as ideal slots on
|
||||
// a frame with no abi restrictions. Since we must observe abi restrictions
|
||||
// (like the placement of the register window) the slots must be biased by
|
||||
@ -3734,7 +3742,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
|
||||
// the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
|
||||
// Tricky, tricky, tricky...
|
||||
|
||||
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
|
||||
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
|
||||
assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
|
||||
|
||||
// allocate space for the code
|
||||
@ -3752,6 +3760,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
|
||||
|
||||
int start = __ offset();
|
||||
|
||||
bool cause_return = (poll_type == POLL_AT_RETURN);
|
||||
// If this causes a return before the processing, then do a "restore"
|
||||
if (cause_return) {
|
||||
__ restore();
|
||||
|
@ -3496,6 +3496,33 @@ void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
|
||||
emit_byte(0x01);
|
||||
}
|
||||
|
||||
// Memory-source form of vinsertf128h: emits opcode 0x18 (VEX, 66 prefix,
// 0F 3A map, 256-bit) with a trailing immediate of 0x01, i.e. the 128 bits
// read from 'src' are inserted into the upper half of 'dst'.
// Requires AVX.
void Assembler::vinsertf128h(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  const bool vector256 = true;
  const int enc = dst->encoding();
  // src and dst are swapped for the encoding; dst's encoding is passed for
  // both register arguments of the prefix.
  vex_prefix(src, enc, enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x18);
  emit_operand(dst, src);
  // Immediate 0x01 selects insertion into the upper 128 bits.
  emit_byte(0x01);
}
|
||||
|
||||
// Memory-destination form of vextractf128h: emits opcode 0x19 (VEX, 66
// prefix, 0F 3A map, 256-bit) with a trailing immediate of 0x01, i.e. the
// upper 128 bits of 'src' are extracted to the address 'dst'.
// Requires AVX.
void Assembler::vextractf128h(Address dst, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  const bool vector256 = true;
  const int enc = src->encoding();
  vex_prefix(dst, 0, enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x19);
  emit_operand(src, dst);
  // Immediate 0x01 selects extraction from the upper 128 bits.
  emit_byte(0x01);
}
|
||||
|
||||
void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
bool vector256 = true;
|
||||
@ -3507,6 +3534,33 @@ void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
|
||||
emit_byte(0x01);
|
||||
}
|
||||
|
||||
// Memory-source form of vinserti128h: emits opcode 0x38 (VEX, 66 prefix,
// 0F 3A map, 256-bit) with a trailing immediate of 0x01, i.e. the 128 bits
// read from 'src' are inserted into the upper half of 'dst'.
// Requires AVX2.
void Assembler::vinserti128h(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx2(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  const bool vector256 = true;
  const int enc = dst->encoding();
  // src and dst are swapped for the encoding; dst's encoding is passed for
  // both register arguments of the prefix.
  vex_prefix(src, enc, enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x38);
  emit_operand(dst, src);
  // Immediate 0x01 selects insertion into the upper 128 bits.
  emit_byte(0x01);
}
|
||||
|
||||
// Memory-destination form of vextracti128h: emits opcode 0x39 (VEX, 66
// prefix, 0F 3A map, 256-bit) with a trailing immediate of 0x01, i.e. the
// upper 128 bits of 'src' are extracted to the address 'dst'.
// Requires AVX2.
void Assembler::vextracti128h(Address dst, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  const bool vector256 = true;
  const int enc = src->encoding();
  vex_prefix(dst, 0, enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x39);
  emit_operand(src, dst);
  // Immediate 0x01 selects extraction from the upper 128 bits.
  emit_byte(0x01);
}
|
||||
|
||||
void Assembler::vzeroupper() {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
(void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
|
||||
@ -8907,11 +8961,9 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
||||
pusha();
|
||||
|
||||
// if we are coming from c1, xmm registers may be live
|
||||
if (UseSSE >= 1) {
|
||||
subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
|
||||
}
|
||||
int off = 0;
|
||||
if (UseSSE == 1) {
|
||||
subptr(rsp, sizeof(jdouble)*8);
|
||||
movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
|
||||
movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
|
||||
movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
|
||||
@ -8921,23 +8973,50 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
||||
movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
|
||||
movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
|
||||
} else if (UseSSE >= 2) {
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7);
|
||||
#ifdef COMPILER2
|
||||
if (MaxVectorSize > 16) {
|
||||
assert(UseAVX > 0, "256bit vectors are supported only with AVX");
|
||||
// Save upper half of YMM registers
|
||||
subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
|
||||
vextractf128h(Address(rsp, 0),xmm0);
|
||||
vextractf128h(Address(rsp, 16),xmm1);
|
||||
vextractf128h(Address(rsp, 32),xmm2);
|
||||
vextractf128h(Address(rsp, 48),xmm3);
|
||||
vextractf128h(Address(rsp, 64),xmm4);
|
||||
vextractf128h(Address(rsp, 80),xmm5);
|
||||
vextractf128h(Address(rsp, 96),xmm6);
|
||||
vextractf128h(Address(rsp,112),xmm7);
|
||||
#ifdef _LP64
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14);
|
||||
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15);
|
||||
vextractf128h(Address(rsp,128),xmm8);
|
||||
vextractf128h(Address(rsp,144),xmm9);
|
||||
vextractf128h(Address(rsp,160),xmm10);
|
||||
vextractf128h(Address(rsp,176),xmm11);
|
||||
vextractf128h(Address(rsp,192),xmm12);
|
||||
vextractf128h(Address(rsp,208),xmm13);
|
||||
vextractf128h(Address(rsp,224),xmm14);
|
||||
vextractf128h(Address(rsp,240),xmm15);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
// Save whole 128bit (16 bytes) XMM registers
|
||||
subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
|
||||
movdqu(Address(rsp,off++*16),xmm0);
|
||||
movdqu(Address(rsp,off++*16),xmm1);
|
||||
movdqu(Address(rsp,off++*16),xmm2);
|
||||
movdqu(Address(rsp,off++*16),xmm3);
|
||||
movdqu(Address(rsp,off++*16),xmm4);
|
||||
movdqu(Address(rsp,off++*16),xmm5);
|
||||
movdqu(Address(rsp,off++*16),xmm6);
|
||||
movdqu(Address(rsp,off++*16),xmm7);
|
||||
#ifdef _LP64
|
||||
movdqu(Address(rsp,off++*16),xmm8);
|
||||
movdqu(Address(rsp,off++*16),xmm9);
|
||||
movdqu(Address(rsp,off++*16),xmm10);
|
||||
movdqu(Address(rsp,off++*16),xmm11);
|
||||
movdqu(Address(rsp,off++*16),xmm12);
|
||||
movdqu(Address(rsp,off++*16),xmm13);
|
||||
movdqu(Address(rsp,off++*16),xmm14);
|
||||
movdqu(Address(rsp,off++*16),xmm15);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -9015,28 +9094,52 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
||||
movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
|
||||
movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
|
||||
movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
|
||||
addptr(rsp, sizeof(jdouble)*8);
|
||||
} else if (UseSSE >= 2) {
|
||||
movdbl(xmm0, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm1, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm2, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm3, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm4, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm5, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm6, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm7, Address(rsp,off++*sizeof(jdouble)));
|
||||
// Restore whole 128bit (16 bytes) XMM registers
|
||||
movdqu(xmm0, Address(rsp,off++*16));
|
||||
movdqu(xmm1, Address(rsp,off++*16));
|
||||
movdqu(xmm2, Address(rsp,off++*16));
|
||||
movdqu(xmm3, Address(rsp,off++*16));
|
||||
movdqu(xmm4, Address(rsp,off++*16));
|
||||
movdqu(xmm5, Address(rsp,off++*16));
|
||||
movdqu(xmm6, Address(rsp,off++*16));
|
||||
movdqu(xmm7, Address(rsp,off++*16));
|
||||
#ifdef _LP64
|
||||
movdbl(xmm8, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm9, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm10, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm11, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm12, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm13, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm14, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdbl(xmm15, Address(rsp,off++*sizeof(jdouble)));
|
||||
movdqu(xmm8, Address(rsp,off++*16));
|
||||
movdqu(xmm9, Address(rsp,off++*16));
|
||||
movdqu(xmm10, Address(rsp,off++*16));
|
||||
movdqu(xmm11, Address(rsp,off++*16));
|
||||
movdqu(xmm12, Address(rsp,off++*16));
|
||||
movdqu(xmm13, Address(rsp,off++*16));
|
||||
movdqu(xmm14, Address(rsp,off++*16));
|
||||
movdqu(xmm15, Address(rsp,off++*16));
|
||||
#endif
|
||||
addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
|
||||
#ifdef COMPILER2
|
||||
if (MaxVectorSize > 16) {
|
||||
// Restore upper half of YMM registers.
|
||||
vinsertf128h(xmm0, Address(rsp, 0));
|
||||
vinsertf128h(xmm1, Address(rsp, 16));
|
||||
vinsertf128h(xmm2, Address(rsp, 32));
|
||||
vinsertf128h(xmm3, Address(rsp, 48));
|
||||
vinsertf128h(xmm4, Address(rsp, 64));
|
||||
vinsertf128h(xmm5, Address(rsp, 80));
|
||||
vinsertf128h(xmm6, Address(rsp, 96));
|
||||
vinsertf128h(xmm7, Address(rsp,112));
|
||||
#ifdef _LP64
|
||||
vinsertf128h(xmm8, Address(rsp,128));
|
||||
vinsertf128h(xmm9, Address(rsp,144));
|
||||
vinsertf128h(xmm10, Address(rsp,160));
|
||||
vinsertf128h(xmm11, Address(rsp,176));
|
||||
vinsertf128h(xmm12, Address(rsp,192));
|
||||
vinsertf128h(xmm13, Address(rsp,208));
|
||||
vinsertf128h(xmm14, Address(rsp,224));
|
||||
vinsertf128h(xmm15, Address(rsp,240));
|
||||
#endif
|
||||
addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (UseSSE >= 1) {
|
||||
addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
|
||||
}
|
||||
popa();
|
||||
}
|
||||
|
@ -1743,6 +1743,12 @@ private:
|
||||
void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
|
||||
// Load/store the high 128 bits of a YMM register without destroying the other half.
|
||||
void vinsertf128h(XMMRegister dst, Address src);
|
||||
void vinserti128h(XMMRegister dst, Address src);
|
||||
void vextractf128h(Address dst, XMMRegister src);
|
||||
void vextracti128h(Address dst, XMMRegister src);
|
||||
|
||||
// AVX instruction which is used to clear upper 128 bits of YMM registers and
|
||||
// to avoid transition penalty between AVX and SSE states. There is no
|
||||
// penalty if legacy SSE instructions are encoded using VEX prefix because
|
||||
|
@ -46,11 +46,11 @@
|
||||
const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
|
||||
|
||||
class RegisterSaver {
|
||||
enum { FPU_regs_live = 8 /*for the FPU stack*/+8/*eight more for XMM registers*/ };
|
||||
// Capture info about frame layout
|
||||
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
|
||||
enum layout {
|
||||
fpu_state_off = 0,
|
||||
fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
|
||||
fpu_state_end = fpu_state_off+FPUStateSizeInWords,
|
||||
st0_off, st0H_off,
|
||||
st1_off, st1H_off,
|
||||
st2_off, st2H_off,
|
||||
@ -59,16 +59,16 @@ class RegisterSaver {
|
||||
st5_off, st5H_off,
|
||||
st6_off, st6H_off,
|
||||
st7_off, st7H_off,
|
||||
|
||||
xmm0_off, xmm0H_off,
|
||||
xmm1_off, xmm1H_off,
|
||||
xmm2_off, xmm2H_off,
|
||||
xmm3_off, xmm3H_off,
|
||||
xmm4_off, xmm4H_off,
|
||||
xmm5_off, xmm5H_off,
|
||||
xmm6_off, xmm6H_off,
|
||||
xmm7_off, xmm7H_off,
|
||||
flags_off,
|
||||
xmm_off,
|
||||
DEF_XMM_OFFS(0),
|
||||
DEF_XMM_OFFS(1),
|
||||
DEF_XMM_OFFS(2),
|
||||
DEF_XMM_OFFS(3),
|
||||
DEF_XMM_OFFS(4),
|
||||
DEF_XMM_OFFS(5),
|
||||
DEF_XMM_OFFS(6),
|
||||
DEF_XMM_OFFS(7),
|
||||
flags_off = xmm7_off + 16/BytesPerInt + 1, // 16-byte stack alignment fill word
|
||||
rdi_off,
|
||||
rsi_off,
|
||||
ignore_off, // extra copy of rbp,
|
||||
@ -83,13 +83,13 @@ class RegisterSaver {
|
||||
rbp_off,
|
||||
return_off, // slot for return address
|
||||
reg_save_size };
|
||||
|
||||
enum { FPU_regs_live = flags_off - fpu_state_end };
|
||||
|
||||
public:
|
||||
|
||||
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words,
|
||||
int* total_frame_words, bool verify_fpu = true);
|
||||
static void restore_live_registers(MacroAssembler* masm);
|
||||
int* total_frame_words, bool verify_fpu = true, bool save_vectors = false);
|
||||
static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
|
||||
|
||||
static int rax_offset() { return rax_off; }
|
||||
static int rbx_offset() { return rbx_off; }
|
||||
@ -113,9 +113,20 @@ class RegisterSaver {
|
||||
};
|
||||
|
||||
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
|
||||
int* total_frame_words, bool verify_fpu) {
|
||||
|
||||
int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize;
|
||||
int* total_frame_words, bool verify_fpu, bool save_vectors) {
|
||||
int vect_words = 0;
|
||||
#ifdef COMPILER2
|
||||
if (save_vectors) {
|
||||
assert(UseAVX > 0, "256bit vectors are supported only with AVX");
|
||||
assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
|
||||
// Save upper half of YMM registers
|
||||
vect_words = 8 * 16 / wordSize;
|
||||
additional_frame_words += vect_words;
|
||||
}
|
||||
#else
|
||||
assert(!save_vectors, "vectors are generated only by C2");
|
||||
#endif
|
||||
int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize;
|
||||
int frame_words = frame_size_in_bytes / wordSize;
|
||||
*total_frame_words = frame_words;
|
||||
|
||||
@ -129,7 +140,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
__ enter();
|
||||
__ pusha();
|
||||
__ pushf();
|
||||
__ subptr(rsp,FPU_regs_live*sizeof(jdouble)); // Push FPU registers space
|
||||
__ subptr(rsp,FPU_regs_live*wordSize); // Push FPU registers space
|
||||
__ push_FPU_state(); // Save FPU state & init
|
||||
|
||||
if (verify_fpu) {
|
||||
@ -183,14 +194,28 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
__ movflt(Address(rsp,xmm6_off*wordSize),xmm6);
|
||||
__ movflt(Address(rsp,xmm7_off*wordSize),xmm7);
|
||||
} else if( UseSSE >= 2 ) {
|
||||
__ movdbl(Address(rsp,xmm0_off*wordSize),xmm0);
|
||||
__ movdbl(Address(rsp,xmm1_off*wordSize),xmm1);
|
||||
__ movdbl(Address(rsp,xmm2_off*wordSize),xmm2);
|
||||
__ movdbl(Address(rsp,xmm3_off*wordSize),xmm3);
|
||||
__ movdbl(Address(rsp,xmm4_off*wordSize),xmm4);
|
||||
__ movdbl(Address(rsp,xmm5_off*wordSize),xmm5);
|
||||
__ movdbl(Address(rsp,xmm6_off*wordSize),xmm6);
|
||||
__ movdbl(Address(rsp,xmm7_off*wordSize),xmm7);
|
||||
// Save whole 128bit (16 bytes) XMM registers
|
||||
__ movdqu(Address(rsp,xmm0_off*wordSize),xmm0);
|
||||
__ movdqu(Address(rsp,xmm1_off*wordSize),xmm1);
|
||||
__ movdqu(Address(rsp,xmm2_off*wordSize),xmm2);
|
||||
__ movdqu(Address(rsp,xmm3_off*wordSize),xmm3);
|
||||
__ movdqu(Address(rsp,xmm4_off*wordSize),xmm4);
|
||||
__ movdqu(Address(rsp,xmm5_off*wordSize),xmm5);
|
||||
__ movdqu(Address(rsp,xmm6_off*wordSize),xmm6);
|
||||
__ movdqu(Address(rsp,xmm7_off*wordSize),xmm7);
|
||||
}
|
||||
|
||||
if (vect_words > 0) {
|
||||
assert(vect_words*wordSize == 128, "");
|
||||
__ subptr(rsp, 128); // Save upper half of YMM registes
|
||||
__ vextractf128h(Address(rsp, 0),xmm0);
|
||||
__ vextractf128h(Address(rsp, 16),xmm1);
|
||||
__ vextractf128h(Address(rsp, 32),xmm2);
|
||||
__ vextractf128h(Address(rsp, 48),xmm3);
|
||||
__ vextractf128h(Address(rsp, 64),xmm4);
|
||||
__ vextractf128h(Address(rsp, 80),xmm5);
|
||||
__ vextractf128h(Address(rsp, 96),xmm6);
|
||||
__ vextractf128h(Address(rsp,112),xmm7);
|
||||
}
|
||||
|
||||
// Set an oopmap for the call site. This oopmap will map all
|
||||
@ -253,10 +278,20 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
|
||||
}
|
||||
|
||||
void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
|
||||
|
||||
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
|
||||
// Recover XMM & FPU state
|
||||
if( UseSSE == 1 ) {
|
||||
int additional_frame_bytes = 0;
|
||||
#ifdef COMPILER2
|
||||
if (restore_vectors) {
|
||||
assert(UseAVX > 0, "256bit vectors are supported only with AVX");
|
||||
assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
|
||||
additional_frame_bytes = 128;
|
||||
}
|
||||
#else
|
||||
assert(!restore_vectors, "vectors are generated only by C2");
|
||||
#endif
|
||||
if (UseSSE == 1) {
|
||||
assert(additional_frame_bytes == 0, "");
|
||||
__ movflt(xmm0,Address(rsp,xmm0_off*wordSize));
|
||||
__ movflt(xmm1,Address(rsp,xmm1_off*wordSize));
|
||||
__ movflt(xmm2,Address(rsp,xmm2_off*wordSize));
|
||||
@ -265,18 +300,33 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
|
||||
__ movflt(xmm5,Address(rsp,xmm5_off*wordSize));
|
||||
__ movflt(xmm6,Address(rsp,xmm6_off*wordSize));
|
||||
__ movflt(xmm7,Address(rsp,xmm7_off*wordSize));
|
||||
} else if( UseSSE >= 2 ) {
|
||||
__ movdbl(xmm0,Address(rsp,xmm0_off*wordSize));
|
||||
__ movdbl(xmm1,Address(rsp,xmm1_off*wordSize));
|
||||
__ movdbl(xmm2,Address(rsp,xmm2_off*wordSize));
|
||||
__ movdbl(xmm3,Address(rsp,xmm3_off*wordSize));
|
||||
__ movdbl(xmm4,Address(rsp,xmm4_off*wordSize));
|
||||
__ movdbl(xmm5,Address(rsp,xmm5_off*wordSize));
|
||||
__ movdbl(xmm6,Address(rsp,xmm6_off*wordSize));
|
||||
__ movdbl(xmm7,Address(rsp,xmm7_off*wordSize));
|
||||
} else if (UseSSE >= 2) {
|
||||
#define STACK_ADDRESS(x) Address(rsp,(x)*wordSize + additional_frame_bytes)
|
||||
__ movdqu(xmm0,STACK_ADDRESS(xmm0_off));
|
||||
__ movdqu(xmm1,STACK_ADDRESS(xmm1_off));
|
||||
__ movdqu(xmm2,STACK_ADDRESS(xmm2_off));
|
||||
__ movdqu(xmm3,STACK_ADDRESS(xmm3_off));
|
||||
__ movdqu(xmm4,STACK_ADDRESS(xmm4_off));
|
||||
__ movdqu(xmm5,STACK_ADDRESS(xmm5_off));
|
||||
__ movdqu(xmm6,STACK_ADDRESS(xmm6_off));
|
||||
__ movdqu(xmm7,STACK_ADDRESS(xmm7_off));
|
||||
#undef STACK_ADDRESS
|
||||
}
|
||||
if (restore_vectors) {
|
||||
// Restore upper half of YMM registers.
|
||||
assert(additional_frame_bytes == 128, "");
|
||||
__ vinsertf128h(xmm0, Address(rsp, 0));
|
||||
__ vinsertf128h(xmm1, Address(rsp, 16));
|
||||
__ vinsertf128h(xmm2, Address(rsp, 32));
|
||||
__ vinsertf128h(xmm3, Address(rsp, 48));
|
||||
__ vinsertf128h(xmm4, Address(rsp, 64));
|
||||
__ vinsertf128h(xmm5, Address(rsp, 80));
|
||||
__ vinsertf128h(xmm6, Address(rsp, 96));
|
||||
__ vinsertf128h(xmm7, Address(rsp,112));
|
||||
__ addptr(rsp, additional_frame_bytes);
|
||||
}
|
||||
__ pop_FPU_state();
|
||||
__ addptr(rsp, FPU_regs_live*sizeof(jdouble)); // Pop FPU registers
|
||||
__ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers
|
||||
|
||||
__ popf();
|
||||
__ popa();
|
||||
@ -308,6 +358,13 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
|
||||
__ addptr(rsp, return_off * wordSize);
|
||||
}
|
||||
|
||||
// Is vector's size (in bytes) bigger than a size saved by default?
|
||||
// 16 bytes XMM registers are saved by default using SSE2 movdqu instructions.
|
||||
// Note, MaxVectorSize == 0 with UseSSE < 2 and vectors are not generated.
|
||||
bool SharedRuntime::is_wide_vector(int size) {
|
||||
return size > 16;
|
||||
}
|
||||
|
||||
// The java_calling_convention describes stack locations as ideal slots on
|
||||
// a frame with no abi restrictions. Since we must observe abi restrictions
|
||||
// (like the placement of the register window) the slots must be biased by
|
||||
@ -2732,7 +2789,6 @@ uint SharedRuntime::out_preserve_stack_slots() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------generate_deopt_blob----------------------------
|
||||
void SharedRuntime::generate_deopt_blob() {
|
||||
// allocate space for the code
|
||||
@ -3270,7 +3326,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
|
||||
// setup oopmap, and calls safepoint code to stop the compiled code for
|
||||
// a safepoint.
|
||||
//
|
||||
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
|
||||
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
|
||||
|
||||
// Account for thread arg in our frame
|
||||
const int additional_words = 1;
|
||||
@ -3290,17 +3346,18 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
|
||||
const Register java_thread = rdi; // callee-saved for VC++
|
||||
address start = __ pc();
|
||||
address call_pc = NULL;
|
||||
|
||||
bool cause_return = (poll_type == POLL_AT_RETURN);
|
||||
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
|
||||
// If cause_return is true we are at a poll_return and there is
|
||||
// the return address on the stack to the caller on the nmethod
|
||||
// that is safepoint. We can leave this return on the stack and
|
||||
// effectively complete the return and safepoint in the caller.
|
||||
// Otherwise we push space for a return address that the safepoint
|
||||
// handler will install later to make the stack walking sensible.
|
||||
if( !cause_return )
|
||||
__ push(rbx); // Make room for return address (or push it again)
|
||||
if (!cause_return)
|
||||
__ push(rbx); // Make room for return address (or push it again)
|
||||
|
||||
map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
|
||||
map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false, save_vectors);
|
||||
|
||||
// The following is basically a call_VM. However, we need the precise
|
||||
// address of the call in order to generate an oopmap. Hence, we do all the
|
||||
@ -3312,7 +3369,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
|
||||
__ set_last_Java_frame(java_thread, noreg, noreg, NULL);
|
||||
|
||||
// if this was not a poll_return then we need to correct the return address now.
|
||||
if( !cause_return ) {
|
||||
if (!cause_return) {
|
||||
__ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset()));
|
||||
__ movptr(Address(rbp, wordSize), rax);
|
||||
}
|
||||
@ -3340,15 +3397,14 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
|
||||
__ jcc(Assembler::equal, noException);
|
||||
|
||||
// Exception pending
|
||||
|
||||
RegisterSaver::restore_live_registers(masm);
|
||||
RegisterSaver::restore_live_registers(masm, save_vectors);
|
||||
|
||||
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
|
||||
|
||||
__ bind(noException);
|
||||
|
||||
// Normal exit, register restoring and exit
|
||||
RegisterSaver::restore_live_registers(masm);
|
||||
RegisterSaver::restore_live_registers(masm, save_vectors);
|
||||
|
||||
__ ret(0);
|
||||
|
||||
|
@ -116,8 +116,8 @@ class RegisterSaver {
|
||||
};
|
||||
|
||||
public:
|
||||
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
|
||||
static void restore_live_registers(MacroAssembler* masm);
|
||||
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
|
||||
static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
|
||||
|
||||
// Offsets into the register save area
|
||||
// Used by deoptimization when it is managing result register
|
||||
@ -134,7 +134,19 @@ class RegisterSaver {
|
||||
static void restore_result_registers(MacroAssembler* masm);
|
||||
};
|
||||
|
||||
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
|
||||
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
|
||||
int vect_words = 0;
|
||||
#ifdef COMPILER2
|
||||
if (save_vectors) {
|
||||
assert(UseAVX > 0, "256bit vectors are supported only with AVX");
|
||||
assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
|
||||
// Save upper half of YMM registers
|
||||
vect_words = 16 * 16 / wordSize;
|
||||
additional_frame_words += vect_words;
|
||||
}
|
||||
#else
|
||||
assert(!save_vectors, "vectors are generated only by C2");
|
||||
#endif
|
||||
|
||||
// Always make the frame size 16-byte aligned
|
||||
int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
|
||||
@ -155,6 +167,27 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
|
||||
__ enter(); // rsp becomes 16-byte aligned here
|
||||
__ push_CPU_state(); // Push a multiple of 16 bytes
|
||||
|
||||
if (vect_words > 0) {
|
||||
assert(vect_words*wordSize == 256, "");
|
||||
__ subptr(rsp, 256); // Save upper half of YMM registes
|
||||
__ vextractf128h(Address(rsp, 0),xmm0);
|
||||
__ vextractf128h(Address(rsp, 16),xmm1);
|
||||
__ vextractf128h(Address(rsp, 32),xmm2);
|
||||
__ vextractf128h(Address(rsp, 48),xmm3);
|
||||
__ vextractf128h(Address(rsp, 64),xmm4);
|
||||
__ vextractf128h(Address(rsp, 80),xmm5);
|
||||
__ vextractf128h(Address(rsp, 96),xmm6);
|
||||
__ vextractf128h(Address(rsp,112),xmm7);
|
||||
__ vextractf128h(Address(rsp,128),xmm8);
|
||||
__ vextractf128h(Address(rsp,144),xmm9);
|
||||
__ vextractf128h(Address(rsp,160),xmm10);
|
||||
__ vextractf128h(Address(rsp,176),xmm11);
|
||||
__ vextractf128h(Address(rsp,192),xmm12);
|
||||
__ vextractf128h(Address(rsp,208),xmm13);
|
||||
__ vextractf128h(Address(rsp,224),xmm14);
|
||||
__ vextractf128h(Address(rsp,240),xmm15);
|
||||
}
|
||||
if (frame::arg_reg_save_area_bytes != 0) {
|
||||
// Allocate argument register save area
|
||||
__ subptr(rsp, frame::arg_reg_save_area_bytes);
|
||||
@ -167,112 +200,111 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
|
||||
OopMapSet *oop_maps = new OopMapSet();
|
||||
OopMap* map = new OopMap(frame_size_in_slots, 0);
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rax_off + additional_frame_slots), rax->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rcx_off + additional_frame_slots), rcx->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rdx_off + additional_frame_slots), rdx->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rbx_off + additional_frame_slots), rbx->as_VMReg());
|
||||
|
||||
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
|
||||
|
||||
map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
|
||||
// rbp location is known implicitly by the frame sender code, needs no oopmap
|
||||
// and the location where rbp was saved by is ignored
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rsi_off + additional_frame_slots), rsi->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rdi_off + additional_frame_slots), rdi->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r8_off + additional_frame_slots), r8->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r9_off + additional_frame_slots), r9->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r10_off + additional_frame_slots), r10->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r11_off + additional_frame_slots), r11->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r12_off + additional_frame_slots), r12->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r13_off + additional_frame_slots), r13->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r14_off + additional_frame_slots), r14->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r15_off + additional_frame_slots), r15->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm0_off + additional_frame_slots), xmm0->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm1_off + additional_frame_slots), xmm1->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm2_off + additional_frame_slots), xmm2->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm3_off + additional_frame_slots), xmm3->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm4_off + additional_frame_slots), xmm4->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm5_off + additional_frame_slots), xmm5->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm6_off + additional_frame_slots), xmm6->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm7_off + additional_frame_slots), xmm7->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm8_off + additional_frame_slots), xmm8->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm9_off + additional_frame_slots), xmm9->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm10_off + additional_frame_slots), xmm10->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm11_off + additional_frame_slots), xmm11->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm12_off + additional_frame_slots), xmm12->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm13_off + additional_frame_slots), xmm13->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm14_off + additional_frame_slots), xmm14->as_VMReg());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm15_off + additional_frame_slots), xmm15->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm3_off ), xmm3->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm4_off ), xmm4->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm5_off ), xmm5->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm6_off ), xmm6->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm7_off ), xmm7->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm8_off ), xmm8->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm9_off ), xmm9->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm10_off), xmm10->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
|
||||
|
||||
// %%% These should all be a waste but we'll keep things as they were for now
|
||||
if (true) {
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( raxH_off + additional_frame_slots),
|
||||
rax->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rcxH_off + additional_frame_slots),
|
||||
rcx->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rdxH_off + additional_frame_slots),
|
||||
rdx->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rbxH_off + additional_frame_slots),
|
||||
rbx->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
|
||||
// rbp location is known implicitly by the frame sender code, needs no oopmap
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rsiH_off + additional_frame_slots),
|
||||
rsi->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( rdiH_off + additional_frame_slots),
|
||||
rdi->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r8H_off + additional_frame_slots),
|
||||
r8->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r9H_off + additional_frame_slots),
|
||||
r9->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r10H_off + additional_frame_slots),
|
||||
r10->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r11H_off + additional_frame_slots),
|
||||
r11->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r12H_off + additional_frame_slots),
|
||||
r12->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r13H_off + additional_frame_slots),
|
||||
r13->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r14H_off + additional_frame_slots),
|
||||
r14->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg( r15H_off + additional_frame_slots),
|
||||
r15->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm0H_off + additional_frame_slots),
|
||||
xmm0->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm1H_off + additional_frame_slots),
|
||||
xmm1->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm2H_off + additional_frame_slots),
|
||||
xmm2->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm3H_off + additional_frame_slots),
|
||||
xmm3->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm4H_off + additional_frame_slots),
|
||||
xmm4->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm5H_off + additional_frame_slots),
|
||||
xmm5->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm6H_off + additional_frame_slots),
|
||||
xmm6->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm7H_off + additional_frame_slots),
|
||||
xmm7->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm8H_off + additional_frame_slots),
|
||||
xmm8->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm9H_off + additional_frame_slots),
|
||||
xmm9->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm10H_off + additional_frame_slots),
|
||||
xmm10->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm11H_off + additional_frame_slots),
|
||||
xmm11->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm12H_off + additional_frame_slots),
|
||||
xmm12->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm13H_off + additional_frame_slots),
|
||||
xmm13->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm14H_off + additional_frame_slots),
|
||||
xmm14->as_VMReg()->next());
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm15H_off + additional_frame_slots),
|
||||
xmm15->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm3H_off ), xmm3->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm4H_off ), xmm4->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm5H_off ), xmm5->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm6H_off ), xmm6->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm7H_off ), xmm7->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm8H_off ), xmm8->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm9H_off ), xmm9->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm10H_off), xmm10->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm11H_off), xmm11->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm12H_off), xmm12->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm13H_off), xmm13->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next());
|
||||
map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next());
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
|
||||
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
|
||||
if (frame::arg_reg_save_area_bytes != 0) {
|
||||
// Pop arg register save area
|
||||
__ addptr(rsp, frame::arg_reg_save_area_bytes);
|
||||
}
|
||||
#ifdef COMPILER2
|
||||
if (restore_vectors) {
|
||||
// Restore upper half of YMM registers.
|
||||
assert(UseAVX > 0, "256bit vectors are supported only with AVX");
|
||||
assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
|
||||
__ vinsertf128h(xmm0, Address(rsp, 0));
|
||||
__ vinsertf128h(xmm1, Address(rsp, 16));
|
||||
__ vinsertf128h(xmm2, Address(rsp, 32));
|
||||
__ vinsertf128h(xmm3, Address(rsp, 48));
|
||||
__ vinsertf128h(xmm4, Address(rsp, 64));
|
||||
__ vinsertf128h(xmm5, Address(rsp, 80));
|
||||
__ vinsertf128h(xmm6, Address(rsp, 96));
|
||||
__ vinsertf128h(xmm7, Address(rsp,112));
|
||||
__ vinsertf128h(xmm8, Address(rsp,128));
|
||||
__ vinsertf128h(xmm9, Address(rsp,144));
|
||||
__ vinsertf128h(xmm10, Address(rsp,160));
|
||||
__ vinsertf128h(xmm11, Address(rsp,176));
|
||||
__ vinsertf128h(xmm12, Address(rsp,192));
|
||||
__ vinsertf128h(xmm13, Address(rsp,208));
|
||||
__ vinsertf128h(xmm14, Address(rsp,224));
|
||||
__ vinsertf128h(xmm15, Address(rsp,240));
|
||||
__ addptr(rsp, 256);
|
||||
}
|
||||
#else
|
||||
assert(!restore_vectors, "vectors are generated only by C2");
|
||||
#endif
|
||||
// Recover CPU state
|
||||
__ pop_CPU_state();
|
||||
// Get the rbp described implicitly by the calling convention (no oopMap)
|
||||
@ -297,6 +329,12 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
|
||||
__ addptr(rsp, return_offset_in_bytes());
|
||||
}
|
||||
|
||||
// Is vector's size (in bytes) bigger than a size saved by default?
|
||||
// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions.
|
||||
bool SharedRuntime::is_wide_vector(int size) {
|
||||
return size > 16;
|
||||
}
|
||||
|
||||
// The java_calling_convention describes stack locations as ideal slots on
|
||||
// a frame with no abi restrictions. Since we must observe abi restrictions
|
||||
// (like the placement of the register window) the slots must be biased by
|
||||
@ -3235,7 +3273,6 @@ uint SharedRuntime::out_preserve_stack_slots() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------generate_deopt_blob----------------------------
|
||||
void SharedRuntime::generate_deopt_blob() {
|
||||
// Allocate space for the code
|
||||
@ -3740,7 +3777,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
|
||||
// Generate a special Compile2Runtime blob that saves all registers,
|
||||
// and sets up the oopmap.
|
||||
//
|
||||
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
|
||||
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
|
||||
assert(StubRoutines::forward_exception_entry() != NULL,
|
||||
"must be generated before");
|
||||
|
||||
@ -3755,6 +3792,8 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
|
||||
address start = __ pc();
|
||||
address call_pc = NULL;
|
||||
int frame_size_in_words;
|
||||
bool cause_return = (poll_type == POLL_AT_RETURN);
|
||||
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
|
||||
|
||||
// Make room for return address (or push it again)
|
||||
if (!cause_return) {
|
||||
@ -3762,7 +3801,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
|
||||
}
|
||||
|
||||
// Save registers, fpu state, and flags
|
||||
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
|
||||
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
|
||||
|
||||
// The following is basically a call_VM. However, we need the precise
|
||||
// address of the call in order to generate an oopmap. Hence, we do all the
|
||||
@ -3799,7 +3838,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
|
||||
|
||||
// Exception pending
|
||||
|
||||
RegisterSaver::restore_live_registers(masm);
|
||||
RegisterSaver::restore_live_registers(masm, save_vectors);
|
||||
|
||||
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
|
||||
|
||||
@ -3807,7 +3846,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
|
||||
__ bind(noException);
|
||||
|
||||
// Normal exit, restore registers and exit.
|
||||
RegisterSaver::restore_live_registers(masm);
|
||||
RegisterSaver::restore_live_registers(masm, save_vectors);
|
||||
|
||||
__ ret(0);
|
||||
|
||||
|
@ -498,6 +498,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
case Op_PopCountL:
|
||||
if (!UsePopCountInstruction)
|
||||
return false;
|
||||
break;
|
||||
case Op_MulVI:
|
||||
if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
|
||||
return false;
|
||||
|
@ -346,7 +346,8 @@ void Compilation::install_code(int frame_size) {
|
||||
implicit_exception_table(),
|
||||
compiler(),
|
||||
_env->comp_level(),
|
||||
has_unsafe_access()
|
||||
has_unsafe_access(),
|
||||
SharedRuntime::is_wide_vector(max_vector_size())
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -127,6 +127,7 @@ class Compilation: public StackObj {
|
||||
bool has_exception_handlers() const { return _has_exception_handlers; }
|
||||
bool has_fpu_code() const { return _has_fpu_code; }
|
||||
bool has_unsafe_access() const { return _has_unsafe_access; }
|
||||
int max_vector_size() const { return 0; }
|
||||
ciMethod* method() const { return _method; }
|
||||
int osr_bci() const { return _osr_bci; }
|
||||
bool is_osr_compile() const { return osr_bci() >= 0; }
|
||||
|
@ -921,7 +921,8 @@ void ciEnv::register_method(ciMethod* target,
|
||||
ImplicitExceptionTable* inc_table,
|
||||
AbstractCompiler* compiler,
|
||||
int comp_level,
|
||||
bool has_unsafe_access) {
|
||||
bool has_unsafe_access,
|
||||
bool has_wide_vectors) {
|
||||
VM_ENTRY_MARK;
|
||||
nmethod* nm = NULL;
|
||||
{
|
||||
@ -1016,6 +1017,7 @@ void ciEnv::register_method(ciMethod* target,
|
||||
}
|
||||
} else {
|
||||
nm->set_has_unsafe_access(has_unsafe_access);
|
||||
nm->set_has_wide_vectors(has_wide_vectors);
|
||||
|
||||
// Record successful registration.
|
||||
// (Put nm into the task handle *before* publishing to the Java heap.)
|
||||
|
@ -362,7 +362,8 @@ public:
|
||||
ImplicitExceptionTable* inc_table,
|
||||
AbstractCompiler* compiler,
|
||||
int comp_level,
|
||||
bool has_unsafe_access);
|
||||
bool has_unsafe_access,
|
||||
bool has_wide_vectors);
|
||||
|
||||
|
||||
// Access to certain well known ciObjects.
|
||||
|
@ -463,6 +463,7 @@ void nmethod::init_defaults() {
|
||||
_has_unsafe_access = 0;
|
||||
_has_method_handle_invokes = 0;
|
||||
_lazy_critical_native = 0;
|
||||
_has_wide_vectors = 0;
|
||||
_marked_for_deoptimization = 0;
|
||||
_lock_count = 0;
|
||||
_stack_traversal_mark = 0;
|
||||
|
@ -177,6 +177,7 @@ class nmethod : public CodeBlob {
|
||||
unsigned int _has_unsafe_access:1; // May fault due to unsafe access.
|
||||
unsigned int _has_method_handle_invokes:1; // Has this method MethodHandle invokes?
|
||||
unsigned int _lazy_critical_native:1; // Lazy JNI critical native
|
||||
unsigned int _has_wide_vectors:1; // Preserve wide vectors at safepoints
|
||||
|
||||
// Protected by Patching_lock
|
||||
unsigned char _state; // {alive, not_entrant, zombie, unloaded}
|
||||
@ -442,6 +443,9 @@ class nmethod : public CodeBlob {
|
||||
bool is_lazy_critical_native() const { return _lazy_critical_native; }
|
||||
void set_lazy_critical_native(bool z) { _lazy_critical_native = z; }
|
||||
|
||||
bool has_wide_vectors() const { return _has_wide_vectors; }
|
||||
void set_has_wide_vectors(bool z) { _has_wide_vectors = z; }
|
||||
|
||||
int comp_level() const { return _comp_level; }
|
||||
|
||||
// Support for oops in scopes and relocs:
|
||||
|
@ -825,7 +825,8 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
|
||||
&_handler_table, &_inc_table,
|
||||
compiler,
|
||||
env()->comp_level(),
|
||||
has_unsafe_access()
|
||||
has_unsafe_access(),
|
||||
SharedRuntime::is_wide_vector(max_vector_size())
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -963,6 +964,7 @@ void Compile::Init(int aliaslevel) {
|
||||
_trap_can_recompile = false; // no traps emitted yet
|
||||
_major_progress = true; // start out assuming good things will happen
|
||||
set_has_unsafe_access(false);
|
||||
set_max_vector_size(0);
|
||||
Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
|
||||
set_decompile_count(0);
|
||||
|
||||
|
@ -279,6 +279,7 @@ class Compile : public Phase {
|
||||
bool _has_split_ifs; // True if the method _may_ have some split-if
|
||||
bool _has_unsafe_access; // True if the method _may_ produce faults in unsafe loads or stores.
|
||||
bool _has_stringbuilder; // True if StringBuffers or StringBuilders are allocated
|
||||
int _max_vector_size; // Maximum size of generated vectors
|
||||
uint _trap_hist[trapHistLength]; // Cumulative traps
|
||||
bool _trap_can_recompile; // Have we emitted a recompiling trap?
|
||||
uint _decompile_count; // Cumulative decompilation counts.
|
||||
@ -443,6 +444,8 @@ class Compile : public Phase {
|
||||
void set_has_unsafe_access(bool z) { _has_unsafe_access = z; }
|
||||
bool has_stringbuilder() const { return _has_stringbuilder; }
|
||||
void set_has_stringbuilder(bool z) { _has_stringbuilder = z; }
|
||||
int max_vector_size() const { return _max_vector_size; }
|
||||
void set_max_vector_size(int s) { _max_vector_size = s; }
|
||||
void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; }
|
||||
uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
|
||||
bool trap_can_recompile() const { return _trap_can_recompile; }
|
||||
|
@ -1869,7 +1869,9 @@ void Compile::ScheduleAndBundle() {
|
||||
if (!do_scheduling())
|
||||
return;
|
||||
|
||||
assert(MaxVectorSize <= 8, "scheduling code works only with pairs");
|
||||
// Scheduling code works only with pairs (8 bytes) maximum.
|
||||
if (max_vector_size() > 8)
|
||||
return;
|
||||
|
||||
NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
|
||||
|
||||
|
@ -1350,11 +1350,14 @@ void SuperWord::output() {
|
||||
insert_extracts(_packset.at(i));
|
||||
}
|
||||
|
||||
Compile* C = _phase->C;
|
||||
uint max_vlen_in_bytes = 0;
|
||||
for (int i = 0; i < _block.length(); i++) {
|
||||
Node* n = _block.at(i);
|
||||
Node_List* p = my_pack(n);
|
||||
if (p && n == executed_last(p)) {
|
||||
uint vlen = p->size();
|
||||
uint vlen_in_bytes = 0;
|
||||
Node* vn = NULL;
|
||||
Node* low_adr = p->at(0);
|
||||
Node* first = executed_first(p);
|
||||
@ -1364,7 +1367,8 @@ void SuperWord::output() {
|
||||
Node* mem = first->in(MemNode::Memory);
|
||||
Node* adr = low_adr->in(MemNode::Address);
|
||||
const TypePtr* atyp = n->adr_type();
|
||||
vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
|
||||
vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
|
||||
vlen_in_bytes = vn->as_LoadVector()->memory_size();
|
||||
} else if (n->is_Store()) {
|
||||
// Promote value to be stored to vector
|
||||
Node* val = vector_opd(p, MemNode::ValueIn);
|
||||
@ -1372,7 +1376,8 @@ void SuperWord::output() {
|
||||
Node* mem = first->in(MemNode::Memory);
|
||||
Node* adr = low_adr->in(MemNode::Address);
|
||||
const TypePtr* atyp = n->adr_type();
|
||||
vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
|
||||
vn = StoreVectorNode::make(C, opc, ctl, mem, adr, atyp, val, vlen);
|
||||
vlen_in_bytes = vn->as_StoreVector()->memory_size();
|
||||
} else if (n->req() == 3) {
|
||||
// Promote operands to vector
|
||||
Node* in1 = vector_opd(p, 1);
|
||||
@ -1383,7 +1388,8 @@ void SuperWord::output() {
|
||||
in1 = in2;
|
||||
in2 = tmp;
|
||||
}
|
||||
vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
|
||||
vn = VectorNode::make(C, opc, in1, in2, vlen, velt_basic_type(n));
|
||||
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
||||
} else {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
@ -1395,6 +1401,10 @@ void SuperWord::output() {
|
||||
_igvn.replace_node(pm, vn);
|
||||
}
|
||||
_igvn._worklist.push(vn);
|
||||
|
||||
if (vlen_in_bytes > max_vlen_in_bytes) {
|
||||
max_vlen_in_bytes = vlen_in_bytes;
|
||||
}
|
||||
#ifdef ASSERT
|
||||
if (TraceNewVectors) {
|
||||
tty->print("new Vector node: ");
|
||||
@ -1403,6 +1413,7 @@ void SuperWord::output() {
|
||||
#endif
|
||||
}
|
||||
}
|
||||
C->set_max_vector_size(max_vlen_in_bytes);
|
||||
}
|
||||
|
||||
//------------------------------vector_opd---------------------------
|
||||
@ -1439,7 +1450,7 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
|
||||
}
|
||||
assert(opd->bottom_type()->isa_int(), "int type only");
|
||||
// Move non constant shift count into XMM register.
|
||||
cnt = new (_phase->C, 2) MoveI2FNode(cnt);
|
||||
cnt = new (C, 2) MoveI2FNode(cnt);
|
||||
}
|
||||
if (cnt != opd) {
|
||||
_phase->_igvn.register_new_node_with_optimizer(cnt);
|
||||
@ -1480,10 +1491,10 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
|
||||
_phase->_igvn.register_new_node_with_optimizer(pk);
|
||||
_phase->set_ctrl(pk, _phase->get_ctrl(opd));
|
||||
#ifdef ASSERT
|
||||
if (TraceNewVectors) {
|
||||
tty->print("new Vector node: ");
|
||||
pk->dump();
|
||||
}
|
||||
if (TraceNewVectors) {
|
||||
tty->print("new Vector node: ");
|
||||
pk->dump();
|
||||
}
|
||||
#endif
|
||||
return pk;
|
||||
}
|
||||
|
@ -88,6 +88,7 @@ RuntimeStub* SharedRuntime::_resolve_virtual_call_blob;
|
||||
RuntimeStub* SharedRuntime::_resolve_static_call_blob;
|
||||
|
||||
DeoptimizationBlob* SharedRuntime::_deopt_blob;
|
||||
SafepointBlob* SharedRuntime::_polling_page_vectors_safepoint_handler_blob;
|
||||
SafepointBlob* SharedRuntime::_polling_page_safepoint_handler_blob;
|
||||
SafepointBlob* SharedRuntime::_polling_page_return_handler_blob;
|
||||
|
||||
@ -104,8 +105,14 @@ void SharedRuntime::generate_stubs() {
|
||||
_resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C), "resolve_virtual_call");
|
||||
_resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C), "resolve_static_call");
|
||||
|
||||
_polling_page_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), false);
|
||||
_polling_page_return_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), true);
|
||||
#ifdef COMPILER2
|
||||
// Vectors are generated only by C2.
|
||||
if (is_wide_vector(MaxVectorSize)) {
|
||||
_polling_page_vectors_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), POLL_AT_VECTOR_LOOP);
|
||||
}
|
||||
#endif // COMPILER2
|
||||
_polling_page_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), POLL_AT_LOOP);
|
||||
_polling_page_return_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), POLL_AT_RETURN);
|
||||
|
||||
generate_deopt_blob();
|
||||
|
||||
@ -535,10 +542,15 @@ address SharedRuntime::get_poll_stub(address pc) {
|
||||
"Only polling locations are used for safepoint");
|
||||
|
||||
bool at_poll_return = ((nmethod*)cb)->is_at_poll_return(pc);
|
||||
bool has_wide_vectors = ((nmethod*)cb)->has_wide_vectors();
|
||||
if (at_poll_return) {
|
||||
assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
|
||||
"polling page return stub not created yet");
|
||||
stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
|
||||
} else if (has_wide_vectors) {
|
||||
assert(SharedRuntime::polling_page_vectors_safepoint_handler_blob() != NULL,
|
||||
"polling page vectors safepoint stub not created yet");
|
||||
stub = SharedRuntime::polling_page_vectors_safepoint_handler_blob()->entry_point();
|
||||
} else {
|
||||
assert(SharedRuntime::polling_page_safepoint_handler_blob() != NULL,
|
||||
"polling page safepoint stub not created yet");
|
||||
|
@ -62,6 +62,7 @@ class SharedRuntime: AllStatic {
|
||||
|
||||
static DeoptimizationBlob* _deopt_blob;
|
||||
|
||||
static SafepointBlob* _polling_page_vectors_safepoint_handler_blob;
|
||||
static SafepointBlob* _polling_page_safepoint_handler_blob;
|
||||
static SafepointBlob* _polling_page_return_handler_blob;
|
||||
|
||||
@ -75,7 +76,8 @@ class SharedRuntime: AllStatic {
|
||||
#endif // !PRODUCT
|
||||
|
||||
private:
|
||||
static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return);
|
||||
enum { POLL_AT_RETURN, POLL_AT_LOOP, POLL_AT_VECTOR_LOOP };
|
||||
static SafepointBlob* generate_handler_blob(address call_ptr, int poll_type);
|
||||
static RuntimeStub* generate_resolve_blob(address destination, const char* name);
|
||||
|
||||
public:
|
||||
@ -223,6 +225,7 @@ class SharedRuntime: AllStatic {
|
||||
|
||||
static SafepointBlob* polling_page_return_handler_blob() { return _polling_page_return_handler_blob; }
|
||||
static SafepointBlob* polling_page_safepoint_handler_blob() { return _polling_page_safepoint_handler_blob; }
|
||||
static SafepointBlob* polling_page_vectors_safepoint_handler_blob() { return _polling_page_vectors_safepoint_handler_blob; }
|
||||
|
||||
// Counters
|
||||
#ifndef PRODUCT
|
||||
@ -416,6 +419,10 @@ class SharedRuntime: AllStatic {
|
||||
// when an interrupt occurs.
|
||||
static uint out_preserve_stack_slots();
|
||||
|
||||
// Is vector's size (in bytes) bigger than a size saved by default?
|
||||
// For example, on x86 16 bytes XMM registers are saved by default.
|
||||
static bool is_wide_vector(int size);
|
||||
|
||||
// Save and restore a native result
|
||||
static void save_native_result(MacroAssembler *_masm, BasicType ret_type, int frame_slots );
|
||||
static void restore_native_result(MacroAssembler *_masm, BasicType ret_type, int frame_slots );
|
||||
|
189
hotspot/test/compiler/7196199/Test7196199.java
Normal file
189
hotspot/test/compiler/7196199/Test7196199.java
Normal file
@ -0,0 +1,189 @@
|
||||
/*
|
||||
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 7196199
|
||||
* @summary java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
|
||||
*
|
||||
* @run main/othervm/timeout=400 -Xmx32m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:CompileCommand=exclude,Test7196199.test -XX:+SafepointALot -XX:GuaranteedSafepointInterval=100 Test7196199
|
||||
*/
|
||||
|
||||
|
||||
public class Test7196199 {
|
||||
private static final int ARRLEN = 97;
|
||||
private static final int ITERS = 5000;
|
||||
private static final int INI_ITERS = 1000;
|
||||
private static final int SFP_ITERS = 10000;
|
||||
private static final float SFP_ITERS_F = 10000.f;
|
||||
private static final float VALUE = 15.f;
|
||||
public static void main(String args[]) {
|
||||
int errn = test();
|
||||
if (errn > 0) {
|
||||
System.err.println("FAILED: " + errn + " errors");
|
||||
System.exit(97);
|
||||
}
|
||||
System.out.println("PASSED");
|
||||
}
|
||||
|
||||
static int test() {
|
||||
float[] a0 = new float[ARRLEN];
|
||||
float[] a1 = new float[ARRLEN];
|
||||
// Initialize
|
||||
for (int i=0; i<ARRLEN; i++) {
|
||||
a0[i] = 0.f;
|
||||
a1[i] = (float)i;
|
||||
}
|
||||
System.out.println("Warmup");
|
||||
for (int i=0; i<INI_ITERS; i++) {
|
||||
test_incrc(a0);
|
||||
test_incrv(a0, VALUE);
|
||||
test_addc(a0, a1);
|
||||
test_addv(a0, a1, VALUE);
|
||||
}
|
||||
// Test and verify results
|
||||
System.out.println("Verification");
|
||||
int errn = 0;
|
||||
for (int i=0; i<ARRLEN; i++)
|
||||
a0[i] = 0.f;
|
||||
|
||||
System.out.println(" test_incrc");
|
||||
for (int j=0; j<ITERS; j++) {
|
||||
test_incrc(a0);
|
||||
for (int i=0; i<ARRLEN; i++) {
|
||||
errn += verify("test_incrc: ", i, a0[i], VALUE*SFP_ITERS_F);
|
||||
a0[i] = 0.f; // Reset
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println(" test_incrv");
|
||||
for (int j=0; j<ITERS; j++) {
|
||||
test_incrv(a0, VALUE);
|
||||
for (int i=0; i<ARRLEN; i++) {
|
||||
errn += verify("test_incrv: ", i, a0[i], VALUE*SFP_ITERS_F);
|
||||
a0[i] = 0.f; // Reset
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println(" test_addc");
|
||||
for (int j=0; j<ITERS; j++) {
|
||||
test_addc(a0, a1);
|
||||
for (int i=0; i<ARRLEN; i++) {
|
||||
errn += verify("test_addc: ", i, a0[i], ((float)i + VALUE)*SFP_ITERS_F);
|
||||
a0[i] = 0.f; // Reset
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println(" test_addv");
|
||||
for (int j=0; j<ITERS; j++) {
|
||||
test_addv(a0, a1, VALUE);
|
||||
for (int i=0; i<ARRLEN; i++) {
|
||||
errn += verify("test_addv: ", i, a0[i], ((float)i + VALUE)*SFP_ITERS_F);
|
||||
a0[i] = 0.f; // Reset
|
||||
}
|
||||
}
|
||||
|
||||
if (errn > 0)
|
||||
return errn;
|
||||
|
||||
System.out.println("Time");
|
||||
long start, end;
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i=0; i<INI_ITERS; i++) {
|
||||
test_incrc(a0);
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_incrc: " + (end - start));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i=0; i<INI_ITERS; i++) {
|
||||
test_incrv(a0, VALUE);
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_incrv: " + (end - start));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i=0; i<INI_ITERS; i++) {
|
||||
test_addc(a0, a1);
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_addc: " + (end - start));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i=0; i<INI_ITERS; i++) {
|
||||
test_addv(a0, a1, VALUE);
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_addv: " + (end - start));
|
||||
|
||||
return errn;
|
||||
}
|
||||
|
||||
static void test_incrc(float[] a0) {
|
||||
// Non-counted loop with safepoint.
|
||||
for (long l = 0; l < SFP_ITERS; l++) {
|
||||
// Counted and vectorized loop.
|
||||
for (int i = 0; i < a0.length; i+=1) {
|
||||
a0[i] += VALUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
static void test_incrv(float[] a0, float b) {
|
||||
// Non-counted loop with safepoint.
|
||||
for (long l = 0; l < SFP_ITERS; l++) {
|
||||
// Counted and vectorized loop.
|
||||
for (int i = 0; i < a0.length; i+=1) {
|
||||
a0[i] += b;
|
||||
}
|
||||
}
|
||||
}
|
||||
static void test_addc(float[] a0, float[] a1) {
|
||||
// Non-counted loop with safepoint.
|
||||
for (long l = 0; l < SFP_ITERS; l++) {
|
||||
// Counted and vectorized loop.
|
||||
for (int i = 0; i < a0.length; i+=1) {
|
||||
a0[i] += a1[i]+VALUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
static void test_addv(float[] a0, float[] a1, float b) {
|
||||
// Non-counted loop with safepoint.
|
||||
for (long l = 0; l < SFP_ITERS; l++) {
|
||||
// Counted and vectorized loop.
|
||||
for (int i = 0; i < a0.length; i+=1) {
|
||||
a0[i] += a1[i]+b;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int verify(String text, int i, float elem, float val) {
|
||||
if (elem != val) {
|
||||
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user