From b73ef8ebc6a059e4f200d7ca20c5ebb720ebdb18 Mon Sep 17 00:00:00 2001 From: Ed Nevill Date: Fri, 17 Jul 2015 07:50:36 +0000 Subject: [PATCH 1/7] 8131362: aarch64: C2 does not handle large stack offsets Change spill code to allow large offsets Reviewed-by: kvn, aph --- hotspot/src/cpu/aarch64/vm/aarch64.ad | 401 ++++-------------- .../cpu/aarch64/vm/macroAssembler_aarch64.cpp | 22 + .../cpu/aarch64/vm/macroAssembler_aarch64.hpp | 44 ++ 3 files changed, 159 insertions(+), 308 deletions(-) diff --git a/hotspot/src/cpu/aarch64/vm/aarch64.ad b/hotspot/src/cpu/aarch64/vm/aarch64.ad index 5ece17d1c6d..bef849a2050 100644 --- a/hotspot/src/cpu/aarch64/vm/aarch64.ad +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad @@ -2167,8 +2167,12 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo return 0; // Self copy, no move. } + bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && + (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + if (bottom_type()->isa_vect() != NULL) { - uint len = 4; uint ireg = ideal_reg(); assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); if (cbuf) { @@ -2176,334 +2180,115 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { // stack->stack - int src_offset = ra_->reg2offset(src_lo); - int dst_offset = ra_->reg2offset(dst_lo); assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset"); - len = 8; if (ireg == Op_VecD) { - __ ldr(rscratch1, Address(sp, src_offset)); - __ str(rscratch1, Address(sp, dst_offset)); + __ unspill(rscratch1, true, src_offset); + __ spill(rscratch1, true, dst_offset); } else { - if (src_offset < 512) { - __ ldp(rscratch1, rscratch2, Address(sp, src_offset)); - } else { - __ ldr(rscratch1, Address(sp, src_offset)); - __ ldr(rscratch2, 
Address(sp, src_offset+4)); - len += 4; - } - if (dst_offset < 512) { - __ stp(rscratch1, rscratch2, Address(sp, dst_offset)); - } else { - __ str(rscratch1, Address(sp, dst_offset)); - __ str(rscratch2, Address(sp, dst_offset+4)); - len += 4; - } + __ spill_copy128(src_offset, dst_offset); } } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { - __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), + __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]), ireg == Op_VecD ? __ T8B : __ T16B, - as_FloatRegister(Matcher::_regEncode[src_lo]), as_FloatRegister(Matcher::_regEncode[src_lo])); } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { - __ str(as_FloatRegister(Matcher::_regEncode[src_lo]), - ireg == Op_VecD ? __ D : __ Q, - Address(sp, ra_->reg2offset(dst_lo))); + __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), + ireg == Op_VecD ? __ D : __ Q, + ra_->reg2offset(dst_lo)); } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { - __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]), - ireg == Op_VecD ? __ D : __ Q, - Address(sp, ra_->reg2offset(src_lo))); + __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), + ireg == Op_VecD ? 
__ D : __ Q, + ra_->reg2offset(src_lo)); } else { ShouldNotReachHere(); } - } else if (st) { - if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { - // stack->stack - int src_offset = ra_->reg2offset(src_lo); - int dst_offset = ra_->reg2offset(dst_lo); - if (ireg == Op_VecD) { - st->print("ldr rscratch1, [sp, #%d]", src_offset); - st->print("str rscratch1, [sp, #%d]", dst_offset); + } + } else if (cbuf) { + MacroAssembler _masm(cbuf); + switch (src_lo_rc) { + case rc_int: + if (dst_lo_rc == rc_int) { // gpr --> gpr copy + if (is64) { + __ mov(as_Register(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); } else { - if (src_offset < 512) { - st->print("ldp rscratch1, rscratch2, [sp, #%d]", src_offset); - } else { - st->print("ldr rscratch1, [sp, #%d]", src_offset); - st->print("\nldr rscratch2, [sp, #%d]", src_offset+4); - } - if (dst_offset < 512) { - st->print("\nstp rscratch1, rscratch2, [sp, #%d]", dst_offset); - } else { - st->print("\nstr rscratch1, [sp, #%d]", dst_offset); - st->print("\nstr rscratch2, [sp, #%d]", dst_offset+4); - } + MacroAssembler _masm(cbuf); + __ movw(as_Register(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); } - st->print("\t# vector spill, stack to stack"); - } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { - st->print("mov %s, %s\t# vector spill, reg to reg", - Matcher::regName[dst_lo], Matcher::regName[src_lo]); - } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { - st->print("str %s, [sp, #%d]\t# vector spill, reg to stack", - Matcher::regName[src_lo], ra_->reg2offset(dst_lo)); - } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { - st->print("ldr %s, [sp, #%d]\t# vector spill, stack to reg", - Matcher::regName[dst_lo], ra_->reg2offset(src_lo)); + } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy + if (is64) { + __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); + } else { + __ 
fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); + } + } else { // gpr --> stack spill + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); } - } - return len; - } - - switch (src_lo_rc) { - case rc_int: - if (dst_lo_rc == rc_int) { // gpr --> gpr copy - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ mov(as_Register(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("mov %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); + break; + case rc_float: + if (dst_lo_rc == rc_int) { // fpr --> gpr copy + if (is64) { + __ fmovd(as_Register(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else { + __ fmovs(as_Register(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ movw(as_Register(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("movw %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); + } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy + if (is64) { + __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else { + __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); } + } else { // fpr --> stack spill + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), + is64 ?
__ D : __ S, dst_offset); } - } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovd %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovs %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } + break; + case rc_stack: + if (dst_lo_rc == rc_int) { // stack --> gpr load + __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); + } else if (dst_lo_rc == rc_float) { // stack --> fpr load + __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), + is64 ? 
__ D : __ S, src_offset); + } else { // stack --> stack copy + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ unspill(rscratch1, is64, src_offset); + __ spill(rscratch1, is64, dst_offset); } - } else { // gpr --> stack spill - assert(dst_lo_rc == rc_stack, "spill to bad register class"); - int dst_offset = ra_->reg2offset(dst_lo); - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ str(as_Register(Matcher::_regEncode[src_lo]), - Address(sp, dst_offset)); - } else if (st) { - st->print("str %s, [sp, #%d]\t# spill", - Matcher::regName[src_lo], - dst_offset); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ strw(as_Register(Matcher::_regEncode[src_lo]), - Address(sp, dst_offset)); - } else if (st) { - st->print("strw %s, [sp, #%d]\t# spill", - Matcher::regName[src_lo], - dst_offset); - } - } - } - return 4; - case rc_float: - if (dst_lo_rc == rc_int) { // fpr --> gpr copy - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovd(as_Register(Matcher::_regEncode[dst_lo]), - as_FloatRegister(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovd %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovs(as_Register(Matcher::_regEncode[dst_lo]), - as_FloatRegister(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovs %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } - } - } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), - 
as_FloatRegister(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovd %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), - as_FloatRegister(Matcher::_regEncode[src_lo])); - } else if (st) { - st->print("fmovs %s, %s\t# shuffle", - Matcher::regName[dst_lo], - Matcher::regName[src_lo]); - } - } - } else { // fpr --> stack spill - assert(dst_lo_rc == rc_stack, "spill to bad register class"); - int dst_offset = ra_->reg2offset(dst_lo); - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ strd(as_FloatRegister(Matcher::_regEncode[src_lo]), - Address(sp, dst_offset)); - } else if (st) { - st->print("strd %s, [sp, #%d]\t# spill", - Matcher::regName[src_lo], - dst_offset); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ strs(as_FloatRegister(Matcher::_regEncode[src_lo]), - Address(sp, dst_offset)); - } else if (st) { - st->print("strs %s, [sp, #%d]\t# spill", - Matcher::regName[src_lo], - dst_offset); - } - } - } - return 4; - case rc_stack: - int src_offset = ra_->reg2offset(src_lo); - if (dst_lo_rc == rc_int) { // stack --> gpr load - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldr(as_Register(Matcher::_regEncode[dst_lo]), - Address(sp, src_offset)); - } else if (st) { - st->print("ldr %s, [sp, %d]\t# restore", - Matcher::regName[dst_lo], - src_offset); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldrw(as_Register(Matcher::_regEncode[dst_lo]), - Address(sp, src_offset)); - } else if (st) { - st->print("ldr %s, [sp, %d]\t# restore", - Matcher::regName[dst_lo], - src_offset); - } - } - return 4; - } else if (dst_lo_rc == 
rc_float) { // stack --> fpr load - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldrd(as_FloatRegister(Matcher::_regEncode[dst_lo]), - Address(sp, src_offset)); - } else if (st) { - st->print("ldrd %s, [sp, %d]\t# restore", - Matcher::regName[dst_lo], - src_offset); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldrs(as_FloatRegister(Matcher::_regEncode[dst_lo]), - Address(sp, src_offset)); - } else if (st) { - st->print("ldrs %s, [sp, %d]\t# restore", - Matcher::regName[dst_lo], - src_offset); - } - } - return 4; - } else { // stack --> stack copy - assert(dst_lo_rc == rc_stack, "spill to bad register class"); - int dst_offset = ra_->reg2offset(dst_lo); - if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) && - (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) { - // 64 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldr(rscratch1, Address(sp, src_offset)); - __ str(rscratch1, Address(sp, dst_offset)); - } else if (st) { - st->print("ldr rscratch1, [sp, %d]\t# mem-mem spill", - src_offset); - st->print("\n\t"); - st->print("str rscratch1, [sp, %d]", - dst_offset); - } - } else { - // 32 bit - if (cbuf) { - MacroAssembler _masm(cbuf); - __ ldrw(rscratch1, Address(sp, src_offset)); - __ strw(rscratch1, Address(sp, dst_offset)); - } else if (st) { - st->print("ldrw rscratch1, [sp, %d]\t# mem-mem spill", - src_offset); - st->print("\n\t"); - st->print("strw rscratch1, [sp, %d]", - dst_offset); - } - } - return 8; + break; + default: + assert(false, "bad rc_class for spill"); + ShouldNotReachHere(); + } + } + + if (st) { + st->print("spill "); + if (src_lo_rc == rc_stack) { + st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo)); + } else { + st->print("%s -> ", Matcher::regName[src_lo]); + } + if (dst_lo_rc == rc_stack) { + st->print("[sp, #%d]", ra_->reg2offset(dst_lo)); + } else { + st->print("%s", Matcher::regName[dst_lo]); + } + if 
(bottom_type()->isa_vect() != NULL) { + st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128); + } else { + st->print("\t# spill size = %d", is64 ? 64:32); } } - assert(false," bad rc_class for spill "); - Unimplemented(); return 0; } @@ -2522,7 +2307,7 @@ void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { } uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { - return implementation(NULL, ra_, true, NULL); + return MachNode::size(ra_); } //============================================================================= diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp index 4fe853e1f86..309191390ea 100644 --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp @@ -2306,6 +2306,28 @@ Address MacroAssembler::offsetted_address(Register r, Register r1, } } +Address MacroAssembler::spill_address(int size, int offset, Register tmp) +{ + assert(offset >= 0, "spill to negative address?"); + // Offset reachable ? + // Not aligned - 9 bits signed offset + // Aligned - 12 bits unsigned offset shifted + Register base = sp; + if ((offset & (size-1)) && offset >= (1<<8)) { + add(tmp, base, offset & ((1<<12)-1)); + base = tmp; + offset &= ~((1<<12)-1); + } + + if (offset >= (1<<12) * size) { + add(tmp, base, offset & (((1<<12)-1)<<12)); + base = tmp; + offset &= ~(((1<<12)-1)<<12); + } + + return Address(base, offset); +} + /** * Multiply 64 bit by 64 bit first loop.
*/ diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp index b3544ee625d..3a73655ac17 100644 --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp @@ -468,6 +468,10 @@ public: void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32); + void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { + orr(Vd, T, Vn, Vn); + } + // macro instructions for accessing and updating floating point // status register // @@ -1161,6 +1165,46 @@ private: // Uses rscratch2. Address offsetted_address(Register r, Register r1, Address::extend ext, int offset, int size); + +private: + // Returns an address on the stack which is reachable with a ldr/str of size + // Uses rscratch2 if the address is not directly reachable + Address spill_address(int size, int offset, Register tmp=rscratch2); + +public: + void spill(Register Rx, bool is64, int offset) { + if (is64) { + str(Rx, spill_address(8, offset)); + } else { + strw(Rx, spill_address(4, offset)); + } + } + void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) { + str(Vx, T, spill_address(1 << (int)T, offset)); + } + void unspill(Register Rx, bool is64, int offset) { + if (is64) { + ldr(Rx, spill_address(8, offset)); + } else { + ldrw(Rx, spill_address(4, offset)); + } + } + void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) { + ldr(Vx, T, spill_address(1 << (int)T, offset)); + } + void spill_copy128(int src_offset, int dst_offset, + Register tmp1=rscratch1, Register tmp2=rscratch2) { + if (src_offset < 512 && (src_offset & 7) == 0 && + dst_offset < 512 && (dst_offset & 7) == 0) { + ldp(tmp1, tmp2, Address(sp, src_offset)); + stp(tmp1, tmp2, Address(sp, dst_offset)); + } else { + unspill(tmp1, true, src_offset); + spill(tmp1, true, dst_offset); + unspill(tmp1, true, src_offset+8); + spill(tmp1, true, dst_offset+8); + } + } }; #ifdef ASSERT From 
244435704bce329e44815b7d9248e351d7a12442 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 20 Jul 2015 11:41:34 +0100 Subject: [PATCH 2/7] 8131779: AARCH64: add Montgomery multiply intrinsic Add Montgomery multiply intrinsic for AArch64. Reviewed-by: kvn --- .../cpu/aarch64/vm/macroAssembler_aarch64.cpp | 8 + .../cpu/aarch64/vm/macroAssembler_aarch64.hpp | 8 + .../cpu/aarch64/vm/stubGenerator_aarch64.cpp | 858 +++++++++++++++++- .../src/cpu/aarch64/vm/vm_version_aarch64.cpp | 7 + 4 files changed, 869 insertions(+), 12 deletions(-) diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp index b47d57f1cdb..93b951081fd 100644 --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp @@ -2008,6 +2008,14 @@ void MacroAssembler::addw(Register Rd, Register Rn, RegisterOrConstant increment } } +void MacroAssembler::sub(Register Rd, Register Rn, RegisterOrConstant decrement) { + if (decrement.is_register()) { + sub(Rd, Rn, decrement.as_register()); + } else { + sub(Rd, Rn, decrement.as_constant()); + } +} + void MacroAssembler::reinit_heapbase() { if (UseCompressedOops) { diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp index b3544ee625d..a2d0a93771a 100644 --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp @@ -464,6 +464,13 @@ public: mov(dst, (long)i); } + void mov(Register dst, RegisterOrConstant src) { + if (src.is_register()) + mov(dst, src.as_register()); + else + mov(dst, src.as_constant()); + } + void movptr(Register r, uintptr_t imm64); void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32); @@ -1045,6 +1052,7 @@ public: void add(Register Rd, Register Rn, RegisterOrConstant increment); void addw(Register Rd, Register Rn, RegisterOrConstant increment); + void sub(Register Rd, Register 
Rn, RegisterOrConstant decrement); void adrp(Register reg1, const Address &dest, unsigned long &byte_offset); diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp index a7fcaca9788..ec45397fd26 100644 --- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp @@ -120,10 +120,8 @@ class StubGenerator: public StubCodeGenerator { // we save r19-r28 which Java uses as scratch registers and C // expects to be callee-save // - // we don't save any FP registers since only v8-v15 are callee-save - // (strictly only the f and d components) and Java uses them as - // callee-save. v0-v7 are arg registers and C treats v16-v31 as - // volatile (as does Java?) + // we save the bottom 64 bits of each value stored in v8-v15; it is + // the responsibility of the caller to preserve larger values. // // so the stub frame looks like this when we enter Java code // @@ -131,14 +129,14 @@ class StubGenerator: public StubCodeGenerator { // [ argument word n ] // ... 
// -27 [ argument word 1 ] - // -26 [ saved d15 ] <--- sp_after_call - // -25 [ saved d14 ] - // -24 [ saved d13 ] - // -23 [ saved d12 ] - // -22 [ saved d11 ] - // -21 [ saved d10 ] - // -20 [ saved d9 ] - // -19 [ saved d8 ] + // -26 [ saved v15 ] <--- sp_after_call + // -25 [ saved v14 ] + // -24 [ saved v13 ] + // -23 [ saved v12 ] + // -22 [ saved v11 ] + // -21 [ saved v10 ] + // -20 [ saved v9 ] + // -19 [ saved v8 ] // -18 [ saved r28 ] // -17 [ saved r27 ] // -16 [ saved r26 ] @@ -2544,6 +2542,828 @@ class StubGenerator: public StubCodeGenerator { return stub->entry_point(); } + class MontgomeryMultiplyGenerator : public MacroAssembler { + + Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, + Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; + + RegSet _toSave; + bool _squaring; + + public: + MontgomeryMultiplyGenerator (Assembler *as, bool squaring) + : MacroAssembler(as->code()), _squaring(squaring) { + + // Register allocation + + Register reg = c_rarg0; + Pa_base = reg; // Argument registers + if (squaring) + Pb_base = Pa_base; + else + Pb_base = ++reg; + Pn_base = ++reg; + Rlen= ++reg; + inv = ++reg; + Pm_base = ++reg; + + // Working registers: + Ra = ++reg; // The current digit of a, b, n, and m. + Rb = ++reg; + Rm = ++reg; + Rn = ++reg; + + Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. + Pb = ++reg; + Pm = ++reg; + Pn = ++reg; + + t0 = ++reg; // Three registers which form a + t1 = ++reg; // triple-precision accumulator. + t2 = ++reg; + + Ri = ++reg; // Inner and outer loop indexes. + Rj = ++reg; + + Rhi_ab = ++reg; // Product registers: low and high parts + Rlo_ab = ++reg; // of a*b and m*n. + Rhi_mn = ++reg; + Rlo_mn = ++reg; + + // r19 and up are callee-saved.
+ _toSave = RegSet::range(r19, reg) + Pm_base; + } + + private: + void save_regs() { + push(_toSave, sp); + } + + void restore_regs() { + pop(_toSave, sp); + } + + template <class T> + void unroll_2(Register count, T block) { + Label loop, end, odd; + tbnz(count, 0, odd); + cbz(count, end); + align(16); + bind(loop); + (this->*block)(); + bind(odd); + (this->*block)(); + subs(count, count, 2); + br(Assembler::GT, loop); + bind(end); + } + + template <class T> + void unroll_2(Register count, T block, Register d, Register s, Register tmp) { + Label loop, end, odd; + tbnz(count, 0, odd); + cbz(count, end); + align(16); + bind(loop); + (this->*block)(d, s, tmp); + bind(odd); + (this->*block)(d, s, tmp); + subs(count, count, 2); + br(Assembler::GT, loop); + bind(end); + } + + void pre1(RegisterOrConstant i) { + block_comment("pre1"); + // Pa = Pa_base; + // Pb = Pb_base + i; + // Pm = Pm_base; + // Pn = Pn_base + i; + // Ra = *Pa; + // Rb = *Pb; + // Rm = *Pm; + // Rn = *Pn; + ldr(Ra, Address(Pa_base)); + ldr(Rb, Address(Pb_base, i, Address::uxtw(LogBytesPerWord))); + ldr(Rm, Address(Pm_base)); + ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord))); + lea(Pa, Address(Pa_base)); + lea(Pb, Address(Pb_base, i, Address::uxtw(LogBytesPerWord))); + lea(Pm, Address(Pm_base)); + lea(Pn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord))); + + // Zero the m*n result. + mov(Rhi_mn, zr); + mov(Rlo_mn, zr); + } + + // The core multiply-accumulate step of a Montgomery + // multiplication. The idea is to schedule operations as a + // pipeline so that instructions with long latencies (loads and + // multiplies) have time to complete before their results are + // used. This most benefits in-order implementations of the + // architecture but out-of-order ones also benefit.
+ void step() { + block_comment("step"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + umulh(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + ldr(Ra, pre(Pa, wordSize)); + ldr(Rb, pre(Pb, -wordSize)); + acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n from the + // previous iteration. + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + umulh(Rhi_mn, Rm, Rn); + mul(Rlo_mn, Rm, Rn); + ldr(Rm, pre(Pm, wordSize)); + ldr(Rn, pre(Pn, -wordSize)); + acc(Rhi_ab, Rlo_ab, t0, t1, t2); + } + + void post1() { + block_comment("post1"); + + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + umulh(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n + acc(Rhi_ab, Rlo_ab, t0, t1, t2); + + // *Pm = Rm = t0 * inv; + mul(Rm, t0, inv); + str(Rm, Address(Pm)); + + // MACC(Rm, Rn, t0, t1, t2); + // t0 = t1; t1 = t2; t2 = 0; + umulh(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); + { + mul(Rlo_mn, Rm, Rn); + add(Rlo_mn, t0, Rlo_mn); + Label ok; + cbz(Rlo_mn, ok); { + stop("broken Montgomery multiply"); + } bind(ok); + } +#endif + // We have very carefully set things up so that + // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -t0. t0 + (-t0) must generate a carry iff + // t0 != 0. So, rather than do a mul and an adds we just set + // the carry flag iff t0 is nonzero. 
+ // + // mul(Rlo_mn, Rm, Rn); + // adds(zr, t0, Rlo_mn); + subs(zr, t0, 1); // Set carry iff t0 is nonzero + adcs(t0, t1, Rhi_mn); + adc(t1, t2, zr); + mov(t2, zr); + } + + void pre2(RegisterOrConstant i, RegisterOrConstant len) { + block_comment("pre2"); + // Pa = Pa_base + i-len; + // Pb = Pb_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + if (i.is_register()) { + sub(Rj, i.as_register(), len); + } else { + mov(Rj, i.as_constant()); + sub(Rj, Rj, len); + } + // Rj == i-len + + lea(Pa, Address(Pa_base, Rj, Address::uxtw(LogBytesPerWord))); + lea(Pb, Address(Pb_base, len, Address::uxtw(LogBytesPerWord))); + lea(Pm, Address(Pm_base, Rj, Address::uxtw(LogBytesPerWord))); + lea(Pn, Address(Pn_base, len, Address::uxtw(LogBytesPerWord))); + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + ldr(Ra, pre(Pa, wordSize)); + ldr(Rb, pre(Pb, -wordSize)); + ldr(Rm, pre(Pm, wordSize)); + ldr(Rn, pre(Pn, -wordSize)); + + mov(Rhi_mn, zr); + mov(Rlo_mn, zr); + } + + void post2(RegisterOrConstant i, RegisterOrConstant len) { + block_comment("post2"); + if (i.is_constant()) { + mov(Rj, i.as_constant()-len.as_constant()); + } else { + sub(Rj, i.as_register(), len); + } + + adds(t0, t0, Rlo_mn); // The pending m*n, low part + + // As soon as we know the least significant digit of our result, + // store it. + // Pm_base[i-len] = t0; + str(t0, Address(Pm_base, Rj, Address::uxtw(LogBytesPerWord))); + + // t0 = t1; t1 = t2; t2 = 0; + adcs(t0, t1, Rhi_mn); // The pending m*n, high part + adc(t1, t2, zr); + mov(t2, zr); + } + + // A carry in t0 after Montgomery multiplication means that we + // should subtract multiples of n from our result in m. We'll + // keep doing that until there is no carry. 
+ void normalize(RegisterOrConstant len) { + block_comment("normalize"); + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + Label loop, post, again; + Register cnt = t1, i = t2; // Re-use registers; we're done with them now + cbz(t0, post); { + bind(again); { + mov(i, zr); + mov(cnt, len); + ldr(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord))); + ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord))); + subs(zr, zr, zr); // set carry flag, i.e. no borrow + align(16); + bind(loop); { + sbcs(Rm, Rm, Rn); + str(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord))); + add(i, i, 1); + ldr(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord))); + ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord))); + sub(cnt, cnt, 1); + } cbnz(cnt, loop); + sbc(t0, t0, zr); + } cbnz(t0, again); + } bind(post); + } + + // Move memory at s to d, reversing words. + // Increments d to end of copied memory + // Destroys tmp1, tmp2 + // Preserves len + // Leaves s pointing to the address which was in d at start + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < r19 && tmp2 < r19, "register corruption"); + + lea(s, Address(s, len, Address::uxtw(LogBytesPerWord))); + mov(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + sub(s, d, len, ext::uxtw, LogBytesPerWord); + } + // where + void reverse1(Register d, Register s, Register tmp) { + ldr(tmp, pre(s, -wordSize)); + ror(tmp, tmp, 32); + str(tmp, post(d, wordSize)); + } + + void step_squaring() { + // An extra ACC + step(); + acc(Rhi_ab, Rlo_ab, t0, t1, t2); + } + + void last_squaring(RegisterOrConstant i) { + Label dont; + // if ((i & 1) == 0) { + tbnz(i.as_register(), 0, dont); { + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + umulh(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + acc(Rhi_ab, Rlo_ab, t0, t1, t2); + } bind(dont); + } + + void extra_step_squaring() { + acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending 
m*n + + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + umulh(Rhi_mn, Rm, Rn); + mul(Rlo_mn, Rm, Rn); + ldr(Rm, pre(Pm, wordSize)); + ldr(Rn, pre(Pn, -wordSize)); + } + + void post1_squaring() { + acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n + + // *Pm = Rm = t0 * inv; + mul(Rm, t0, inv); + str(Rm, Address(Pm)); + + // MACC(Rm, Rn, t0, t1, t2); + // t0 = t1; t1 = t2; t2 = 0; + umulh(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); + { + mul(Rlo_mn, Rm, Rn); + add(Rlo_mn, t0, Rlo_mn); + Label ok; + cbz(Rlo_mn, ok); { + stop("broken Montgomery multiply"); + } bind(ok); + } +#endif + // We have very carefully set things up so that + // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -t0. t0 + (-t0) must generate a carry iff + // t0 != 0. So, rather than do a mul and an adds we just set + // the carry flag iff t0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); + // adds(zr, t0, Rlo_mn); + subs(zr, t0, 1); // Set carry iff t0 is nonzero + adcs(t0, t1, Rhi_mn); + adc(t1, t2, zr); + mov(t2, zr); + } + + void acc(Register Rhi, Register Rlo, + Register t0, Register t1, Register t2) { + adds(t0, t0, Rlo); + adcs(t1, t1, Rhi); + adc(t2, t2, zr); + } + + public: + /** + * Fast Montgomery multiplication. The derivation of the + * algorithm is in A Cryptographic Library for the Motorola + * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
+ * + * Arguments: + * + * Inputs for multiplication: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements b + * c_rarg2 - int array elements n (the modulus) + * c_rarg3 - int length + * c_rarg4 - int inv + * c_rarg5 - int array elements m (the result) + * + * Inputs for squaring: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements n (the modulus) + * c_rarg2 - int length + * c_rarg3 - int inv + * c_rarg4 - int array elements m (the result) + * + */ + address generate_multiply() { + Label argh, nothing; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + cbzw(Rlen, nothing); + + enter(); + + // Make room. + cmpw(Rlen, 512); + br(Assembler::HI, argh); + sub(Ra, sp, Rlen, ext::uxtw, exact_log2(4 * sizeof (jint))); + andr(sp, Ra, -2 * wordSize); + + lsrw(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, t0, t1); + if (!_squaring) + reverse(Ra, Pb_base, Rlen, t0, t1); + reverse(Ra, Pn_base, Rlen, t0, t1); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. 
+ save_regs(); + +#ifndef PRODUCT + // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + { + ldr(Rn, Address(Pn_base, 0)); + mul(Rlo_mn, Rn, inv); + cmp(Rlo_mn, -1); + Label ok; + br(EQ, ok); { + stop("broken inverse in Montgomery multiply"); + } bind(ok); + } +#endif + + mov(Pm_base, Ra); + + mov(t0, zr); + mov(t1, zr); + mov(t2, zr); + + block_comment("for (int i = 0; i < len; i++) {"); + mov(Ri, zr); { + Label loop, end; + cmpw(Ri, Rlen); + br(Assembler::GE, end); + + bind(loop); + pre1(Ri); + + block_comment(" for (j = i; j; j--) {"); { + movw(Rj, Ri); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post1(); + addw(Ri, Ri, 1); + cmpw(Ri, Rlen); + br(Assembler::LT, loop); + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mov(Ri, Rlen); { + Label loop, end; + cmpw(Ri, Rlen, Assembler::LSL, 1); + br(Assembler::GE, end); + + bind(loop); + pre2(Ri, Rlen); + + block_comment(" for (j = len*2-i-1; j; j--) {"); { + lslw(Rj, Rlen, 1); + subw(Rj, Rj, Ri); + subw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addw(Ri, Ri, 1); + cmpw(Ri, Rlen, Assembler::LSL, 1); + br(Assembler::LT, loop); + bind(end); + } + block_comment("} // i"); + + normalize(Rlen); + + mov(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, t0, t1); + + leave(); + bind(nothing); + ret(lr); + + return entry; + } + // In C, approximately: + + // void + // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], + // unsigned long Pn_base[], unsigned long Pm_base[], + // unsigned long inv, int len) { + // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + // unsigned long *Pa, *Pb, *Pn, *Pm; + // unsigned long Ra, Rb, Rn, Rm; + + // int i; + + // assert(inv * Pn_base[0] == -1UL, "broken 
inverse in Montgomery multiply"); + + // for (i = 0; i < len; i++) { + // int j; + + // Pa = Pa_base; + // Pb = Pb_base + i; + // Pm = Pm_base; + // Pn = Pn_base + i; + + // Ra = *Pa; + // Rb = *Pb; + // Rm = *Pm; + // Rn = *Pn; + + // int iters = i; + // for (j = 0; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + + // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // *Pm = Rm = t0 * inv; + // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + + // assert(t0 == 0, "broken Montgomery multiply"); + + // t0 = t1; t1 = t2; t2 = 0; + // } + + // for (i = len; i < 2*len; i++) { + // int j; + + // Pa = Pa_base + i-len; + // Pb = Pb_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + + // int iters = len*2-i-1; + // for (j = i-len+1; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + + // Pm_base[i-len] = t0; + // t0 = t1; t1 = t2; t2 = 0; + // } + + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + // } + + /** + * Fast Montgomery squaring. This uses asymptotically 25% fewer + * multiplies than Montgomery multiplication so it should be up to + * 25% faster. However, its loop control is more complex and it + * may actually run slower on some machines. 
+ * + * Arguments: + * + * Inputs: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements n (the modulus) + * c_rarg2 - int length + * c_rarg3 - int inv + * c_rarg4 - int array elements m (the result) + * + */ + address generate_square() { + Label argh; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + enter(); + + // Make room. + cmpw(Rlen, 512); + br(Assembler::HI, argh); + sub(Ra, sp, Rlen, ext::uxtw, exact_log2(4 * sizeof (jint))); + andr(sp, Ra, -2 * wordSize); + + lsrw(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, t0, t1); + reverse(Ra, Pn_base, Rlen, t0, t1); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. + save_regs(); + + mov(Pm_base, Ra); + + mov(t0, zr); + mov(t1, zr); + mov(t2, zr); + + block_comment("for (int i = 0; i < len; i++) {"); + mov(Ri, zr); { + Label loop, end; + bind(loop); + cmp(Ri, Rlen); + br(Assembler::GE, end); + + pre1(Ri); + + block_comment("for (j = (i+1)/2; j; j--) {"); { + add(Rj, Ri, 1); + lsr(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = i/2; j; j--) {"); { + lsr(Rj, Ri, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post1_squaring(); + add(Ri, Ri, 1); + cmp(Ri, Rlen); + br(Assembler::LT, loop); + + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mov(Ri, Rlen); { + Label loop, end; + bind(loop); + cmp(Ri, Rlen, Assembler::LSL, 1); + br(Assembler::GE, end); + + pre2(Ri, Rlen); + + block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { + lsl(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + sub(Rj, Rj, 1); + lsr(Rj, Rj, 1); + unroll_2(Rj, 
&MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = (2*len-i)/2; j; j--) {"); { + lsl(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + lsr(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post2(Ri, Rlen); + add(Ri, Ri, 1); + cmp(Ri, Rlen, Assembler::LSL, 1); + + br(Assembler::LT, loop); + bind(end); + block_comment("} // i"); + } + + normalize(Rlen); + + mov(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, t0, t1); + + leave(); + ret(lr); + + return entry; + } + // In C, approximately: + + // void + // montgomery_square(unsigned long Pa_base[], unsigned long Pn_base[], + // unsigned long Pm_base[], unsigned long inv, int len) { + // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + // unsigned long *Pa, *Pb, *Pn, *Pm; + // unsigned long Ra, Rb, Rn, Rm; + + // int i; + + // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); + + // for (i = 0; i < len; i++) { + // int j; + + // Pa = Pa_base; + // Pb = Pa_base + i; + // Pm = Pm_base; + // Pn = Pn_base + i; + + // Ra = *Pa; + // Rb = *Pb; + // Rm = *Pm; + // Rn = *Pn; + + // int iters = (i+1)/2; + // for (j = 0; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be"); + // MACC2(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + // if ((i & 1) == 0) { + // assert(Ra == Pa_base[j], "must be"); + // MACC(Ra, Ra, t0, t1, t2); + // } + // iters = i/2; + // assert(iters == i-j, "must be"); + // for (; iters--; j++) { + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + + // *Pm = Rm = t0 * inv; + // 
assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + + // assert(t0 == 0, "broken Montgomery multiply"); + + // t0 = t1; t1 = t2; t2 = 0; + // } + + // for (i = len; i < 2*len; i++) { + // int start = i-len+1; + // int end = start + (len - start)/2; + // int j; + + // Pa = Pa_base + i-len; + // Pb = Pa_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + + // int iters = (2*len-i-1)/2; + // assert(iters == end-start, "must be"); + // for (j = start; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be"); + // MACC2(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + // if ((i & 1) == 0) { + // assert(Ra == Pa_base[j], "must be"); + // MACC(Ra, Ra, t0, t1, t2); + // } + // iters = (2*len-i)/2; + // assert(iters == len-j, "must be"); + // for (; iters--; j++) { + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + // Pm_base[i-len] = t0; + // t0 = t1; t1 = t2; t2 = 0; + // } + + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + // } + }; + // Initialization void generate_initial() { // Generate initial stubs and initializes the entry points @@ -2603,6 +3423,20 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_multiplyToLen = generate_multiplyToLen(); } + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); + StubRoutines::_montgomeryMultiply = g.generate_multiply(); + } + + if (UseMontgomerySquareIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + // We use generate_multiply() rather than 
generate_square() + // because it's faster for the sizes of modulus we care about. + StubRoutines::_montgomerySquare = g.generate_multiply(); + } + #ifndef BUILTIN_SIM if (UseAESIntrinsics) { StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); diff --git a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp index 02591e639ed..76277df495b 100644 --- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp @@ -261,6 +261,13 @@ void VM_Version::get_processor_features() { UsePopCountInstruction = true; } + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } + #ifdef COMPILER2 if (FLAG_IS_DEFAULT(OptoScheduling)) { OptoScheduling = true; From 88a6ccaaa298a7877b786726882c73f6b394063e Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Fri, 24 Jul 2015 21:29:11 -0400 Subject: [PATCH 3/7] 8131782: C1 Class.cast optimization breaks when Class is loaded from static final Change as_ValueType() to return InstanceConstant when appropriate Reviewed-by: jrose --- hotspot/src/share/vm/c1/c1_ValueType.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/hotspot/src/share/vm/c1/c1_ValueType.cpp b/hotspot/src/share/vm/c1/c1_ValueType.cpp index 0aebd036a78..5f86a8b9309 100644 --- a/hotspot/src/share/vm/c1/c1_ValueType.cpp +++ b/hotspot/src/share/vm/c1/c1_ValueType.cpp @@ -153,7 +153,19 @@ ValueType* as_ValueType(ciConstant value) { case T_FLOAT : return new FloatConstant (value.as_float ()); case T_DOUBLE : return new DoubleConstant(value.as_double()); case T_ARRAY : // fall through (ciConstant doesn't have an array accessor) - case T_OBJECT : return new ObjectConstant(value.as_object()); + case T_OBJECT : { + // TODO: Common the code with GraphBuilder::load_constant? 
+ ciObject* obj = value.as_object(); + if (obj->is_null_object()) + return objectNull; + if (obj->is_loaded()) { + if (obj->is_array()) + return new ArrayConstant(obj->as_array()); + else if (obj->is_instance()) + return new InstanceConstant(obj->as_instance()); + } + return new ObjectConstant(obj); + } } ShouldNotReachHere(); return illegalType; From ae7cdb597235c57f41501815445786c938ea3904 Mon Sep 17 00:00:00 2001 From: Roland Westrelin Date: Mon, 27 Jul 2015 13:44:07 +0200 Subject: [PATCH 4/7] 8130858: CICompilerCount=1 when tiered is off is not allowed any more 8122937 broke handling of CICompilerCount Reviewed-by: kvn, vlivanov, gziemski --- hotspot/src/share/vm/runtime/arguments.cpp | 30 ---- hotspot/src/share/vm/runtime/arguments.hpp | 3 - .../commandLineFlagConstraintsCompiler.cpp | 42 ++++++ .../commandLineFlagConstraintsCompiler.hpp | 2 + hotspot/src/share/vm/runtime/globals.hpp | 4 +- .../arguments/CheckCICompilerCount.java | 135 ++++++++++++++++++ 6 files changed, 181 insertions(+), 35 deletions(-) create mode 100644 hotspot/test/compiler/arguments/CheckCICompilerCount.java diff --git a/hotspot/src/share/vm/runtime/arguments.cpp b/hotspot/src/share/vm/runtime/arguments.cpp index f18b4c621d2..1e5f0fb40ba 100644 --- a/hotspot/src/share/vm/runtime/arguments.cpp +++ b/hotspot/src/share/vm/runtime/arguments.cpp @@ -1205,32 +1205,6 @@ void Arguments::set_tiered_flags() { } } -/** - * Returns the minimum number of compiler threads needed to run the JVM. The following - * configurations are possible. - * - * 1) The JVM is build using an interpreter only. As a result, the minimum number of - * compiler threads is 0. - * 2) The JVM is build using the compiler(s) and tiered compilation is disabled. As - * a result, either C1 or C2 is used, so the minimum number of compiler threads is 1. - * 3) The JVM is build using the compiler(s) and tiered compilation is enabled. However, - * the option "TieredStopAtLevel < CompLevel_full_optimization". 
As a result, only - * C1 can be used, so the minimum number of compiler threads is 1. - * 4) The JVM is build using the compilers and tiered compilation is enabled. The option - * 'TieredStopAtLevel = CompLevel_full_optimization' (the default value). As a result, - * the minimum number of compiler threads is 2. - */ -int Arguments::get_min_number_of_compiler_threads() { -#if !defined(COMPILER1) && !defined(COMPILER2) && !defined(SHARK) - return 0; // case 1 -#else - if (!TieredCompilation || (TieredStopAtLevel < CompLevel_full_optimization)) { - return 1; // case 2 or case 3 - } - return 2; // case 4 (tiered) -#endif -} - #if INCLUDE_ALL_GCS static void disable_adaptive_size_policy(const char* collector_name) { if (UseAdaptiveSizePolicy) { @@ -2178,10 +2152,6 @@ bool Arguments::check_vm_args_consistency() { status = false; } - int min_number_of_compiler_threads = get_min_number_of_compiler_threads(); - // The default CICompilerCount's value is CI_COMPILER_COUNT. - assert(min_number_of_compiler_threads <= CI_COMPILER_COUNT, "minimum should be less or equal default number"); - if (!FLAG_IS_DEFAULT(CICompilerCount) && !FLAG_IS_DEFAULT(CICompilerCountPerCPU) && CICompilerCountPerCPU) { warning("The VM option CICompilerCountPerCPU overrides CICompilerCount."); } diff --git a/hotspot/src/share/vm/runtime/arguments.hpp b/hotspot/src/share/vm/runtime/arguments.hpp index a0bc1f4adbe..d3889996c1f 100644 --- a/hotspot/src/share/vm/runtime/arguments.hpp +++ b/hotspot/src/share/vm/runtime/arguments.hpp @@ -445,9 +445,6 @@ class Arguments : AllStatic { static char* SharedArchivePath; public: - // Tiered - static int get_min_number_of_compiler_threads(); - // Scale compile thresholds // Returns threshold scaled with CompileThresholdScaling static intx scaled_compile_threshold(intx threshold, double scale); diff --git a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp index 
09c4b20e036..c4270d6f400 100644 --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp @@ -42,3 +42,45 @@ Flag::Error AliasLevelConstraintFunc(bool verbose, intx* value) { } return Flag::SUCCESS; } + +/** + * Validate the minimum number of compiler threads needed to run the + * JVM. The following configurations are possible. + * + * 1) The JVM is built using an interpreter only. As a result, the minimum number of + * compiler threads is 0. + * 2) The JVM is built using the compiler(s) and tiered compilation is disabled. As + * a result, either C1 or C2 is used, so the minimum number of compiler threads is 1. + * 3) The JVM is built using the compiler(s) and tiered compilation is enabled. However, + * the option "TieredStopAtLevel < CompLevel_full_optimization" is in effect. As a result, only + * C1 can be used, so the minimum number of compiler threads is 1. + * 4) The JVM is built using the compilers and tiered compilation is enabled. The option + * 'TieredStopAtLevel = CompLevel_full_optimization' (the default value) is in effect. As a result, + * the minimum number of compiler threads is 2. + */ +Flag::Error CICompilerCountConstraintFunc(bool verbose, intx* value) { + int min_number_of_compiler_threads = 0; +#if !defined(COMPILER1) && !defined(COMPILER2) && !defined(SHARK) + // case 1 +#else + if (!TieredCompilation || (TieredStopAtLevel < CompLevel_full_optimization)) { + min_number_of_compiler_threads = 1; // case 2 or case 3 + } else { + min_number_of_compiler_threads = 2; // case 4 (tiered) + } +#endif + + // The default CICompilerCount's value is CI_COMPILER_COUNT.
+ assert(min_number_of_compiler_threads <= CI_COMPILER_COUNT, "minimum should be less or equal default number"); + + if (*value < (intx)min_number_of_compiler_threads) { + if (verbose == true) { + jio_fprintf(defaultStream::error_stream(), + "CICompilerCount=" INTX_FORMAT " must be at least %d \n", + *value, min_number_of_compiler_threads); + } + return Flag::VIOLATES_CONSTRAINT; + } else { + return Flag::SUCCESS; + } +} diff --git a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp index c141c73e27d..faa1a6ec08a 100644 --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp @@ -36,4 +36,6 @@ Flag::Error AliasLevelConstraintFunc(bool verbose, intx* value); +Flag::Error CICompilerCountConstraintFunc(bool verbose, intx* value); + #endif /* SHARE_VM_RUNTIME_COMMANDLINEFLAGCONSTRAINTSCOMPILER_HPP */ diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 06a32fe8d11..daec828a2c0 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -2649,8 +2649,8 @@ public: /* because of overflow issue */ \ product(intx, CICompilerCount, CI_COMPILER_COUNT, \ "Number of compiler threads to run") \ - range((intx)Arguments::get_min_number_of_compiler_threads(), \ - max_jint) \ + range(0, max_jint) \ + constraint(CICompilerCountConstraintFunc) \ \ product(intx, CompilationPolicyChoice, 0, \ "which compilation policy (0-3)") \ diff --git a/hotspot/test/compiler/arguments/CheckCICompilerCount.java b/hotspot/test/compiler/arguments/CheckCICompilerCount.java new file mode 100644 index 00000000000..4f50e3b3162 --- /dev/null +++ b/hotspot/test/compiler/arguments/CheckCICompilerCount.java @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import jdk.test.lib.*; + +/* + * @test CheckCheckCICompilerCount + * @bug 8130858 + * @summary Check that correct range of values for CICompilerCount are allowed depending on whether tiered is enabled or not + * @library /testlibrary + * @modules java.base/sun.misc + * java.management + * @run main CheckCICompilerCount + */ + +public class CheckCICompilerCount { + private static final String[][] NON_TIERED_ARGUMENTS = { + { + "-XX:-TieredCompilation", + "-XX:+PrintFlagsFinal", + "-XX:CICompilerCount=0", + "-version" + }, + { + "-XX:-TieredCompilation", + "-XX:+PrintFlagsFinal", + "-XX:CICompilerCount=1", + "-version" + } + }; + + private static final String[][] NON_TIERED_EXPECTED_OUTPUTS = { + { + "CICompilerCount=0 must be at least 1", + "Improperly specified VM option 'CICompilerCount=0'" + }, + { + "intx CICompilerCount := 1 {product}" + } + }; + + private static final int[] NON_TIERED_EXIT = { + 1, + 0 + }; + + private static final String[][] TIERED_ARGUMENTS = { + { + 
"-XX:+TieredCompilation", + "-XX:+PrintFlagsFinal", + "-XX:CICompilerCount=1", + "-version" + }, + { + "-XX:+TieredCompilation", + "-XX:+PrintFlagsFinal", + "-XX:CICompilerCount=2", + "-version" + } + }; + + private static final String[][] TIERED_EXPECTED_OUTPUTS = { + { + "CICompilerCount=1 must be at least 2", + "Improperly specified VM option 'CICompilerCount=1'" + }, + { + "intx CICompilerCount := 2 {product}" + } + }; + + private static final int[] TIERED_EXIT = { + 1, + 0 + }; + + private static void verifyValidOption(String[] arguments, String[] expected_outputs, int exit, boolean tiered) throws Exception { + ProcessBuilder pb; + OutputAnalyzer out; + + pb = ProcessTools.createJavaProcessBuilder(arguments); + out = new OutputAnalyzer(pb.start()); + + try { + out.shouldHaveExitValue(exit); + for (String expected_output : expected_outputs) { + out.shouldContain(expected_output); + } + } catch (RuntimeException e) { + // Check if tiered compilation is available in this JVM + // Version. Throw exception only if it is available. 
+ if (!(tiered && out.getOutput().contains("TieredCompilation is disabled in this release."))) { + throw new RuntimeException(e); + } + } + } + + public static void main(String[] args) throws Exception { + if (NON_TIERED_ARGUMENTS.length != NON_TIERED_EXPECTED_OUTPUTS.length || NON_TIERED_ARGUMENTS.length != NON_TIERED_EXIT.length) { + throw new RuntimeException("Test is set up incorrectly: length of arguments, expected outputs and exit codes in non-tiered mode of operation do not match."); + } + + if (TIERED_ARGUMENTS.length != TIERED_EXPECTED_OUTPUTS.length || TIERED_ARGUMENTS.length != TIERED_EXIT.length) { + throw new RuntimeException("Test is set up incorrectly: length of arguments, expected outputs and exit codes in tiered mode of operation do not match."); + } + + for (int i = 0; i < NON_TIERED_ARGUMENTS.length; i++) { + verifyValidOption(NON_TIERED_ARGUMENTS[i], NON_TIERED_EXPECTED_OUTPUTS[i], NON_TIERED_EXIT[i], false); + } + + for (int i = 0; i < TIERED_ARGUMENTS.length; i++) { + verifyValidOption(TIERED_ARGUMENTS[i], TIERED_EXPECTED_OUTPUTS[i], TIERED_EXIT[i], true); + } + } +} From b6cfe54a6462452063e9b56108a98166e8eaed22 Mon Sep 17 00:00:00 2001 From: Alexander Alexeev Date: Tue, 21 Jul 2015 14:23:08 +0000 Subject: [PATCH 5/7] 8132010: aarch64: regression test fails compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnSupportedCPU.java Fix type in SHA flag setting code Reviewed-by: kvn, goetz, aph, zmajo --- hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp index 76277df495b..164c1d840c4 100644 --- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp @@ -232,7 +232,7 @@ void VM_Version::get_processor_features() { } } else if (UseSHA256Intrinsics) { warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available 
on this CPU."); - FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); } if (UseSHA512Intrinsics) { From f3d31d3866b13395adc246f35cea42488698b89c Mon Sep 17 00:00:00 2001 From: Alexander Alexeev Date: Tue, 21 Jul 2015 13:36:28 +0000 Subject: [PATCH 6/7] 8131062: aarch64: add support for GHASH acceleration Add support for GHASH using pmull Reviewed-by: kvn, goetz, aph --- .../src/cpu/aarch64/vm/assembler_aarch64.hpp | 72 ++++++++-- .../cpu/aarch64/vm/stubGenerator_aarch64.cpp | 136 ++++++++++++++++++ .../src/cpu/aarch64/vm/vm_version_aarch64.cpp | 18 ++- 3 files changed, 207 insertions(+), 19 deletions(-) diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp index a05351ea15b..e91fb19db06 100644 --- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp @@ -1896,7 +1896,7 @@ public: public: enum SIMD_Arrangement { - T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D + T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q }; enum SIMD_RegVariant { @@ -2225,14 +2225,16 @@ public: f(0b001111, 15, 10), rf(Vn, 5), rf(Xd, 0); } - // We do not handle the 1Q arrangement. void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { starti; - assert(Ta == T8H && (Tb == T8B || Tb == T16B), "Invalid Size specifier"); - f(0, 31), f(Tb & 1, 30), f(0b001110001, 29, 21), rf(Vm, 16), f(0b111000, 15, 10); - rf(Vn, 5), rf(Vd, 0); + assert((Ta == T1Q && (Tb == T1D || Tb == T2D)) || + (Ta == T8H && (Tb == T8B || Tb == T16B)), "Invalid Size specifier"); + int size = (Ta == T1Q) ? 
0b11 : 0b00; + f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size, 23, 22); + f(1, 21), rf(Vm, 16), f(0b111000, 15, 10), rf(Vn, 5), rf(Vd, 0); } void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { + assert(Tb == T2D || Tb == T16B, "pmull2 assumes T2D or T16B as the second size specifier"); pmull(Vd, Ta, Vn, Vm, Tb); } @@ -2245,15 +2247,6 @@ public: f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0); } - void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) - { - starti; - assert(T <= T8H, "must be one of T8B, T16B, T4H, T8H"); - f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24); - f(T <= T16B ? 0b00 : 0b01, 23, 22), f(0b100000000010, 21, 10); - rf(Vn, 5), rf(Vd, 0); - } - void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs) { starti; @@ -2290,6 +2283,57 @@ public: #undef INSN + // Table vector lookup +#define INSN(NAME, op) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) { \ + starti; \ + assert(T == T8B || T == T16B, "invalid arrangement"); \ + assert(0 < registers && registers <= 4, "invalid number of registers"); \ + f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15); \ + f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(tbl, 0); + INSN(tbx, 1); + +#undef INSN + +#define INSN(NAME, U, opcode) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ + starti; \ + assert((ASSERTION), MSG); \ + f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24); \ + f((int)(T >> 1), 23, 22), f(0b10000, 21, 17), f(opcode, 16, 12); \ + f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0); \ + } + +#define MSG "invalid arrangement" + +#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S) + INSN(rev64, 0, 0b00000); +#undef ASSERTION + +#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H) + INSN(rev32, 1, 0b00000); +#undef ASSERTION + 
+#define ASSERTION (T == T8B || T == T16B) + INSN(rev16, 0, 0b00001); +#undef ASSERTION + +#undef MSG + +#undef INSN + +void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index) + { + starti; + assert(T == T8B || T == T16B, "invalid arrangement"); + assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value"); + f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21); + rf(Vm, 16), f(0, 15), f(index, 14, 11); + f(0, 10), rf(Vn, 5), rf(Vd, 0); + } /* Simulator extensions to the ISA diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp index ec45397fd26..7fe24874b9b 100644 --- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp @@ -2435,6 +2435,137 @@ class StubGenerator: public StubCodeGenerator { return start; } + /** + * Arguments: + * + * Input: + * c_rarg0 - current state address + * c_rarg1 - H key address + * c_rarg2 - data address + * c_rarg3 - number of blocks + * + * Output: + * Updated state at c_rarg0 + */ + address generate_ghash_processBlocks() { + __ align(CodeEntryAlignment); + Label L_ghash_loop, L_exit; + + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); + address start = __ pc(); + + Register state = c_rarg0; + Register subkeyH = c_rarg1; + Register data = c_rarg2; + Register blocks = c_rarg3; + + FloatRegister vzr = v30; + __ eor(vzr, __ T16B, vzr, vzr); // zero register + + __ mov(v26, __ T16B, 1); + __ mov(v27, __ T16B, 63); + __ mov(v28, __ T16B, 62); + __ mov(v29, __ T16B, 57); + + __ ldrq(v6, Address(state)); + __ ldrq(v16, Address(subkeyH)); + + __ ext(v0, __ T16B, v6, v6, 0x08); + __ ext(v1, __ T16B, v16, v16, 0x08); + __ eor(v16, __ T16B, v16, v1); + + __ bind(L_ghash_loop); + + __ ldrq(v2, Address(__ post(data, 0x10))); + __ rev64(v2, __ T16B, v2); // swap data + + __ ext(v6, __ T16B, v0, v0, 0x08); + __ eor(v6, __ T16B, v6, v2); + 
__ ext(v2, __ T16B, v6, v6, 0x08); + + __ pmull2(v7, __ T1Q, v2, v1, __ T2D); // A1*B1 + __ eor(v6, __ T16B, v6, v2); + __ pmull(v5, __ T1Q, v2, v1, __ T1D); // A0*B0 + __ pmull(v20, __ T1Q, v6, v16, __ T1D); // (A1 + A0)(B1 + B0) + + __ ext(v21, __ T16B, v5, v7, 0x08); + __ eor(v18, __ T16B, v7, v5); // A1*B1 xor A0*B0 + __ eor(v20, __ T16B, v20, v21); + __ eor(v20, __ T16B, v20, v18); + + // Registers pair holds the result of carry-less multiplication + __ ins(v7, __ D, v20, 0, 1); + __ ins(v5, __ D, v20, 1, 0); + + // Result of the multiplication is shifted by one bit position + // [X3:X2:X1:X0] = [X3:X2:X1:X0] << 1 + __ ushr(v18, __ T2D, v5, -63 & 63); + __ ins(v25, __ D, v18, 1, 0); + __ ins(v25, __ D, vzr, 0, 0); + __ ushl(v5, __ T2D, v5, v26); + __ orr(v5, __ T16B, v5, v25); + + __ ushr(v19, __ T2D, v7, -63 & 63); + __ ins(v19, __ D, v19, 1, 0); + __ ins(v19, __ D, v18, 0, 1); + __ ushl(v7, __ T2D, v7, v26); + __ orr(v6, __ T16B, v7, v19); + + __ ins(v24, __ D, v5, 0, 1); + + // A = X0 << 63 + __ ushl(v21, __ T2D, v5, v27); + + // A = X0 << 62 + __ ushl(v22, __ T2D, v5, v28); + + // A = X0 << 57 + __ ushl(v23, __ T2D, v5, v29); + + // D = X1^A^B^C + __ eor(v21, __ T16B, v21, v22); + __ eor(v21, __ T16B, v21, v23); + __ eor(v21, __ T16B, v21, v24); + __ ins(v5, __ D, v21, 1, 0); + + // [E1:E0] = [D:X0] >> 1 + __ ushr(v20, __ T2D, v5, -1 & 63); + __ ushl(v18, __ T2D, v5, v27); + __ ext(v25, __ T16B, v18, vzr, 0x08); + __ orr(v19, __ T16B, v20, v25); + + __ eor(v7, __ T16B, v5, v19); + + // [F1:F0] = [D:X0] >> 2 + __ ushr(v20, __ T2D, v5, -2 & 63); + __ ushl(v18, __ T2D, v5, v28); + __ ins(v25, __ D, v18, 0, 1); + __ orr(v19, __ T16B, v20, v25); + + __ eor(v7, __ T16B, v7, v19); + + // [G1:G0] = [D:X0] >> 7 + __ ushr(v20, __ T2D, v5, -7 & 63); + __ ushl(v18, __ T2D, v5, v29); + __ ins(v25, __ D, v18, 0, 1); + __ orr(v19, __ T16B, v20, v25); + + // [H1:H0] = [D^E1^F1^G1:X0^E0^F0^G0] + __ eor(v7, __ T16B, v7, v19); + + // Result = [H1:H0]^[X3:X2] + __ eor(v0, __ 
T16B, v7, v6); + + __ subs(blocks, blocks, 1); + __ cbnz(blocks, L_ghash_loop); + + __ ext(v1, __ T16B, v0, v0, 0x08); + __ st1(v1, __ T16B, state); + __ ret(lr); + + return start; + } + // Continuation point for throwing of implicit exceptions that are // not handled in the current activation. Fabricates an exception // oop and initiates normal exception dispatching in this @@ -3438,6 +3569,11 @@ class StubGenerator: public StubCodeGenerator { } #ifndef BUILTIN_SIM + // generate GHASH intrinsics code + if (UseGHASHIntrinsics) { + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } + if (UseAESIntrinsics) { StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); diff --git a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp index 164c1d840c4..7af494f8113 100644 --- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp @@ -45,6 +45,10 @@ #define HWCAP_AES (1<<3) #endif +#ifndef HWCAP_PMULL +#define HWCAP_PMULL (1<<4) +#endif + #ifndef HWCAP_SHA1 #define HWCAP_SHA1 (1<<5) #endif @@ -190,11 +194,6 @@ void VM_Version::get_processor_features() { } } - if (UseGHASHIntrinsics) { - warning("GHASH intrinsics are not available on this CPU"); - FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); - } - if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { UseCRC32Intrinsics = true; } @@ -244,6 +243,15 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseSHA, false); } + if (auxv & HWCAP_PMULL) { + if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { + FLAG_SET_DEFAULT(UseGHASHIntrinsics, true); + } + } else if (UseGHASHIntrinsics) { + warning("GHASH intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + // This machine allows unaligned memory accesses if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { FLAG_SET_DEFAULT(UseUnalignedAccesses, 
true); From 422cd18f23da6ff656cdd5d0318cafe8b5f54c39 Mon Sep 17 00:00:00 2001 From: Zoltan Majo Date: Tue, 28 Jul 2015 19:20:42 +0200 Subject: [PATCH 7/7] 8130832: Extend the WhiteBox API to provide information about the availability of compiler intrinsics Add a new method, sun.hotspot.WhiteBox.isIntrinsicAvailable, that can be used to determine if an intrinsic is available. Reviewed-by: kvn, jrose --- hotspot/src/share/vm/c1/c1_Compiler.cpp | 162 +++++++ hotspot/src/share/vm/c1/c1_Compiler.hpp | 12 + hotspot/src/share/vm/c1/c1_GraphBuilder.cpp | 409 ++++++------------ hotspot/src/share/vm/c1/c1_GraphBuilder.hpp | 12 +- hotspot/src/share/vm/classfile/vmSymbols.cpp | 313 ++++++++++++++ hotspot/src/share/vm/classfile/vmSymbols.hpp | 20 + .../share/vm/compiler/abstractCompiler.hpp | 52 +++ hotspot/src/share/vm/opto/c2compiler.cpp | 353 ++++++++++++++- hotspot/src/share/vm/opto/c2compiler.hpp | 21 +- hotspot/src/share/vm/opto/library_call.cpp | 332 +------------- hotspot/src/share/vm/prims/whitebox.cpp | 25 +- .../intrinsics/IntrinsicAvailableTest.java | 126 ++++++ .../mathexact/sanity/IntrinsicBase.java | 32 +- .../mathexact/sanity/MathIntrinsic.java | 85 +++- 14 files changed, 1347 insertions(+), 607 deletions(-) create mode 100644 hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java diff --git a/hotspot/src/share/vm/c1/c1_Compiler.cpp b/hotspot/src/share/vm/c1/c1_Compiler.cpp index 3f896950c38..84a7bd09682 100644 --- a/hotspot/src/share/vm/c1/c1_Compiler.cpp +++ b/hotspot/src/share/vm/c1/c1_Compiler.cpp @@ -99,6 +99,164 @@ BufferBlob* Compiler::init_buffer_blob() { return buffer_blob; } +bool Compiler::is_intrinsic_supported(methodHandle method) { + vmIntrinsics::ID id = method->intrinsic_id(); + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + + if (method->is_synchronized()) { + // C1 does not support intrinsification of synchronized methods. 
+ return false; + } + + switch (id) { + case vmIntrinsics::_compareAndSwapLong: + if (!VM_Version::supports_cx8()) return false; + break; + case vmIntrinsics::_getAndAddInt: + if (!VM_Version::supports_atomic_getadd4()) return false; + break; + case vmIntrinsics::_getAndAddLong: + if (!VM_Version::supports_atomic_getadd8()) return false; + break; + case vmIntrinsics::_getAndSetInt: + if (!VM_Version::supports_atomic_getset4()) return false; + break; + case vmIntrinsics::_getAndSetLong: + if (!VM_Version::supports_atomic_getset8()) return false; + break; + case vmIntrinsics::_getAndSetObject: +#ifdef _LP64 + if (!UseCompressedOops && !VM_Version::supports_atomic_getset8()) return false; + if (UseCompressedOops && !VM_Version::supports_atomic_getset4()) return false; +#else + if (!VM_Version::supports_atomic_getset4()) return false; +#endif + break; + case vmIntrinsics::_arraycopy: + case vmIntrinsics::_currentTimeMillis: + case vmIntrinsics::_nanoTime: + case vmIntrinsics::_Reference_get: + // Use the intrinsic version of Reference.get() so that the value in + // the referent field can be registered by the G1 pre-barrier code. + // Also to prevent commoning reads from this field across safepoint + // since GC can change its value. 
+ case vmIntrinsics::_loadFence: + case vmIntrinsics::_storeFence: + case vmIntrinsics::_fullFence: + case vmIntrinsics::_floatToRawIntBits: + case vmIntrinsics::_intBitsToFloat: + case vmIntrinsics::_doubleToRawLongBits: + case vmIntrinsics::_longBitsToDouble: + case vmIntrinsics::_getClass: + case vmIntrinsics::_isInstance: + case vmIntrinsics::_currentThread: + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: + case vmIntrinsics::_dsin: + case vmIntrinsics::_dcos: + case vmIntrinsics::_dtan: + case vmIntrinsics::_dlog: + case vmIntrinsics::_dlog10: + case vmIntrinsics::_dexp: + case vmIntrinsics::_dpow: + case vmIntrinsics::_getObject: + case vmIntrinsics::_getBoolean: + case vmIntrinsics::_getByte: + case vmIntrinsics::_getShort: + case vmIntrinsics::_getChar: + case vmIntrinsics::_getInt: + case vmIntrinsics::_getLong: + case vmIntrinsics::_getFloat: + case vmIntrinsics::_getDouble: + case vmIntrinsics::_putObject: + case vmIntrinsics::_putBoolean: + case vmIntrinsics::_putByte: + case vmIntrinsics::_putShort: + case vmIntrinsics::_putChar: + case vmIntrinsics::_putInt: + case vmIntrinsics::_putLong: + case vmIntrinsics::_putFloat: + case vmIntrinsics::_putDouble: + case vmIntrinsics::_getObjectVolatile: + case vmIntrinsics::_getBooleanVolatile: + case vmIntrinsics::_getByteVolatile: + case vmIntrinsics::_getShortVolatile: + case vmIntrinsics::_getCharVolatile: + case vmIntrinsics::_getIntVolatile: + case vmIntrinsics::_getLongVolatile: + case vmIntrinsics::_getFloatVolatile: + case vmIntrinsics::_getDoubleVolatile: + case vmIntrinsics::_putObjectVolatile: + case vmIntrinsics::_putBooleanVolatile: + case vmIntrinsics::_putByteVolatile: + case vmIntrinsics::_putShortVolatile: + case vmIntrinsics::_putCharVolatile: + case vmIntrinsics::_putIntVolatile: + case vmIntrinsics::_putLongVolatile: + case vmIntrinsics::_putFloatVolatile: + case vmIntrinsics::_putDoubleVolatile: + case vmIntrinsics::_getByte_raw: + case vmIntrinsics::_getShort_raw: + case 
vmIntrinsics::_getChar_raw: + case vmIntrinsics::_getInt_raw: + case vmIntrinsics::_getLong_raw: + case vmIntrinsics::_getFloat_raw: + case vmIntrinsics::_getDouble_raw: + case vmIntrinsics::_putByte_raw: + case vmIntrinsics::_putShort_raw: + case vmIntrinsics::_putChar_raw: + case vmIntrinsics::_putInt_raw: + case vmIntrinsics::_putLong_raw: + case vmIntrinsics::_putFloat_raw: + case vmIntrinsics::_putDouble_raw: + case vmIntrinsics::_putOrderedObject: + case vmIntrinsics::_putOrderedInt: + case vmIntrinsics::_putOrderedLong: + case vmIntrinsics::_getShortUnaligned: + case vmIntrinsics::_getCharUnaligned: + case vmIntrinsics::_getIntUnaligned: + case vmIntrinsics::_getLongUnaligned: + case vmIntrinsics::_putShortUnaligned: + case vmIntrinsics::_putCharUnaligned: + case vmIntrinsics::_putIntUnaligned: + case vmIntrinsics::_putLongUnaligned: + case vmIntrinsics::_checkIndex: + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: + case vmIntrinsics::_compareAndSwapInt: + case vmIntrinsics::_compareAndSwapObject: +#ifdef TRACE_HAVE_INTRINSICS + case vmIntrinsics::_classID: + case vmIntrinsics::_threadID: + case vmIntrinsics::_counterTime: +#endif + break; + default: + return false; // Intrinsics not on the previous list are not available. 
+ } + + return true; +} + +bool Compiler::is_intrinsic_disabled_by_flag(methodHandle method) { + vmIntrinsics::ID id = method->intrinsic_id(); + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + + if (vmIntrinsics::is_disabled_by_flags(id)) { + return true; + } + + if (!InlineNatives && id != vmIntrinsics::_Reference_get) { + return true; + } + + if (!InlineClassNatives && id == vmIntrinsics::_getClass) { + return true; + } + + return false; +} void Compiler::compile_method(ciEnv* env, ciMethod* method, int entry_bci) { BufferBlob* buffer_blob = CompilerThread::current()->get_buffer_blob(); @@ -117,3 +275,7 @@ void Compiler::compile_method(ciEnv* env, ciMethod* method, int entry_bci) { void Compiler::print_timers() { Compilation::print_timers(); } + +bool Compiler::is_intrinsic_available(methodHandle method, methodHandle compilation_context) { + return is_intrinsic_supported(method) && !is_intrinsic_disabled_by_flag(method); +} diff --git a/hotspot/src/share/vm/c1/c1_Compiler.hpp b/hotspot/src/share/vm/c1/c1_Compiler.hpp index b3261a71265..8b8c8d414e3 100644 --- a/hotspot/src/share/vm/c1/c1_Compiler.hpp +++ b/hotspot/src/share/vm/c1/c1_Compiler.hpp @@ -55,6 +55,18 @@ class Compiler: public AbstractCompiler { // Print compilation timers and statistics virtual void print_timers(); + // Check the availability of an intrinsic for 'method' given a compilation context. + // The compilation context is needed to support per-method usage of the + // DisableIntrinsic flag. However, as C1 ignores the DisableIntrinsic flag, it + // ignores the compilation context. + virtual bool is_intrinsic_available(methodHandle method, methodHandle compilation_context); + + // Check if the C1 compiler supports an intrinsic for 'method'. + virtual bool is_intrinsic_supported(methodHandle method); + + // Processing of command-line flags specific to the C1 compiler. 
+ virtual bool is_intrinsic_disabled_by_flag(methodHandle method); + // Size of the code buffer static int code_buffer_size(); }; diff --git a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp index ec90e425c30..e607b02febc 100644 --- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp +++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp @@ -3372,231 +3372,85 @@ const char* GraphBuilder::should_not_inline(ciMethod* callee) const { return NULL; } - -bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) { - if (callee->is_synchronized()) { - // We don't currently support any synchronized intrinsics - return false; - } - - // callee seems like a good candidate - // determine id +void GraphBuilder::build_graph_for_intrinsic(ciMethod* callee) { vmIntrinsics::ID id = callee->intrinsic_id(); - if (!InlineNatives && id != vmIntrinsics::_Reference_get) { - // InlineNatives does not control Reference.get - INLINE_BAILOUT("intrinsic method inlining disabled"); + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + + // Some intrinsics need special IR nodes. 
+ switch(id) { + case vmIntrinsics::_getObject : append_unsafe_get_obj(callee, T_OBJECT, false); return; + case vmIntrinsics::_getBoolean : append_unsafe_get_obj(callee, T_BOOLEAN, false); return; + case vmIntrinsics::_getByte : append_unsafe_get_obj(callee, T_BYTE, false); return; + case vmIntrinsics::_getShort : append_unsafe_get_obj(callee, T_SHORT, false); return; + case vmIntrinsics::_getChar : append_unsafe_get_obj(callee, T_CHAR, false); return; + case vmIntrinsics::_getInt : append_unsafe_get_obj(callee, T_INT, false); return; + case vmIntrinsics::_getLong : append_unsafe_get_obj(callee, T_LONG, false); return; + case vmIntrinsics::_getFloat : append_unsafe_get_obj(callee, T_FLOAT, false); return; + case vmIntrinsics::_getDouble : append_unsafe_get_obj(callee, T_DOUBLE, false); return; + case vmIntrinsics::_putObject : append_unsafe_put_obj(callee, T_OBJECT, false); return; + case vmIntrinsics::_putBoolean : append_unsafe_put_obj(callee, T_BOOLEAN, false); return; + case vmIntrinsics::_putByte : append_unsafe_put_obj(callee, T_BYTE, false); return; + case vmIntrinsics::_putShort : append_unsafe_put_obj(callee, T_SHORT, false); return; + case vmIntrinsics::_putChar : append_unsafe_put_obj(callee, T_CHAR, false); return; + case vmIntrinsics::_putInt : append_unsafe_put_obj(callee, T_INT, false); return; + case vmIntrinsics::_putLong : append_unsafe_put_obj(callee, T_LONG, false); return; + case vmIntrinsics::_putFloat : append_unsafe_put_obj(callee, T_FLOAT, false); return; + case vmIntrinsics::_putDouble : append_unsafe_put_obj(callee, T_DOUBLE, false); return; + case vmIntrinsics::_getShortUnaligned : append_unsafe_get_obj(callee, T_SHORT, false); return; + case vmIntrinsics::_getCharUnaligned : append_unsafe_get_obj(callee, T_CHAR, false); return; + case vmIntrinsics::_getIntUnaligned : append_unsafe_get_obj(callee, T_INT, false); return; + case vmIntrinsics::_getLongUnaligned : append_unsafe_get_obj(callee, T_LONG, false); return; + case 
vmIntrinsics::_putShortUnaligned : append_unsafe_put_obj(callee, T_SHORT, false); return; + case vmIntrinsics::_putCharUnaligned : append_unsafe_put_obj(callee, T_CHAR, false); return; + case vmIntrinsics::_putIntUnaligned : append_unsafe_put_obj(callee, T_INT, false); return; + case vmIntrinsics::_putLongUnaligned : append_unsafe_put_obj(callee, T_LONG, false); return; + case vmIntrinsics::_getObjectVolatile : append_unsafe_get_obj(callee, T_OBJECT, true); return; + case vmIntrinsics::_getBooleanVolatile : append_unsafe_get_obj(callee, T_BOOLEAN, true); return; + case vmIntrinsics::_getByteVolatile : append_unsafe_get_obj(callee, T_BYTE, true); return; + case vmIntrinsics::_getShortVolatile : append_unsafe_get_obj(callee, T_SHORT, true); return; + case vmIntrinsics::_getCharVolatile : append_unsafe_get_obj(callee, T_CHAR, true); return; + case vmIntrinsics::_getIntVolatile : append_unsafe_get_obj(callee, T_INT, true); return; + case vmIntrinsics::_getLongVolatile : append_unsafe_get_obj(callee, T_LONG, true); return; + case vmIntrinsics::_getFloatVolatile : append_unsafe_get_obj(callee, T_FLOAT, true); return; + case vmIntrinsics::_getDoubleVolatile : append_unsafe_get_obj(callee, T_DOUBLE, true); return; + case vmIntrinsics::_putObjectVolatile : append_unsafe_put_obj(callee, T_OBJECT, true); return; + case vmIntrinsics::_putBooleanVolatile : append_unsafe_put_obj(callee, T_BOOLEAN, true); return; + case vmIntrinsics::_putByteVolatile : append_unsafe_put_obj(callee, T_BYTE, true); return; + case vmIntrinsics::_putShortVolatile : append_unsafe_put_obj(callee, T_SHORT, true); return; + case vmIntrinsics::_putCharVolatile : append_unsafe_put_obj(callee, T_CHAR, true); return; + case vmIntrinsics::_putIntVolatile : append_unsafe_put_obj(callee, T_INT, true); return; + case vmIntrinsics::_putLongVolatile : append_unsafe_put_obj(callee, T_LONG, true); return; + case vmIntrinsics::_putFloatVolatile : append_unsafe_put_obj(callee, T_FLOAT, true); return; + case 
vmIntrinsics::_putDoubleVolatile : append_unsafe_put_obj(callee, T_DOUBLE, true); return; + case vmIntrinsics::_getByte_raw : append_unsafe_get_raw(callee, T_BYTE ); return; + case vmIntrinsics::_getShort_raw : append_unsafe_get_raw(callee, T_SHORT ); return; + case vmIntrinsics::_getChar_raw : append_unsafe_get_raw(callee, T_CHAR ); return; + case vmIntrinsics::_getInt_raw : append_unsafe_get_raw(callee, T_INT ); return; + case vmIntrinsics::_getLong_raw : append_unsafe_get_raw(callee, T_LONG ); return; + case vmIntrinsics::_getFloat_raw : append_unsafe_get_raw(callee, T_FLOAT ); return; + case vmIntrinsics::_getDouble_raw : append_unsafe_get_raw(callee, T_DOUBLE); return; + case vmIntrinsics::_putByte_raw : append_unsafe_put_raw(callee, T_BYTE ); return; + case vmIntrinsics::_putShort_raw : append_unsafe_put_raw(callee, T_SHORT ); return; + case vmIntrinsics::_putChar_raw : append_unsafe_put_raw(callee, T_CHAR ); return; + case vmIntrinsics::_putInt_raw : append_unsafe_put_raw(callee, T_INT ); return; + case vmIntrinsics::_putLong_raw : append_unsafe_put_raw(callee, T_LONG ); return; + case vmIntrinsics::_putFloat_raw : append_unsafe_put_raw(callee, T_FLOAT ); return; + case vmIntrinsics::_putDouble_raw : append_unsafe_put_raw(callee, T_DOUBLE); return; + case vmIntrinsics::_putOrderedObject : append_unsafe_put_obj(callee, T_OBJECT, true); return; + case vmIntrinsics::_putOrderedInt : append_unsafe_put_obj(callee, T_INT, true); return; + case vmIntrinsics::_putOrderedLong : append_unsafe_put_obj(callee, T_LONG, true); return; + case vmIntrinsics::_compareAndSwapLong: + case vmIntrinsics::_compareAndSwapInt: + case vmIntrinsics::_compareAndSwapObject: append_unsafe_CAS(callee); return; + case vmIntrinsics::_getAndAddInt: + case vmIntrinsics::_getAndAddLong : append_unsafe_get_and_set_obj(callee, true); return; + case vmIntrinsics::_getAndSetInt : + case vmIntrinsics::_getAndSetLong : + case vmIntrinsics::_getAndSetObject : append_unsafe_get_and_set_obj(callee, 
false); return; + default: + break; } - bool preserves_state = false; - bool cantrap = true; - switch (id) { - case vmIntrinsics::_arraycopy: - if (!InlineArrayCopy) return false; - break; -#ifdef TRACE_HAVE_INTRINSICS - case vmIntrinsics::_classID: - case vmIntrinsics::_threadID: - preserves_state = true; - cantrap = true; - break; - - case vmIntrinsics::_counterTime: - preserves_state = true; - cantrap = false; - break; -#endif - - case vmIntrinsics::_currentTimeMillis: - case vmIntrinsics::_nanoTime: - preserves_state = true; - cantrap = false; - break; - - case vmIntrinsics::_floatToRawIntBits : - case vmIntrinsics::_intBitsToFloat : - case vmIntrinsics::_doubleToRawLongBits : - case vmIntrinsics::_longBitsToDouble : - if (!InlineMathNatives) return false; - preserves_state = true; - cantrap = false; - break; - - case vmIntrinsics::_getClass : - case vmIntrinsics::_isInstance : - if (!InlineClassNatives) return false; - preserves_state = true; - break; - - case vmIntrinsics::_currentThread : - if (!InlineThreadNatives) return false; - preserves_state = true; - cantrap = false; - break; - - case vmIntrinsics::_dabs : // fall through - case vmIntrinsics::_dsqrt : // fall through - case vmIntrinsics::_dsin : // fall through - case vmIntrinsics::_dcos : // fall through - case vmIntrinsics::_dtan : // fall through - case vmIntrinsics::_dlog : // fall through - case vmIntrinsics::_dlog10 : // fall through - case vmIntrinsics::_dexp : // fall through - case vmIntrinsics::_dpow : // fall through - if (!InlineMathNatives) return false; - cantrap = false; - preserves_state = true; - break; - - // Use special nodes for Unsafe instructions so we can more easily - // perform an address-mode optimization on the raw variants - case vmIntrinsics::_getObject : return append_unsafe_get_obj(callee, T_OBJECT, false); - case vmIntrinsics::_getBoolean: return append_unsafe_get_obj(callee, T_BOOLEAN, false); - case vmIntrinsics::_getByte : return append_unsafe_get_obj(callee, T_BYTE, 
false); - case vmIntrinsics::_getShort : return append_unsafe_get_obj(callee, T_SHORT, false); - case vmIntrinsics::_getChar : return append_unsafe_get_obj(callee, T_CHAR, false); - case vmIntrinsics::_getInt : return append_unsafe_get_obj(callee, T_INT, false); - case vmIntrinsics::_getLong : return append_unsafe_get_obj(callee, T_LONG, false); - case vmIntrinsics::_getFloat : return append_unsafe_get_obj(callee, T_FLOAT, false); - case vmIntrinsics::_getDouble : return append_unsafe_get_obj(callee, T_DOUBLE, false); - - case vmIntrinsics::_putObject : return append_unsafe_put_obj(callee, T_OBJECT, false); - case vmIntrinsics::_putBoolean: return append_unsafe_put_obj(callee, T_BOOLEAN, false); - case vmIntrinsics::_putByte : return append_unsafe_put_obj(callee, T_BYTE, false); - case vmIntrinsics::_putShort : return append_unsafe_put_obj(callee, T_SHORT, false); - case vmIntrinsics::_putChar : return append_unsafe_put_obj(callee, T_CHAR, false); - case vmIntrinsics::_putInt : return append_unsafe_put_obj(callee, T_INT, false); - case vmIntrinsics::_putLong : return append_unsafe_put_obj(callee, T_LONG, false); - case vmIntrinsics::_putFloat : return append_unsafe_put_obj(callee, T_FLOAT, false); - case vmIntrinsics::_putDouble : return append_unsafe_put_obj(callee, T_DOUBLE, false); - - case vmIntrinsics::_getShortUnaligned : - return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_SHORT, false) : false; - case vmIntrinsics::_getCharUnaligned : - return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_CHAR, false) : false; - case vmIntrinsics::_getIntUnaligned : - return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_INT, false) : false; - case vmIntrinsics::_getLongUnaligned : - return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_LONG, false) : false; - - case vmIntrinsics::_putShortUnaligned : - return UseUnalignedAccesses ? 
append_unsafe_put_obj(callee, T_SHORT, false) : false; - case vmIntrinsics::_putCharUnaligned : - return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_CHAR, false) : false; - case vmIntrinsics::_putIntUnaligned : - return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_INT, false) : false; - case vmIntrinsics::_putLongUnaligned : - return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_LONG, false) : false; - - case vmIntrinsics::_getObjectVolatile : return append_unsafe_get_obj(callee, T_OBJECT, true); - case vmIntrinsics::_getBooleanVolatile: return append_unsafe_get_obj(callee, T_BOOLEAN, true); - case vmIntrinsics::_getByteVolatile : return append_unsafe_get_obj(callee, T_BYTE, true); - case vmIntrinsics::_getShortVolatile : return append_unsafe_get_obj(callee, T_SHORT, true); - case vmIntrinsics::_getCharVolatile : return append_unsafe_get_obj(callee, T_CHAR, true); - case vmIntrinsics::_getIntVolatile : return append_unsafe_get_obj(callee, T_INT, true); - case vmIntrinsics::_getLongVolatile : return append_unsafe_get_obj(callee, T_LONG, true); - case vmIntrinsics::_getFloatVolatile : return append_unsafe_get_obj(callee, T_FLOAT, true); - case vmIntrinsics::_getDoubleVolatile : return append_unsafe_get_obj(callee, T_DOUBLE, true); - - case vmIntrinsics::_putObjectVolatile : return append_unsafe_put_obj(callee, T_OBJECT, true); - case vmIntrinsics::_putBooleanVolatile: return append_unsafe_put_obj(callee, T_BOOLEAN, true); - case vmIntrinsics::_putByteVolatile : return append_unsafe_put_obj(callee, T_BYTE, true); - case vmIntrinsics::_putShortVolatile : return append_unsafe_put_obj(callee, T_SHORT, true); - case vmIntrinsics::_putCharVolatile : return append_unsafe_put_obj(callee, T_CHAR, true); - case vmIntrinsics::_putIntVolatile : return append_unsafe_put_obj(callee, T_INT, true); - case vmIntrinsics::_putLongVolatile : return append_unsafe_put_obj(callee, T_LONG, true); - case vmIntrinsics::_putFloatVolatile : return 
append_unsafe_put_obj(callee, T_FLOAT, true); - case vmIntrinsics::_putDoubleVolatile : return append_unsafe_put_obj(callee, T_DOUBLE, true); - - case vmIntrinsics::_getByte_raw : return append_unsafe_get_raw(callee, T_BYTE); - case vmIntrinsics::_getShort_raw : return append_unsafe_get_raw(callee, T_SHORT); - case vmIntrinsics::_getChar_raw : return append_unsafe_get_raw(callee, T_CHAR); - case vmIntrinsics::_getInt_raw : return append_unsafe_get_raw(callee, T_INT); - case vmIntrinsics::_getLong_raw : return append_unsafe_get_raw(callee, T_LONG); - case vmIntrinsics::_getFloat_raw : return append_unsafe_get_raw(callee, T_FLOAT); - case vmIntrinsics::_getDouble_raw : return append_unsafe_get_raw(callee, T_DOUBLE); - - case vmIntrinsics::_putByte_raw : return append_unsafe_put_raw(callee, T_BYTE); - case vmIntrinsics::_putShort_raw : return append_unsafe_put_raw(callee, T_SHORT); - case vmIntrinsics::_putChar_raw : return append_unsafe_put_raw(callee, T_CHAR); - case vmIntrinsics::_putInt_raw : return append_unsafe_put_raw(callee, T_INT); - case vmIntrinsics::_putLong_raw : return append_unsafe_put_raw(callee, T_LONG); - case vmIntrinsics::_putFloat_raw : return append_unsafe_put_raw(callee, T_FLOAT); - case vmIntrinsics::_putDouble_raw : return append_unsafe_put_raw(callee, T_DOUBLE); - - case vmIntrinsics::_checkIndex : - if (!InlineNIOCheckIndex) return false; - preserves_state = true; - break; - case vmIntrinsics::_putOrderedObject : return append_unsafe_put_obj(callee, T_OBJECT, true); - case vmIntrinsics::_putOrderedInt : return append_unsafe_put_obj(callee, T_INT, true); - case vmIntrinsics::_putOrderedLong : return append_unsafe_put_obj(callee, T_LONG, true); - - case vmIntrinsics::_compareAndSwapLong: - if (!VM_Version::supports_cx8()) return false; - // fall through - case vmIntrinsics::_compareAndSwapInt: - case vmIntrinsics::_compareAndSwapObject: - append_unsafe_CAS(callee); - return true; - - case vmIntrinsics::_getAndAddInt: - if 
(!VM_Version::supports_atomic_getadd4()) { - return false; - } - return append_unsafe_get_and_set_obj(callee, true); - case vmIntrinsics::_getAndAddLong: - if (!VM_Version::supports_atomic_getadd8()) { - return false; - } - return append_unsafe_get_and_set_obj(callee, true); - case vmIntrinsics::_getAndSetInt: - if (!VM_Version::supports_atomic_getset4()) { - return false; - } - return append_unsafe_get_and_set_obj(callee, false); - case vmIntrinsics::_getAndSetLong: - if (!VM_Version::supports_atomic_getset8()) { - return false; - } - return append_unsafe_get_and_set_obj(callee, false); - case vmIntrinsics::_getAndSetObject: -#ifdef _LP64 - if (!UseCompressedOops && !VM_Version::supports_atomic_getset8()) { - return false; - } - if (UseCompressedOops && !VM_Version::supports_atomic_getset4()) { - return false; - } -#else - if (!VM_Version::supports_atomic_getset4()) { - return false; - } -#endif - return append_unsafe_get_and_set_obj(callee, false); - - case vmIntrinsics::_Reference_get: - // Use the intrinsic version of Reference.get() so that the value in - // the referent field can be registered by the G1 pre-barrier code. - // Also to prevent commoning reads from this field across safepoint - // since GC can change its value. 
- preserves_state = true; - break; - - case vmIntrinsics::_updateCRC32: - case vmIntrinsics::_updateBytesCRC32: - case vmIntrinsics::_updateByteBufferCRC32: - if (!UseCRC32Intrinsics) return false; - cantrap = false; - preserves_state = true; - break; - - case vmIntrinsics::_loadFence : - case vmIntrinsics::_storeFence: - case vmIntrinsics::_fullFence : - break; - - default : return false; // do not inline - } // create intrinsic node const bool has_receiver = !callee->is_static(); ValueType* result_type = as_ValueType(callee->return_type()); @@ -3621,8 +3475,10 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) { } } - Intrinsic* result = new Intrinsic(result_type, id, args, has_receiver, state_before, - preserves_state, cantrap); + Intrinsic* result = new Intrinsic(result_type, callee->intrinsic_id(), + args, has_receiver, state_before, + vmIntrinsics::preserves_state(id), + vmIntrinsics::can_trap(id)); // append instruction & push result Value value = append_split(result); if (result_type != voidType) push(result_type, value); @@ -3630,8 +3486,22 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) { if (callee != method() && profile_return() && result_type->is_object_kind()) { profile_return_type(result, callee); } +} - // done +bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) { + // For calling is_intrinsic_available we need to transition to + // the '_thread_in_vm' state because is_intrinsic_available() + // accesses critical VM-internal data. + if (!_compilation->compiler()->is_intrinsic_available(callee->get_Method(), NULL)) { + if (!InlineNatives) { + // Return false and also set message that the inlining of + // intrinsics has been disabled in general. 
+ INLINE_BAILOUT("intrinsic method inlining disabled"); + } else { + return false; + } + } + build_graph_for_intrinsic(callee); return true; } @@ -4224,58 +4094,46 @@ void GraphBuilder::pop_scope_for_jsr() { _scope_data = scope_data()->parent(); } -bool GraphBuilder::append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile) { - if (InlineUnsafeOps) { - Values* args = state()->pop_arguments(callee->arg_size()); - null_check(args->at(0)); - Instruction* offset = args->at(2); +void GraphBuilder::append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile) { + Values* args = state()->pop_arguments(callee->arg_size()); + null_check(args->at(0)); + Instruction* offset = args->at(2); #ifndef _LP64 - offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT))); + offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT))); #endif - Instruction* op = append(new UnsafeGetObject(t, args->at(1), offset, is_volatile)); - push(op->type(), op); - compilation()->set_has_unsafe_access(true); - } - return InlineUnsafeOps; + Instruction* op = append(new UnsafeGetObject(t, args->at(1), offset, is_volatile)); + push(op->type(), op); + compilation()->set_has_unsafe_access(true); } -bool GraphBuilder::append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile) { - if (InlineUnsafeOps) { - Values* args = state()->pop_arguments(callee->arg_size()); - null_check(args->at(0)); - Instruction* offset = args->at(2); +void GraphBuilder::append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile) { + Values* args = state()->pop_arguments(callee->arg_size()); + null_check(args->at(0)); + Instruction* offset = args->at(2); #ifndef _LP64 - offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT))); + offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT))); #endif - Instruction* op = append(new UnsafePutObject(t, args->at(1), offset, args->at(3), is_volatile)); - 
compilation()->set_has_unsafe_access(true); - kill_all(); - } - return InlineUnsafeOps; + Instruction* op = append(new UnsafePutObject(t, args->at(1), offset, args->at(3), is_volatile)); + compilation()->set_has_unsafe_access(true); + kill_all(); } -bool GraphBuilder::append_unsafe_get_raw(ciMethod* callee, BasicType t) { - if (InlineUnsafeOps) { - Values* args = state()->pop_arguments(callee->arg_size()); - null_check(args->at(0)); - Instruction* op = append(new UnsafeGetRaw(t, args->at(1), false)); - push(op->type(), op); - compilation()->set_has_unsafe_access(true); - } - return InlineUnsafeOps; +void GraphBuilder::append_unsafe_get_raw(ciMethod* callee, BasicType t) { + Values* args = state()->pop_arguments(callee->arg_size()); + null_check(args->at(0)); + Instruction* op = append(new UnsafeGetRaw(t, args->at(1), false)); + push(op->type(), op); + compilation()->set_has_unsafe_access(true); } -bool GraphBuilder::append_unsafe_put_raw(ciMethod* callee, BasicType t) { - if (InlineUnsafeOps) { - Values* args = state()->pop_arguments(callee->arg_size()); - null_check(args->at(0)); - Instruction* op = append(new UnsafePutRaw(t, args->at(1), args->at(2))); - compilation()->set_has_unsafe_access(true); - } - return InlineUnsafeOps; +void GraphBuilder::append_unsafe_put_raw(ciMethod* callee, BasicType t) { + Values* args = state()->pop_arguments(callee->arg_size()); + null_check(args->at(0)); + Instruction* op = append(new UnsafePutRaw(t, args->at(1), args->at(2))); + compilation()->set_has_unsafe_access(true); } @@ -4352,21 +4210,18 @@ void GraphBuilder::print_inlining(ciMethod* callee, const char* msg, bool succes } } -bool GraphBuilder::append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add) { - if (InlineUnsafeOps) { - Values* args = state()->pop_arguments(callee->arg_size()); - BasicType t = callee->return_type()->basic_type(); - null_check(args->at(0)); - Instruction* offset = args->at(2); +void GraphBuilder::append_unsafe_get_and_set_obj(ciMethod* callee, 
bool is_add) { + Values* args = state()->pop_arguments(callee->arg_size()); + BasicType t = callee->return_type()->basic_type(); + null_check(args->at(0)); + Instruction* offset = args->at(2); #ifndef _LP64 - offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT))); + offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT))); #endif - Instruction* op = append(new UnsafeGetAndSetObject(t, args->at(1), offset, args->at(3), is_add)); - compilation()->set_has_unsafe_access(true); - kill_all(); - push(op->type(), op); - } - return InlineUnsafeOps; + Instruction* op = append(new UnsafeGetAndSetObject(t, args->at(1), offset, args->at(3), is_add)); + compilation()->set_has_unsafe_access(true); + kill_all(); + push(op->type(), op); } #ifndef PRODUCT diff --git a/hotspot/src/share/vm/c1/c1_GraphBuilder.hpp b/hotspot/src/share/vm/c1/c1_GraphBuilder.hpp index d53c1c04953..40af90bc13d 100644 --- a/hotspot/src/share/vm/c1/c1_GraphBuilder.hpp +++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.hpp @@ -339,6 +339,8 @@ class GraphBuilder VALUE_OBJ_CLASS_SPEC { void inline_sync_entry(Value lock, BlockBegin* sync_handler); void fill_sync_handler(Value lock, BlockBegin* sync_handler, bool default_handler = false); + void build_graph_for_intrinsic(ciMethod* callee); + // inliners bool try_inline( ciMethod* callee, bool holder_known, Bytecodes::Code bc = Bytecodes::_illegal, Value receiver = NULL); bool try_inline_intrinsics(ciMethod* callee); @@ -364,12 +366,12 @@ class GraphBuilder VALUE_OBJ_CLASS_SPEC { void pop_scope(); void pop_scope_for_jsr(); - bool append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile); - bool append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile); - bool append_unsafe_get_raw(ciMethod* callee, BasicType t); - bool append_unsafe_put_raw(ciMethod* callee, BasicType t); + void append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile); + void append_unsafe_put_obj(ciMethod* callee, BasicType 
t, bool is_volatile); + void append_unsafe_get_raw(ciMethod* callee, BasicType t); + void append_unsafe_put_raw(ciMethod* callee, BasicType t); void append_unsafe_CAS(ciMethod* callee); - bool append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add); + void append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add); void print_inlining(ciMethod* callee, const char* msg = NULL, bool success = true); diff --git a/hotspot/src/share/vm/classfile/vmSymbols.cpp b/hotspot/src/share/vm/classfile/vmSymbols.cpp index 6d13995c588..1a968acc82c 100644 --- a/hotspot/src/share/vm/classfile/vmSymbols.cpp +++ b/hotspot/src/share/vm/classfile/vmSymbols.cpp @@ -324,6 +324,319 @@ vmIntrinsics::ID vmIntrinsics::for_raw_conversion(BasicType src, BasicType dest) return vmIntrinsics::_none; } +bool vmIntrinsics::preserves_state(vmIntrinsics::ID id) { + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + switch(id) { +#ifdef TRACE_HAVE_INTRINSICS + case vmIntrinsics::_classID: + case vmIntrinsics::_threadID: + case vmIntrinsics::_counterTime: +#endif + case vmIntrinsics::_currentTimeMillis: + case vmIntrinsics::_nanoTime: + case vmIntrinsics::_floatToRawIntBits: + case vmIntrinsics::_intBitsToFloat: + case vmIntrinsics::_doubleToRawLongBits: + case vmIntrinsics::_longBitsToDouble: + case vmIntrinsics::_getClass: + case vmIntrinsics::_isInstance: + case vmIntrinsics::_currentThread: + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: + case vmIntrinsics::_dsin: + case vmIntrinsics::_dcos: + case vmIntrinsics::_dtan: + case vmIntrinsics::_dlog: + case vmIntrinsics::_dlog10: + case vmIntrinsics::_dexp: + case vmIntrinsics::_dpow: + case vmIntrinsics::_checkIndex: + case vmIntrinsics::_Reference_get: + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: + return true; + default: + return false; + } +} + +bool vmIntrinsics::can_trap(vmIntrinsics::ID id) { + assert(id != vmIntrinsics::_none, "must be a VM 
intrinsic"); + switch(id) { +#ifdef TRACE_HAVE_INTRINSICS + case vmIntrinsics::_counterTime: +#endif + case vmIntrinsics::_currentTimeMillis: + case vmIntrinsics::_nanoTime: + case vmIntrinsics::_floatToRawIntBits: + case vmIntrinsics::_intBitsToFloat: + case vmIntrinsics::_doubleToRawLongBits: + case vmIntrinsics::_longBitsToDouble: + case vmIntrinsics::_currentThread: + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: + case vmIntrinsics::_dsin: + case vmIntrinsics::_dcos: + case vmIntrinsics::_dtan: + case vmIntrinsics::_dlog: + case vmIntrinsics::_dlog10: + case vmIntrinsics::_dexp: + case vmIntrinsics::_dpow: + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: + return false; + default: + return true; + } +} + +bool vmIntrinsics::does_virtual_dispatch(vmIntrinsics::ID id) { + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + switch(id) { + case vmIntrinsics::_hashCode: + case vmIntrinsics::_clone: + return true; + break; + default: + return false; + } +} + +int vmIntrinsics::predicates_needed(vmIntrinsics::ID id) { + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + switch (id) { + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + return 1; + case vmIntrinsics::_digestBase_implCompressMB: + return 3; + default: + return 0; + } +} + +bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + switch (id) { + case vmIntrinsics::_isInstance: + case vmIntrinsics::_isAssignableFrom: + case vmIntrinsics::_getModifiers: + case vmIntrinsics::_isInterface: + case vmIntrinsics::_isArray: + case vmIntrinsics::_isPrimitive: + case vmIntrinsics::_getSuperclass: + case vmIntrinsics::_Class_cast: + case vmIntrinsics::_getLength: + case vmIntrinsics::_newArray: + if (!InlineClassNatives) return true; + break; + case 
vmIntrinsics::_currentThread: + case vmIntrinsics::_isInterrupted: + if (!InlineThreadNatives) return true; + break; + case vmIntrinsics::_floatToRawIntBits: + case vmIntrinsics::_intBitsToFloat: + case vmIntrinsics::_doubleToRawLongBits: + case vmIntrinsics::_longBitsToDouble: + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: + case vmIntrinsics::_dsin: + case vmIntrinsics::_dcos: + case vmIntrinsics::_dtan: + case vmIntrinsics::_dlog: + case vmIntrinsics::_dexp: + case vmIntrinsics::_dpow: + case vmIntrinsics::_dlog10: + case vmIntrinsics::_datan2: + case vmIntrinsics::_min: + case vmIntrinsics::_max: + case vmIntrinsics::_floatToIntBits: + case vmIntrinsics::_doubleToLongBits: + if (!InlineMathNatives) return true; + break; + case vmIntrinsics::_arraycopy: + if (!InlineArrayCopy) return true; + break; + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: + if (!UseCRC32Intrinsics) return true; + break; + case vmIntrinsics::_getObject: + case vmIntrinsics::_getBoolean: + case vmIntrinsics::_getByte: + case vmIntrinsics::_getShort: + case vmIntrinsics::_getChar: + case vmIntrinsics::_getInt: + case vmIntrinsics::_getLong: + case vmIntrinsics::_getFloat: + case vmIntrinsics::_getDouble: + case vmIntrinsics::_putObject: + case vmIntrinsics::_putBoolean: + case vmIntrinsics::_putByte: + case vmIntrinsics::_putShort: + case vmIntrinsics::_putChar: + case vmIntrinsics::_putInt: + case vmIntrinsics::_putLong: + case vmIntrinsics::_putFloat: + case vmIntrinsics::_putDouble: + case vmIntrinsics::_getObjectVolatile: + case vmIntrinsics::_getBooleanVolatile: + case vmIntrinsics::_getByteVolatile: + case vmIntrinsics::_getShortVolatile: + case vmIntrinsics::_getCharVolatile: + case vmIntrinsics::_getIntVolatile: + case vmIntrinsics::_getLongVolatile: + case vmIntrinsics::_getFloatVolatile: + case vmIntrinsics::_getDoubleVolatile: + case vmIntrinsics::_putObjectVolatile: + case 
vmIntrinsics::_putBooleanVolatile: + case vmIntrinsics::_putByteVolatile: + case vmIntrinsics::_putShortVolatile: + case vmIntrinsics::_putCharVolatile: + case vmIntrinsics::_putIntVolatile: + case vmIntrinsics::_putLongVolatile: + case vmIntrinsics::_putFloatVolatile: + case vmIntrinsics::_putDoubleVolatile: + case vmIntrinsics::_getByte_raw: + case vmIntrinsics::_getShort_raw: + case vmIntrinsics::_getChar_raw: + case vmIntrinsics::_getInt_raw: + case vmIntrinsics::_getLong_raw: + case vmIntrinsics::_getFloat_raw: + case vmIntrinsics::_getDouble_raw: + case vmIntrinsics::_putByte_raw: + case vmIntrinsics::_putShort_raw: + case vmIntrinsics::_putChar_raw: + case vmIntrinsics::_putInt_raw: + case vmIntrinsics::_putLong_raw: + case vmIntrinsics::_putFloat_raw: + case vmIntrinsics::_putDouble_raw: + case vmIntrinsics::_putOrderedObject: + case vmIntrinsics::_putOrderedLong: + case vmIntrinsics::_putOrderedInt: + case vmIntrinsics::_getAndAddInt: + case vmIntrinsics::_getAndAddLong: + case vmIntrinsics::_getAndSetInt: + case vmIntrinsics::_getAndSetLong: + case vmIntrinsics::_getAndSetObject: + if (!InlineUnsafeOps) return true; + break; + case vmIntrinsics::_getShortUnaligned: + case vmIntrinsics::_getCharUnaligned: + case vmIntrinsics::_getIntUnaligned: + case vmIntrinsics::_getLongUnaligned: + case vmIntrinsics::_putShortUnaligned: + case vmIntrinsics::_putCharUnaligned: + case vmIntrinsics::_putIntUnaligned: + case vmIntrinsics::_putLongUnaligned: + case vmIntrinsics::_allocateInstance: + case vmIntrinsics::_getAddress_raw: + case vmIntrinsics::_putAddress_raw: + if (!InlineUnsafeOps || !UseUnalignedAccesses) return true; + break; + case vmIntrinsics::_hashCode: + if (!InlineObjectHash) return true; + break; + case vmIntrinsics::_aescrypt_encryptBlock: + case vmIntrinsics::_aescrypt_decryptBlock: + if (!UseAESIntrinsics) return true; + break; + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + if 
(!UseAESIntrinsics) return true; + break; + case vmIntrinsics::_sha_implCompress: + if (!UseSHA1Intrinsics) return true; + break; + case vmIntrinsics::_sha2_implCompress: + if (!UseSHA256Intrinsics) return true; + break; + case vmIntrinsics::_sha5_implCompress: + if (!UseSHA512Intrinsics) return true; + break; + case vmIntrinsics::_digestBase_implCompressMB: + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return true; + break; + case vmIntrinsics::_ghash_processBlocks: + if (!UseGHASHIntrinsics) return true; + break; + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + if (!UseCRC32CIntrinsics) return true; + break; + case vmIntrinsics::_copyMemory: + if (!InlineArrayCopy || !InlineUnsafeOps) return true; + break; +#ifdef COMPILER1 + case vmIntrinsics::_checkIndex: + if (!InlineNIOCheckIndex) return true; + break; +#endif // COMPILER1 +#ifdef COMPILER2 + case vmIntrinsics::_clone: + case vmIntrinsics::_copyOf: + case vmIntrinsics::_copyOfRange: + // These intrinsics use both the objectcopy and the arraycopy + // intrinsic mechanism. 
+ if (!InlineObjectCopy || !InlineArrayCopy) return true; + break; + case vmIntrinsics::_compareTo: + if (!SpecialStringCompareTo) return true; + break; + case vmIntrinsics::_indexOf: + if (!SpecialStringIndexOf) return true; + break; + case vmIntrinsics::_equals: + if (!SpecialStringEquals) return true; + break; + case vmIntrinsics::_equalsC: + if (!SpecialArraysEquals) return true; + break; + case vmIntrinsics::_encodeISOArray: + if (!SpecialEncodeISOArray) return true; + break; + case vmIntrinsics::_getCallerClass: + if (!InlineReflectionGetCallerClass) return true; + break; + case vmIntrinsics::_multiplyToLen: + if (!UseMultiplyToLenIntrinsic) return true; + break; + case vmIntrinsics::_squareToLen: + if (!UseSquareToLenIntrinsic) return true; + break; + case vmIntrinsics::_mulAdd: + if (!UseMulAddIntrinsic) return true; + break; + case vmIntrinsics::_montgomeryMultiply: + if (!UseMontgomeryMultiplyIntrinsic) return true; + break; + case vmIntrinsics::_montgomerySquare: + if (!UseMontgomerySquareIntrinsic) return true; + break; + case vmIntrinsics::_addExactI: + case vmIntrinsics::_addExactL: + case vmIntrinsics::_decrementExactI: + case vmIntrinsics::_decrementExactL: + case vmIntrinsics::_incrementExactI: + case vmIntrinsics::_incrementExactL: + case vmIntrinsics::_multiplyExactI: + case vmIntrinsics::_multiplyExactL: + case vmIntrinsics::_negateExactI: + case vmIntrinsics::_negateExactL: + case vmIntrinsics::_subtractExactI: + case vmIntrinsics::_subtractExactL: + if (!UseMathExactIntrinsics || !InlineMathNatives) return true; + break; +#endif // COMPILER2 + default: + return false; + } + + return false; +} #define VM_INTRINSIC_INITIALIZE(id, klass, name, sig, flags) #id "\0" static const char* vm_intrinsic_name_bodies = diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp index b453e0ec1fa..9e67c5012e0 100644 --- a/hotspot/src/share/vm/classfile/vmSymbols.hpp +++ 
b/hotspot/src/share/vm/classfile/vmSymbols.hpp @@ -1368,6 +1368,26 @@ public: // Raw conversion: static ID for_raw_conversion(BasicType src, BasicType dest); + + // The methods below provide information related to compiling intrinsics. + + // (1) Information needed by the C1 compiler. + + static bool preserves_state(vmIntrinsics::ID id); + static bool can_trap(vmIntrinsics::ID id); + + // (2) Information needed by the C2 compiler. + + // Returns true if the intrinsic for method 'method' will perform a virtual dispatch. + static bool does_virtual_dispatch(vmIntrinsics::ID id); + // A return value larger than 0 indicates that the intrinsic for method + // 'method' requires predicated logic. + static int predicates_needed(vmIntrinsics::ID id); + + // Returns true if an intrinsic is disabled by command-line flags and + // false otherwise. Implements functionality common to the C1 + // and the C2 compiler. + static bool is_disabled_by_flags(vmIntrinsics::ID id); }; #endif // SHARE_VM_CLASSFILE_VMSYMBOLS_HPP diff --git a/hotspot/src/share/vm/compiler/abstractCompiler.hpp b/hotspot/src/share/vm/compiler/abstractCompiler.hpp index 67f7c95b03d..e3a727b0886 100644 --- a/hotspot/src/share/vm/compiler/abstractCompiler.hpp +++ b/hotspot/src/share/vm/compiler/abstractCompiler.hpp @@ -66,6 +66,58 @@ class AbstractCompiler : public CHeapObj { virtual bool supports_osr () { return true; } virtual bool can_compile_method(methodHandle method) { return true; } + // Determine if the current compiler provides an intrinsic + // for method 'method'. An intrinsic is available if: + // - the intrinsic is enabled (by using the appropriate command-line flag) and + // - the platform on which the VM is running supports the intrinsic + // (i.e., the platform provides the instructions necessary for the compiler + // to generate the intrinsic code). + // + // The second parameter, 'compilation_context', is needed to implement functionality + // related to the DisableIntrinsic command-line flag. 
The DisableIntrinsic flag can + // be used to prohibit the C2 compiler (but not the C1 compiler) from using an intrinsic. + // There are three ways to disable an intrinsic using the DisableIntrinsic flag: + // + // (1) -XX:DisableIntrinsic=_hashCode,_getClass + // Disables intrinsification of _hashCode and _getClass globally + // (i.e., the intrinsified version of the methods will not be used at all). + // (2) -XX:CompileCommand=option,aClass::aMethod,ccstr,DisableIntrinsic,_hashCode + // Disables intrinsification of _hashCode if it is called from + // aClass::aMethod (but not for any other call site of _hashCode) + // (3) -XX:CompileCommand=option,java.lang.ref.Reference::get,ccstr,DisableIntrinsic,_Reference_get + // Some methods are not compiled by C2. Instead, the C2 compiler + // returns directly the intrinsified version of these methods. + // The command above forces C2 to compile _Reference_get, but + // allows using the intrinsified version of _Reference_get at all + // other call sites. + // + // From the modes above, (1) disable intrinsics globally, (2) and (3) + // disable intrinsics on a per-method basis. In cases (2) and (3) the + // compilation context is aClass::aMethod and java.lang.ref.Reference::get, + // respectively. + virtual bool is_intrinsic_available(methodHandle method, methodHandle compilation_context) { + return false; + } + + // Determines if an intrinsic is supported by the compiler, that is, + // the compiler provides the instructions necessary to generate + // the intrinsic code for method 'method'. + // + // The 'is_intrinsic_supported' method is a white list, that is, + // by default no intrinsics are supported by a compiler except + // the ones listed in the method. Overriding methods should conform + // to this behavior. + virtual bool is_intrinsic_supported(methodHandle method) { + return false; + } + + // Implements compiler-specific processing of command-line flags.
+ // Processing of command-line flags common to all compilers is implemented + // in vmIntrinsics::is_disabled_by_flags. + virtual bool is_intrinsic_disabled_by_flag(methodHandle method) { + return false; + } + // Compiler type queries. bool is_c1() { return _type == c1; } bool is_c2() { return _type == c2; } diff --git a/hotspot/src/share/vm/opto/c2compiler.cpp b/hotspot/src/share/vm/opto/c2compiler.cpp index 45732b2745d..64ac374d00d 100644 --- a/hotspot/src/share/vm/opto/c2compiler.cpp +++ b/hotspot/src/share/vm/opto/c2compiler.cpp @@ -79,7 +79,6 @@ bool C2Compiler::init_c2_runtime() { return OptoRuntime::generate(thread->env()); } - void C2Compiler::initialize() { // The first compiler thread that gets here will initialize the // small amount of global state (and runtime stubs) that C2 needs. @@ -154,11 +153,361 @@ void C2Compiler::compile_method(ciEnv* env, ciMethod* target, int entry_bci) { } } - void C2Compiler::print_timers() { Compile::print_timers(); } +bool C2Compiler::is_intrinsic_available(methodHandle method, methodHandle compilation_context) { + // Assume a non-virtual dispatch. A virtual dispatch is + // possible for only a limited set of available intrinsics whereas + // a non-virtual dispatch is possible for all available intrinsics. + return is_intrinsic_supported(method, false) && + !is_intrinsic_disabled_by_flag(method, compilation_context); +} + +bool C2Compiler::is_intrinsic_supported(methodHandle method, bool is_virtual) { + vmIntrinsics::ID id = method->intrinsic_id(); + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + + if (id < vmIntrinsics::FIRST_ID || id >= vmIntrinsics::LAST_COMPILER_INLINE) { + return false; + } + + // Only Object.hashCode and Object.clone intrinsics implement also a virtual + // dispatch because calling both methods is expensive but both methods are + // frequently overridden. All other intrinsics implement only a non-virtual + // dispatch.
+ if (is_virtual) { + switch (id) { + case vmIntrinsics::_hashCode: + case vmIntrinsics::_clone: + break; + default: + return false; + } + } + + switch (id) { + case vmIntrinsics::_compareTo: + if (!Matcher::match_rule_supported(Op_StrComp)) return false; + break; + case vmIntrinsics::_equals: + if (!Matcher::match_rule_supported(Op_StrEquals)) return false; + break; + case vmIntrinsics::_equalsC: + if (!Matcher::match_rule_supported(Op_AryEq)) return false; + break; + case vmIntrinsics::_copyMemory: + if (StubRoutines::unsafe_arraycopy() == NULL) return false; + break; + case vmIntrinsics::_encodeISOArray: + if (!Matcher::match_rule_supported(Op_EncodeISOArray)) return false; + break; + case vmIntrinsics::_bitCount_i: + if (!Matcher::match_rule_supported(Op_PopCountI)) return false; + break; + case vmIntrinsics::_bitCount_l: + if (!Matcher::match_rule_supported(Op_PopCountL)) return false; + break; + case vmIntrinsics::_numberOfLeadingZeros_i: + if (!Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false; + break; + case vmIntrinsics::_numberOfLeadingZeros_l: + if (!Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false; + break; + case vmIntrinsics::_numberOfTrailingZeros_i: + if (!Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false; + break; + case vmIntrinsics::_numberOfTrailingZeros_l: + if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false; + break; + case vmIntrinsics::_reverseBytes_c: + if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return false; + break; + case vmIntrinsics::_reverseBytes_s: + if (!Matcher::match_rule_supported(Op_ReverseBytesS)) return false; + break; + case vmIntrinsics::_reverseBytes_i: + if (!Matcher::match_rule_supported(Op_ReverseBytesI)) return false; + break; + case vmIntrinsics::_reverseBytes_l: + if (!Matcher::match_rule_supported(Op_ReverseBytesL)) return false; + break; + case vmIntrinsics::_compareAndSwapObject: +#ifdef _LP64 + if (!UseCompressedOops && 
!Matcher::match_rule_supported(Op_CompareAndSwapP)) return false; +#endif + break; + case vmIntrinsics::_compareAndSwapLong: + if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return false; + break; + case vmIntrinsics::_getAndAddInt: + if (!Matcher::match_rule_supported(Op_GetAndAddI)) return false; + break; + case vmIntrinsics::_getAndAddLong: + if (!Matcher::match_rule_supported(Op_GetAndAddL)) return false; + break; + case vmIntrinsics::_getAndSetInt: + if (!Matcher::match_rule_supported(Op_GetAndSetI)) return false; + break; + case vmIntrinsics::_getAndSetLong: + if (!Matcher::match_rule_supported(Op_GetAndSetL)) return false; + break; + case vmIntrinsics::_getAndSetObject: +#ifdef _LP64 + if (!UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetP)) return false; + if (UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetN)) return false; + break; +#else + if (!Matcher::match_rule_supported(Op_GetAndSetP)) return false; + break; +#endif + case vmIntrinsics::_incrementExactI: + case vmIntrinsics::_addExactI: + if (!Matcher::match_rule_supported(Op_OverflowAddI)) return false; + break; + case vmIntrinsics::_incrementExactL: + case vmIntrinsics::_addExactL: + if (!Matcher::match_rule_supported(Op_OverflowAddL)) return false; + break; + case vmIntrinsics::_decrementExactI: + case vmIntrinsics::_subtractExactI: + if (!Matcher::match_rule_supported(Op_OverflowSubI)) return false; + break; + case vmIntrinsics::_decrementExactL: + case vmIntrinsics::_subtractExactL: + if (!Matcher::match_rule_supported(Op_OverflowSubL)) return false; + break; + case vmIntrinsics::_negateExactI: + if (!Matcher::match_rule_supported(Op_OverflowSubI)) return false; + break; + case vmIntrinsics::_negateExactL: + if (!Matcher::match_rule_supported(Op_OverflowSubL)) return false; + break; + case vmIntrinsics::_multiplyExactI: + if (!Matcher::match_rule_supported(Op_OverflowMulI)) return false; + break; + case vmIntrinsics::_multiplyExactL: + if 
(!Matcher::match_rule_supported(Op_OverflowMulL)) return false; + break; + case vmIntrinsics::_getCallerClass: + if (SystemDictionary::reflect_CallerSensitive_klass() == NULL) return false; + break; + case vmIntrinsics::_hashCode: + case vmIntrinsics::_identityHashCode: + case vmIntrinsics::_getClass: + case vmIntrinsics::_dsin: + case vmIntrinsics::_dcos: + case vmIntrinsics::_dtan: + case vmIntrinsics::_dabs: + case vmIntrinsics::_datan2: + case vmIntrinsics::_dsqrt: + case vmIntrinsics::_dexp: + case vmIntrinsics::_dlog: + case vmIntrinsics::_dlog10: + case vmIntrinsics::_dpow: + case vmIntrinsics::_min: + case vmIntrinsics::_max: + case vmIntrinsics::_arraycopy: + case vmIntrinsics::_indexOf: + case vmIntrinsics::_getObject: + case vmIntrinsics::_getBoolean: + case vmIntrinsics::_getByte: + case vmIntrinsics::_getShort: + case vmIntrinsics::_getChar: + case vmIntrinsics::_getInt: + case vmIntrinsics::_getLong: + case vmIntrinsics::_getFloat: + case vmIntrinsics::_getDouble: + case vmIntrinsics::_putObject: + case vmIntrinsics::_putBoolean: + case vmIntrinsics::_putByte: + case vmIntrinsics::_putShort: + case vmIntrinsics::_putChar: + case vmIntrinsics::_putInt: + case vmIntrinsics::_putLong: + case vmIntrinsics::_putFloat: + case vmIntrinsics::_putDouble: + case vmIntrinsics::_getByte_raw: + case vmIntrinsics::_getShort_raw: + case vmIntrinsics::_getChar_raw: + case vmIntrinsics::_getInt_raw: + case vmIntrinsics::_getLong_raw: + case vmIntrinsics::_getFloat_raw: + case vmIntrinsics::_getDouble_raw: + case vmIntrinsics::_getAddress_raw: + case vmIntrinsics::_putByte_raw: + case vmIntrinsics::_putShort_raw: + case vmIntrinsics::_putChar_raw: + case vmIntrinsics::_putInt_raw: + case vmIntrinsics::_putLong_raw: + case vmIntrinsics::_putFloat_raw: + case vmIntrinsics::_putDouble_raw: + case vmIntrinsics::_putAddress_raw: + case vmIntrinsics::_getObjectVolatile: + case vmIntrinsics::_getBooleanVolatile: + case vmIntrinsics::_getByteVolatile: + case 
vmIntrinsics::_getShortVolatile: + case vmIntrinsics::_getCharVolatile: + case vmIntrinsics::_getIntVolatile: + case vmIntrinsics::_getLongVolatile: + case vmIntrinsics::_getFloatVolatile: + case vmIntrinsics::_getDoubleVolatile: + case vmIntrinsics::_putObjectVolatile: + case vmIntrinsics::_putBooleanVolatile: + case vmIntrinsics::_putByteVolatile: + case vmIntrinsics::_putShortVolatile: + case vmIntrinsics::_putCharVolatile: + case vmIntrinsics::_putIntVolatile: + case vmIntrinsics::_putLongVolatile: + case vmIntrinsics::_putFloatVolatile: + case vmIntrinsics::_putDoubleVolatile: + case vmIntrinsics::_getShortUnaligned: + case vmIntrinsics::_getCharUnaligned: + case vmIntrinsics::_getIntUnaligned: + case vmIntrinsics::_getLongUnaligned: + case vmIntrinsics::_putShortUnaligned: + case vmIntrinsics::_putCharUnaligned: + case vmIntrinsics::_putIntUnaligned: + case vmIntrinsics::_putLongUnaligned: + case vmIntrinsics::_compareAndSwapInt: + case vmIntrinsics::_putOrderedObject: + case vmIntrinsics::_putOrderedInt: + case vmIntrinsics::_putOrderedLong: + case vmIntrinsics::_loadFence: + case vmIntrinsics::_storeFence: + case vmIntrinsics::_fullFence: + case vmIntrinsics::_currentThread: + case vmIntrinsics::_isInterrupted: +#ifdef TRACE_HAVE_INTRINSICS + case vmIntrinsics::_classID: + case vmIntrinsics::_threadID: + case vmIntrinsics::_counterTime: +#endif + case vmIntrinsics::_currentTimeMillis: + case vmIntrinsics::_nanoTime: + case vmIntrinsics::_allocateInstance: + case vmIntrinsics::_newArray: + case vmIntrinsics::_getLength: + case vmIntrinsics::_copyOf: + case vmIntrinsics::_copyOfRange: + case vmIntrinsics::_clone: + case vmIntrinsics::_isAssignableFrom: + case vmIntrinsics::_isInstance: + case vmIntrinsics::_getModifiers: + case vmIntrinsics::_isInterface: + case vmIntrinsics::_isArray: + case vmIntrinsics::_isPrimitive: + case vmIntrinsics::_getSuperclass: + case vmIntrinsics::_getClassAccessFlags: + case vmIntrinsics::_floatToRawIntBits: + case 
vmIntrinsics::_floatToIntBits: + case vmIntrinsics::_intBitsToFloat: + case vmIntrinsics::_doubleToRawLongBits: + case vmIntrinsics::_doubleToLongBits: + case vmIntrinsics::_longBitsToDouble: + case vmIntrinsics::_Reference_get: + case vmIntrinsics::_Class_cast: + case vmIntrinsics::_aescrypt_encryptBlock: + case vmIntrinsics::_aescrypt_decryptBlock: + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + case vmIntrinsics::_sha_implCompress: + case vmIntrinsics::_sha2_implCompress: + case vmIntrinsics::_sha5_implCompress: + case vmIntrinsics::_digestBase_implCompressMB: + case vmIntrinsics::_multiplyToLen: + case vmIntrinsics::_squareToLen: + case vmIntrinsics::_mulAdd: + case vmIntrinsics::_montgomeryMultiply: + case vmIntrinsics::_montgomerySquare: + case vmIntrinsics::_ghash_processBlocks: + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + case vmIntrinsics::_profileBoolean: + case vmIntrinsics::_isCompileConstant: + break; + default: + return false; + } + return true; +} + +bool C2Compiler::is_intrinsic_disabled_by_flag(methodHandle method, methodHandle compilation_context) { + vmIntrinsics::ID id = method->intrinsic_id(); + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + + if (vmIntrinsics::is_disabled_by_flags(method->intrinsic_id())) { + return true; + } + + // Check if the intrinsic corresponding to 'method' has been disabled on + // the command line by using the DisableIntrinsic flag (either globally + // or on a per-method level, see src/share/vm/compiler/abstractCompiler.hpp + // for details). + // Usually, the compilation context is the caller of the method 'method'. 
+ // The only case when for a non-recursive method 'method' the compilation context + // is not the caller of the 'method' (but it is the method itself) is + // java.lang.ref.Reference::get. + // For java.lang.ref.Reference::get, the intrinsic version is used + // instead of the C2-compiled version so that the value in the referent + // field can be registered by the G1 pre-barrier code. The intrinsified + // version of Reference::get also adds a memory barrier to prevent + // commoning reads from the referent field across safepoint since GC + // can change the referent field's value. See Compile::Compile() + // in src/share/vm/opto/compile.cpp for more details. + ccstr disable_intr = NULL; + if ((DisableIntrinsic[0] != '\0' && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) || + (!compilation_context.is_null() && + CompilerOracle::has_option_value(compilation_context, "DisableIntrinsic", disable_intr) && + strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL) + ) { + return true; + } + + // -XX:-InlineNatives disables nearly all intrinsics except the ones listed in + // the following switch statement.
+ if (!InlineNatives) { + switch (id) { + case vmIntrinsics::_indexOf: + case vmIntrinsics::_compareTo: + case vmIntrinsics::_equals: + case vmIntrinsics::_equalsC: + case vmIntrinsics::_getAndAddInt: + case vmIntrinsics::_getAndAddLong: + case vmIntrinsics::_getAndSetInt: + case vmIntrinsics::_getAndSetLong: + case vmIntrinsics::_getAndSetObject: + case vmIntrinsics::_loadFence: + case vmIntrinsics::_storeFence: + case vmIntrinsics::_fullFence: + case vmIntrinsics::_Reference_get: + break; + default: + return true; + } + } + + if (!InlineUnsafeOps) { + switch (id) { + case vmIntrinsics::_loadFence: + case vmIntrinsics::_storeFence: + case vmIntrinsics::_fullFence: + case vmIntrinsics::_compareAndSwapObject: + case vmIntrinsics::_compareAndSwapLong: + case vmIntrinsics::_compareAndSwapInt: + return true; + default: + return false; + } + } + + return false; +} + int C2Compiler::initial_code_buffer_size() { assert(SegmentedCodeCache, "Should be only used with a segmented code cache"); return Compile::MAX_inst_size + Compile::MAX_locs_size + initial_const_capacity; diff --git a/hotspot/src/share/vm/opto/c2compiler.hpp b/hotspot/src/share/vm/opto/c2compiler.hpp index e457e13794f..d651b1de0e5 100644 --- a/hotspot/src/share/vm/opto/c2compiler.hpp +++ b/hotspot/src/share/vm/opto/c2compiler.hpp @@ -36,7 +36,6 @@ public: // Name const char *name() { return "C2"; } - void initialize(); // Compilation entry point for methods @@ -52,6 +51,26 @@ public: // Print compilation timers and statistics void print_timers(); + // Check the availability of an intrinsic for 'method' given a compilation context. + virtual bool is_intrinsic_available(methodHandle method, methodHandle compilation_context); + + // Return true if the intrinsification of a method is supported by the compiler + // assuming a non-virtual dispatch. Return false otherwise.
+ virtual bool is_intrinsic_supported(methodHandle method) { + return is_intrinsic_supported(method, false); + } + + // Check if the compiler supports an intrinsic for 'method' given + // the dispatch mode specified by the 'is_virtual' parameter. + virtual bool is_intrinsic_supported(methodHandle method, bool is_virtual); + + // Processing of command-line flags specific to the C2 compiler. + virtual bool is_intrinsic_disabled_by_flag(methodHandle method) { + return is_intrinsic_disabled_by_flag(method, NULL); + } + + virtual bool is_intrinsic_disabled_by_flag(methodHandle method, methodHandle compilation_context); + // Initial size of the code buffer (may be increased at runtime) static int initial_code_buffer_size(); }; diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp index 024a545ea8a..23b993edcbd 100644 --- a/hotspot/src/share/vm/opto/library_call.cpp +++ b/hotspot/src/share/vm/opto/library_call.cpp @@ -31,6 +31,7 @@ #include "oops/objArrayKlass.hpp" #include "opto/addnode.hpp" #include "opto/arraycopynode.hpp" +#include "opto/c2compiler.hpp" #include "opto/callGenerator.hpp" #include "opto/castnode.hpp" #include "opto/cfgnode.hpp" @@ -305,330 +306,40 @@ class LibraryCallKit : public GraphKit { bool inline_isCompileConstant(); }; - //---------------------------make_vm_intrinsic---------------------------- CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { vmIntrinsics::ID id = m->intrinsic_id(); assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); - ccstr disable_intr = NULL; - - if ((DisableIntrinsic[0] != '\0' - && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) || - (method_has_option_value("DisableIntrinsic", disable_intr) - && strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL)) { - // disabled by a user request on the command line: - // example: -XX:DisableIntrinsic=_hashCode,_getClass - return NULL; - } - if (!m->is_loaded()) { - // do not attempt to
inline unloaded methods + // Do not attempt to inline unloaded methods. return NULL; } - // Only a few intrinsics implement a virtual dispatch. - // They are expensive calls which are also frequently overridden. - if (is_virtual) { - switch (id) { - case vmIntrinsics::_hashCode: - case vmIntrinsics::_clone: - // OK, Object.hashCode and Object.clone intrinsics come in both flavors - break; - default: - return NULL; - } + C2Compiler* compiler = (C2Compiler*)CompileBroker::compiler(CompLevel_full_optimization); + bool is_available = false; + + { + // For calling is_intrinsic_supported and is_intrinsic_disabled_by_flag + // the compiler must transition to '_thread_in_vm' state because both + // methods access VM-internal data. + VM_ENTRY_MARK; + methodHandle mh(THREAD, m->get_Method()); + methodHandle ct(THREAD, method()->get_Method()); + is_available = compiler->is_intrinsic_supported(mh, is_virtual) && + !compiler->is_intrinsic_disabled_by_flag(mh, ct); } - // -XX:-InlineNatives disables nearly all intrinsics: - if (!InlineNatives) { - switch (id) { - case vmIntrinsics::_indexOf: - case vmIntrinsics::_compareTo: - case vmIntrinsics::_equals: - case vmIntrinsics::_equalsC: - case vmIntrinsics::_getAndAddInt: - case vmIntrinsics::_getAndAddLong: - case vmIntrinsics::_getAndSetInt: - case vmIntrinsics::_getAndSetLong: - case vmIntrinsics::_getAndSetObject: - case vmIntrinsics::_loadFence: - case vmIntrinsics::_storeFence: - case vmIntrinsics::_fullFence: - break; // InlineNatives does not control String.compareTo - case vmIntrinsics::_Reference_get: - break; // InlineNatives does not control Reference.get - default: - return NULL; - } - } - - int predicates = 0; - bool does_virtual_dispatch = false; - - switch (id) { - case vmIntrinsics::_compareTo: - if (!SpecialStringCompareTo) return NULL; - if (!Matcher::match_rule_supported(Op_StrComp)) return NULL; - break; - case vmIntrinsics::_indexOf: - if (!SpecialStringIndexOf) return NULL; - break; - case 
vmIntrinsics::_equals: - if (!SpecialStringEquals) return NULL; - if (!Matcher::match_rule_supported(Op_StrEquals)) return NULL; - break; - case vmIntrinsics::_equalsC: - if (!SpecialArraysEquals) return NULL; - if (!Matcher::match_rule_supported(Op_AryEq)) return NULL; - break; - case vmIntrinsics::_arraycopy: - if (!InlineArrayCopy) return NULL; - break; - case vmIntrinsics::_copyMemory: - if (StubRoutines::unsafe_arraycopy() == NULL) return NULL; - if (!InlineArrayCopy) return NULL; - break; - case vmIntrinsics::_hashCode: - if (!InlineObjectHash) return NULL; - does_virtual_dispatch = true; - break; - case vmIntrinsics::_clone: - does_virtual_dispatch = true; - case vmIntrinsics::_copyOf: - case vmIntrinsics::_copyOfRange: - if (!InlineObjectCopy) return NULL; - // These also use the arraycopy intrinsic mechanism: - if (!InlineArrayCopy) return NULL; - break; - case vmIntrinsics::_encodeISOArray: - if (!SpecialEncodeISOArray) return NULL; - if (!Matcher::match_rule_supported(Op_EncodeISOArray)) return NULL; - break; - case vmIntrinsics::_checkIndex: - // We do not intrinsify this. The optimizer does fine with it. 
- return NULL; - - case vmIntrinsics::_getCallerClass: - if (!InlineReflectionGetCallerClass) return NULL; - if (SystemDictionary::reflect_CallerSensitive_klass() == NULL) return NULL; - break; - - case vmIntrinsics::_bitCount_i: - if (!Matcher::match_rule_supported(Op_PopCountI)) return NULL; - break; - - case vmIntrinsics::_bitCount_l: - if (!Matcher::match_rule_supported(Op_PopCountL)) return NULL; - break; - - case vmIntrinsics::_numberOfLeadingZeros_i: - if (!Matcher::match_rule_supported(Op_CountLeadingZerosI)) return NULL; - break; - - case vmIntrinsics::_numberOfLeadingZeros_l: - if (!Matcher::match_rule_supported(Op_CountLeadingZerosL)) return NULL; - break; - - case vmIntrinsics::_numberOfTrailingZeros_i: - if (!Matcher::match_rule_supported(Op_CountTrailingZerosI)) return NULL; - break; - - case vmIntrinsics::_numberOfTrailingZeros_l: - if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return NULL; - break; - - case vmIntrinsics::_reverseBytes_c: - if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return NULL; - break; - case vmIntrinsics::_reverseBytes_s: - if (!Matcher::match_rule_supported(Op_ReverseBytesS)) return NULL; - break; - case vmIntrinsics::_reverseBytes_i: - if (!Matcher::match_rule_supported(Op_ReverseBytesI)) return NULL; - break; - case vmIntrinsics::_reverseBytes_l: - if (!Matcher::match_rule_supported(Op_ReverseBytesL)) return NULL; - break; - - case vmIntrinsics::_Reference_get: - // Use the intrinsic version of Reference.get() so that the value in - // the referent field can be registered by the G1 pre-barrier code. - // Also add memory barrier to prevent commoning reads from this field - // across safepoint since GC can change it value. 
- break; - - case vmIntrinsics::_compareAndSwapObject: -#ifdef _LP64 - if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapP)) return NULL; -#endif - break; - - case vmIntrinsics::_compareAndSwapLong: - if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return NULL; - break; - - case vmIntrinsics::_getAndAddInt: - if (!Matcher::match_rule_supported(Op_GetAndAddI)) return NULL; - break; - - case vmIntrinsics::_getAndAddLong: - if (!Matcher::match_rule_supported(Op_GetAndAddL)) return NULL; - break; - - case vmIntrinsics::_getAndSetInt: - if (!Matcher::match_rule_supported(Op_GetAndSetI)) return NULL; - break; - - case vmIntrinsics::_getAndSetLong: - if (!Matcher::match_rule_supported(Op_GetAndSetL)) return NULL; - break; - - case vmIntrinsics::_getAndSetObject: -#ifdef _LP64 - if (!UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetP)) return NULL; - if (UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetN)) return NULL; - break; -#else - if (!Matcher::match_rule_supported(Op_GetAndSetP)) return NULL; - break; -#endif - - case vmIntrinsics::_aescrypt_encryptBlock: - case vmIntrinsics::_aescrypt_decryptBlock: - if (!UseAESIntrinsics) return NULL; - break; - - case vmIntrinsics::_multiplyToLen: - if (!UseMultiplyToLenIntrinsic) return NULL; - break; - - case vmIntrinsics::_squareToLen: - if (!UseSquareToLenIntrinsic) return NULL; - break; - - case vmIntrinsics::_mulAdd: - if (!UseMulAddIntrinsic) return NULL; - break; - - case vmIntrinsics::_montgomeryMultiply: - if (!UseMontgomeryMultiplyIntrinsic) return NULL; - break; - case vmIntrinsics::_montgomerySquare: - if (!UseMontgomerySquareIntrinsic) return NULL; - break; - - case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: - case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: - if (!UseAESIntrinsics) return NULL; - // these two require the predicated logic - predicates = 1; - break; - - case vmIntrinsics::_sha_implCompress: - if (!UseSHA1Intrinsics) return 
NULL; - break; - - case vmIntrinsics::_sha2_implCompress: - if (!UseSHA256Intrinsics) return NULL; - break; - - case vmIntrinsics::_sha5_implCompress: - if (!UseSHA512Intrinsics) return NULL; - break; - - case vmIntrinsics::_digestBase_implCompressMB: - if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return NULL; - predicates = 3; - break; - - case vmIntrinsics::_ghash_processBlocks: - if (!UseGHASHIntrinsics) return NULL; - break; - - case vmIntrinsics::_updateCRC32: - case vmIntrinsics::_updateBytesCRC32: - case vmIntrinsics::_updateByteBufferCRC32: - if (!UseCRC32Intrinsics) return NULL; - break; - - case vmIntrinsics::_updateBytesCRC32C: - case vmIntrinsics::_updateDirectByteBufferCRC32C: - if (!UseCRC32CIntrinsics) return NULL; - break; - - case vmIntrinsics::_incrementExactI: - case vmIntrinsics::_addExactI: - if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL; - break; - case vmIntrinsics::_incrementExactL: - case vmIntrinsics::_addExactL: - if (!Matcher::match_rule_supported(Op_OverflowAddL) || !UseMathExactIntrinsics) return NULL; - break; - case vmIntrinsics::_decrementExactI: - case vmIntrinsics::_subtractExactI: - if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL; - break; - case vmIntrinsics::_decrementExactL: - case vmIntrinsics::_subtractExactL: - if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL; - break; - case vmIntrinsics::_negateExactI: - if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL; - break; - case vmIntrinsics::_negateExactL: - if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL; - break; - case vmIntrinsics::_multiplyExactI: - if (!Matcher::match_rule_supported(Op_OverflowMulI) || !UseMathExactIntrinsics) return NULL; - break; - case vmIntrinsics::_multiplyExactL: - if 
(!Matcher::match_rule_supported(Op_OverflowMulL) || !UseMathExactIntrinsics) return NULL; - break; - - case vmIntrinsics::_getShortUnaligned: - case vmIntrinsics::_getCharUnaligned: - case vmIntrinsics::_getIntUnaligned: - case vmIntrinsics::_getLongUnaligned: - case vmIntrinsics::_putShortUnaligned: - case vmIntrinsics::_putCharUnaligned: - case vmIntrinsics::_putIntUnaligned: - case vmIntrinsics::_putLongUnaligned: - if (!UseUnalignedAccesses) return NULL; - break; - - default: + if (is_available) { assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility"); assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?"); - break; + return new LibraryIntrinsic(m, is_virtual, + vmIntrinsics::predicates_needed(id), + vmIntrinsics::does_virtual_dispatch(id), + (vmIntrinsics::ID) id); + } else { + return NULL; } - - // -XX:-InlineClassNatives disables natives from the Class class. - // The flag applies to all reflective calls, notably Array.newArray - // (visible to Java programmers as Array.newInstance). - if (m->holder()->name() == ciSymbol::java_lang_Class() || - m->holder()->name() == ciSymbol::java_lang_reflect_Array()) { - if (!InlineClassNatives) return NULL; - } - - // -XX:-InlineThreadNatives disables natives from the Thread class. - if (m->holder()->name() == ciSymbol::java_lang_Thread()) { - if (!InlineThreadNatives) return NULL; - } - - // -XX:-InlineMathNatives disables natives from the Math,Float and Double classes. - if (m->holder()->name() == ciSymbol::java_lang_Math() || - m->holder()->name() == ciSymbol::java_lang_Float() || - m->holder()->name() == ciSymbol::java_lang_Double()) { - if (!InlineMathNatives) return NULL; - } - - // -XX:-InlineUnsafeOps disables natives from the Unsafe class. 
- if (m->holder()->name() == ciSymbol::sun_misc_Unsafe()) { - if (!InlineUnsafeOps) return NULL; - } - - return new LibraryIntrinsic(m, is_virtual, predicates, does_virtual_dispatch, (vmIntrinsics::ID) id); } //----------------------register_library_intrinsics----------------------- @@ -812,7 +523,6 @@ bool LibraryCallKit::try_to_inline(int predicate) { case vmIntrinsics::_getLong: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, !is_volatile); case vmIntrinsics::_getFloat: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, !is_volatile); case vmIntrinsics::_getDouble: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, !is_volatile); - case vmIntrinsics::_putObject: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, !is_volatile); case vmIntrinsics::_putBoolean: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, !is_volatile); case vmIntrinsics::_putByte: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, !is_volatile); diff --git a/hotspot/src/share/vm/prims/whitebox.cpp b/hotspot/src/share/vm/prims/whitebox.cpp index 4a97424f1aa..32748f89ce8 100644 --- a/hotspot/src/share/vm/prims/whitebox.cpp +++ b/hotspot/src/share/vm/prims/whitebox.cpp @@ -528,6 +528,24 @@ WB_ENTRY(jboolean, WB_IsMethodQueuedForCompilation(JNIEnv* env, jobject o, jobje return mh->queued_for_compilation(); WB_END +WB_ENTRY(jboolean, WB_IsIntrinsicAvailable(JNIEnv* env, jobject o, jobject method, jobject compilation_context, jint compLevel)) + if (compLevel < CompLevel_none || compLevel > CompLevel_highest_tier) { + return false; // Intrinsic is not available on a non-existent compilation level. 
+ } + jmethodID method_id, compilation_context_id; + method_id = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); + methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(method_id)); + if (compilation_context != NULL) { + compilation_context_id = reflected_method_to_jmid(thread, env, compilation_context); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); + methodHandle cch(THREAD, Method::checked_resolve_jmethod_id(compilation_context_id)); + return CompileBroker::compiler(compLevel)->is_intrinsic_available(mh, cch); + } else { + return CompileBroker::compiler(compLevel)->is_intrinsic_available(mh, NULL); + } +WB_END + WB_ENTRY(jint, WB_GetMethodCompilationLevel(JNIEnv* env, jobject o, jobject method, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); CHECK_JNI_EXCEPTION_(env, CompLevel_none); @@ -1477,14 +1495,17 @@ static JNINativeMethod methods[] = { #endif // INCLUDE_NMT {CC"deoptimizeFrames", CC"(Z)I", (void*)&WB_DeoptimizeFrames }, {CC"deoptimizeAll", CC"()V", (void*)&WB_DeoptimizeAll }, - {CC"deoptimizeMethod0", CC"(Ljava/lang/reflect/Executable;Z)I", - (void*)&WB_DeoptimizeMethod }, + {CC"deoptimizeMethod0", CC"(Ljava/lang/reflect/Executable;Z)I", + (void*)&WB_DeoptimizeMethod }, {CC"isMethodCompiled0", CC"(Ljava/lang/reflect/Executable;Z)Z", (void*)&WB_IsMethodCompiled }, {CC"isMethodCompilable0", CC"(Ljava/lang/reflect/Executable;IZ)Z", (void*)&WB_IsMethodCompilable}, {CC"isMethodQueuedForCompilation0", CC"(Ljava/lang/reflect/Executable;)Z", (void*)&WB_IsMethodQueuedForCompilation}, + {CC"isIntrinsicAvailable0", + CC"(Ljava/lang/reflect/Executable;Ljava/lang/reflect/Executable;I)Z", + (void*)&WB_IsIntrinsicAvailable}, {CC"makeMethodNotCompilable0", CC"(Ljava/lang/reflect/Executable;IZ)V", (void*)&WB_MakeMethodNotCompilable}, {CC"testSetDontInlineMethod0", diff --git a/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java b/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java 
new file mode 100644 index 00000000000..1a547540382 --- /dev/null +++ b/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +import java.lang.reflect.Executable; +import java.util.concurrent.Callable; +import java.util.Objects; +/* + * @test + * @bug 8130832 + * @library /testlibrary /../../test/lib /compiler/whitebox /compiler/testlibrary + * @build IntrinsicAvailableTest + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission + * @run main/othervm -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * -XX:+UseCRC32Intrinsics + * IntrinsicAvailableTest + * @run main/othervm -Xbootclasspath/a:. 
+ * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * -XX:-UseCRC32Intrinsics + * IntrinsicAvailableTest + */ +public class IntrinsicAvailableTest extends CompilerWhiteBoxTest { + protected String VMName; + + public IntrinsicAvailableTest(IntrinsicAvailableTestTestCase testCase) { + super(testCase); + VMName = System.getProperty("java.vm.name"); + } + + public static class IntrinsicAvailableTestTestCase implements TestCase { + + public String name() { + return "IntrinsicAvailableTestTestCase"; + } + + public Executable getExecutable() { + // Using a single method to test the + // WhiteBox.isIntrinsicAvailable(Executable method, int compLevel) + // call for the compilation level corresponding to both the C1 and C2 + // compiler keeps the current test simple. + // + // The tested method is java.util.zip.CRC32.update(int, int) because + // both C1 and C2 define an intrinsic for the method and + // the UseCRC32Intrinsics flag can be used to enable/disable + // intrinsification of the method in both product and fastdebug + // builds. + try { + return Class.forName("java.util.zip.CRC32").getDeclaredMethod("update", int.class, int.class); + } catch (NoSuchMethodException e) { + throw new RuntimeException("Test bug, method unavailable. " + e); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Test bug, class unavailable. " + e); + } + } + + public Callable getCallable() { + return null; + } + + public boolean isOsr() { + return false; + } + + } + + protected void checkIntrinsicForCompilationLevel(Executable method, int compLevel) throws Exception { + boolean intrinsicEnabled = Boolean.valueOf(getVMOption("UseCRC32Intrinsics")); + boolean intrinsicAvailable = WHITE_BOX.isIntrinsicAvailable(method, + compLevel); + + String intrinsicEnabledMessage = intrinsicEnabled ? "enabled" : "disabled"; + String intrinsicAvailableMessage = intrinsicAvailable ? 
"available" : "not available"; + + if (intrinsicEnabled == intrinsicAvailable) { + System.out.println("Expected result: intrinsic for java.util.zip.CRC32.update() is " + + intrinsicEnabledMessage + " and intrinsic is " + intrinsicAvailableMessage + + " at compilation level " + compLevel); + } else { + throw new RuntimeException("Unexpected result: intrinsic for java.util.zip.CRC32.update() is " + + intrinsicEnabledMessage + " but intrinsic is " + intrinsicAvailableMessage + + " at compilation level " + compLevel); + } + } + + protected boolean isServerVM() { + return VMName.toLowerCase().contains("server"); + } + + public void test() throws Exception { + Executable intrinsicMethod = testCase.getExecutable(); + if (isServerVM()) { + if (TIERED_COMPILATION) { + checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_SIMPLE); + } + checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_FULL_OPTIMIZATION); + } else { + checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_SIMPLE); + } + } + + public static void main(String args[]) throws Exception { + new IntrinsicAvailableTest(new IntrinsicAvailableTestTestCase()).test(); + } +} diff --git a/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java b/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java index f98f5926f1b..1fcd33a7dd1 100644 --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java @@ -67,7 +67,7 @@ public abstract class IntrinsicBase extends CompilerWhiteBoxTest { compileAtLevel(CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); } - if (!isIntrinsicSupported()) { + if (!isIntrinsicAvailable()) { expectedIntrinsicCount = 0; } break; @@ -114,7 +114,11 @@ public abstract class IntrinsicBase extends CompilerWhiteBoxTest { } } - protected abstract boolean isIntrinsicSupported(); + // An intrinsic is available if: + // - the intrinsic is enabled (by using the appropriate 
command-line flag) and + // - the intrinsic is supported by the VM (i.e., the platform on which the VM is + // running provides the instructions necessary for the VM to generate the intrinsic). + protected abstract boolean isIntrinsicAvailable(); protected abstract String getIntrinsicId(); @@ -123,14 +127,20 @@ public abstract class IntrinsicBase extends CompilerWhiteBoxTest { } static class IntTest extends IntrinsicBase { + + protected boolean isIntrinsicAvailable; // The tested intrinsic is available on the current platform. + protected IntTest(MathIntrinsic.IntIntrinsic testCase) { super(testCase); + // Only the C2 compiler intrinsifies exact math methods + // so check if the intrinsics are available with C2. + isIntrinsicAvailable = WHITE_BOX.isIntrinsicAvailable(testCase.getTestMethod(), + COMP_LEVEL_FULL_OPTIMIZATION); } @Override - protected boolean isIntrinsicSupported() { - return isServerVM() && Boolean.valueOf(useMathExactIntrinsics) - && (Platform.isX86() || Platform.isX64() || Platform.isAArch64()); + protected boolean isIntrinsicAvailable() { + return isIntrinsicAvailable; } @Override @@ -140,14 +150,20 @@ public abstract class IntrinsicBase extends CompilerWhiteBoxTest { } static class LongTest extends IntrinsicBase { + + protected boolean isIntrinsicAvailable; // The tested intrinsic is available on the current platform. + protected LongTest(MathIntrinsic.LongIntrinsic testCase) { super(testCase); + // Only the C2 compiler intrinsifies exact math methods + // so check if the intrinsics are available with C2. 
+ isIntrinsicAvailable = WHITE_BOX.isIntrinsicAvailable(testCase.getTestMethod(), + COMP_LEVEL_FULL_OPTIMIZATION); } @Override - protected boolean isIntrinsicSupported() { - return isServerVM() && Boolean.valueOf(useMathExactIntrinsics) && - (Platform.isX64() || Platform.isPPC() || Platform.isAArch64()); + protected boolean isIntrinsicAvailable() { + return isIntrinsicAvailable; } @Override diff --git a/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java b/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java index 99039f9a7f3..29d5e9916e8 100644 --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java @@ -28,47 +28,89 @@ public class MathIntrinsic { enum IntIntrinsic implements CompilerWhiteBoxTest.TestCase { Add { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("addExact", int.class, int.class); + } + @Override Object execMathMethod() { return intR = Math.addExact(int1, int2); } }, - Subtract { + Subtract { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("subtractExact", int.class, int.class); + } + @Override Object execMathMethod() { return intR = Math.subtractExact(int1, int2); } }, Multiply { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("multiplyExact", int.class, int.class); + } + @Override Object execMathMethod() { return intR = Math.multiplyExact(int1, int2); } }, Increment { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("incrementExact", int.class); + } + @Override Object execMathMethod() { return intR = 
Math.incrementExact(int1); } }, Decrement { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("decrementExact", int.class); + } + @Override Object execMathMethod() { return intR = Math.decrementExact(int1); } }, Negate { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("negateExact", int.class); + } + @Override Object execMathMethod() { return intR = Math.negateExact(int1); } }; + protected int int1; protected int int2; protected int intR; + abstract Executable testMethod() throws NoSuchMethodException, ClassNotFoundException; abstract Object execMathMethod(); + public Executable getTestMethod() { + try { + return testMethod(); + } catch (NoSuchMethodException e) { + throw new RuntimeException("Test bug, no such method: " + e); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Test bug, no such class: " + e); + } + } + @Override public Executable getExecutable() { try { @@ -92,36 +134,66 @@ public class MathIntrinsic { enum LongIntrinsic implements CompilerWhiteBoxTest.TestCase { Add { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("addExact", long.class, long.class); + } + @Override Object execMathMethod() { return longR = Math.addExact(long1, long2); } }, Subtract { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("subtractExact", long.class, long.class); + } + @Override Object execMathMethod() { return longR = Math.subtractExact(long1, long2); } }, Multiply { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("multiplyExact", long.class, 
long.class); + } + @Override Object execMathMethod() { return longR = Math.multiplyExact(long1, long2); } }, Increment { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("incrementExact", long.class); + } + @Override Object execMathMethod() { return longR = Math.incrementExact(long1); } }, Decrement { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("decrementExact", long.class); + } + @Override Object execMathMethod() { return longR = Math.decrementExact(long1); } }, Negate { + @Override + Executable testMethod() throws NoSuchMethodException, ClassNotFoundException { + return Class.forName("java.lang.Math").getDeclaredMethod("negateExact", long.class); + } + @Override Object execMathMethod() { return longR = Math.negateExact(long1); @@ -131,8 +203,19 @@ public class MathIntrinsic { protected long long2; protected long longR; + abstract Executable testMethod() throws NoSuchMethodException, ClassNotFoundException; abstract Object execMathMethod(); + public Executable getTestMethod() { + try { + return testMethod(); + } catch (NoSuchMethodException e) { + throw new RuntimeException("Test bug, no such method: " + e); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Test bug, no such class: " + e); + } + } + @Override public Executable getExecutable() { try {